diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 08909f8b28..7f6d6b0fb9 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -124,6 +124,10 @@ #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX struct VZeroUpperGuard { +#ifdef __GNUC__ + __attribute__((always_inline)) +#endif + inline VZeroUpperGuard() { _mm256_zeroupper(); } #ifdef __GNUC__ __attribute__((always_inline)) #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index e131909845..3b946ff7c6 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1202,14 +1202,16 @@ OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32) inline v_uint32x4 v_load_expand_q(const uchar* ptr) { - uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr); + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + uint8x8_t v0 = vcreate_u8(*(unaligned_uint*)ptr); uint16x4_t v1 = vget_low_u16(vmovl_u8(v0)); return v_uint32x4(vmovl_u16(v1)); } inline v_int32x4 v_load_expand_q(const schar* ptr) { - int8x8_t v0 = vcreate_s8(*(unsigned*)ptr); + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + int8x8_t v0 = vcreate_s8(*(unaligned_uint*)ptr); int16x4_t v1 = vget_low_s16(vmovl_s8(v0)); return v_int32x4(vmovl_s16(v1)); } diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 32a5e6b6bd..6444e8c3c2 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -789,9 +789,9 @@ CV_EXPORTS InstrNode* getCurrentNode(); #endif #ifdef __CV_AVX_GUARD -#define CV_INSTRUMENT_REGION(); __CV_AVX_GUARD CV_INSTRUMENT_REGION_(); +#define CV_INSTRUMENT_REGION() __CV_AVX_GUARD CV_INSTRUMENT_REGION_(); #else -#define CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION_(); +#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_(); #endif namespace cv { diff --git a/modules/core/misc/java/src/java/core+Mat.java b/modules/core/misc/java/src/java/core+Mat.java index c75f9d5c01..e42fca9897 100644 --- a/modules/core/misc/java/src/java/core+Mat.java +++ b/modules/core/misc/java/src/java/core+Mat.java @@ -67,6 +67,19 @@ public class Mat { return; } + // + // C++: Mat::Mat(int ndims, const int* sizes, int type) + // + + // javadoc: Mat::Mat(sizes, type) + public Mat(int[] sizes, int type) + { + + nativeObj = n_Mat(sizes.length, sizes, type); + + return; + } + // // C++: Mat::Mat(int rows, int cols, int type, Scalar s) // @@ -93,6 +106,19 @@ public class Mat { return; } + // + // C++: Mat::Mat(int ndims, const int* sizes, int type, Scalar s) + // + + // javadoc: Mat::Mat(sizes, type, s) + public Mat(int[] sizes, int type, Scalar s) + { + + nativeObj = n_Mat(sizes.length, sizes, type, s.val[0], s.val[1], s.val[2], s.val[3]); + + return; + } + // // C++: Mat::Mat(Mat m, Range rowRange, Range colRange = Range::all()) // @@ -115,6 +141,19 @@ public class Mat { return; } + // + // C++: Mat::Mat(const Mat& m, const std::vector& ranges) + // + + // javadoc: Mat::Mat(m, ranges) + public Mat(Mat m, Range[] ranges) + { + + nativeObj = n_Mat(m.nativeObj, ranges); + + return; + } + // // C++: Mat::Mat(Mat m, Rect roi) // @@ -370,6 +409,31 @@ public class Mat { return; } + // + // C++: void Mat::create(int ndims, const int* sizes, int type) + // + + // javadoc: Mat::create(sizes, type) + public void create(int[] sizes, int type) + { + + n_create(nativeObj, sizes.length, sizes, type); + + return; + } + + // + // C++: void Mat::copySize(const Mat& m); + // + + // javadoc: Mat::copySize(m) + public void copySize(Mat m) + { + n_copySize(nativeObj, m.nativeObj); + + return; + } + // // C++: Mat Mat::cross(Mat m) // @@ -633,6 +697,19 @@ public class Mat { return retVal; } + // + // C++: static Mat Mat::ones(int ndims, const int* sizes, int type) + // + + // javadoc: Mat::ones(sizes, type) + public static Mat ones(int[] sizes, int type) + { + + Mat retVal = new Mat(n_ones(sizes.length, sizes, type)); + + return retVal; + } + // // C++: void Mat::push_back(Mat m) // @@ -867,6 +944,19 @@ public class Mat { return retVal; } + // + // C++: Mat Mat::operator()(const std::vector& ranges) + // + + // javadoc: Mat::operator()(ranges[]) + public Mat submat(Range[] ranges) + { + + Mat retVal = new Mat(n_submat_ranges(nativeObj, ranges)); + + return retVal; + } + // // C++: Mat Mat::operator()(Rect roi) // @@ -945,6 +1035,19 @@ public class Mat { return retVal; } + // + // C++: static Mat Mat::zeros(int ndims, const int* sizes, int type) + // + + // javadoc: Mat::zeros(sizes, type) + public static Mat zeros(int[] sizes, int type) + { + + Mat retVal = new Mat(n_zeros(sizes.length, sizes, type)); + + return retVal; + } + @Override protected void finalize() throws Throwable { n_delete(nativeObj); @@ -979,6 +1082,20 @@ public class Mat { return nPutD(nativeObj, row, col, data.length, data); } + // javadoc:Mat::put(idx,data) + public int put(int[] idx, double... data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + return nPutDIdx(nativeObj, idx, data.length, data); + } + // javadoc:Mat::put(row,col,data) public int put(int row, int col, float[] data) { int t = type(); @@ -994,6 +1111,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::put(idx,data) + public int put(int[] idx, float[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_32F) { + return nPutFIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::put(row,col,data) public int put(int row, int col, int[] data) { int t = type(); @@ -1009,6 +1143,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::put(idx,data) + public int put(int[] idx, int[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_32S) { + return nPutIIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::put(row,col,data) public int put(int row, int col, short[] data) { int t = type(); @@ -1024,6 +1175,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::put(idx,data) + public int put(int[] idx, short[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_16U || CvType.depth(t) == CvType.CV_16S) { + return nPutSIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::put(row,col,data) public int put(int row, int col, byte[] data) { int t = type(); @@ -1039,6 +1207,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::put(idx,data) + public int put(int[] idx, byte[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { + return nPutBIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::put(row,col,data,offset,length) public int put(int row, int col, byte[] data, int offset, int length) { int t = type(); @@ -1054,6 +1239,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::put(idx,data,offset,length) + public int put(int[] idx, byte[] data, int offset, int length) { + int t = type(); + if (data == null || length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { + return nPutBwIdxOffset(nativeObj, idx, length, offset, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::get(row,col,data) public int get(int row, int col, byte[] data) { int t = type(); @@ -1069,6 +1271,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::get(idx,data) + public int get(int[] idx, byte[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { + return nGetBIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::get(row,col,data) public int get(int row, int col, short[] data) { int t = type(); @@ -1084,6 +1303,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::get(idx,data) + public int get(int[] idx, short[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_16U || CvType.depth(t) == CvType.CV_16S) { + return nGetSIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::get(row,col,data) public int get(int row, int col, int[] data) { int t = type(); @@ -1099,6 +1335,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::get(idx,data) + public int get(int[] idx, int[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_32S) { + return nGetIIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::get(row,col,data) public int get(int row, int col, float[] data) { int t = type(); @@ -1114,6 +1367,23 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::get(idx,data) + public int get(int[] idx, float[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_32F) { + return nGetFIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::get(row,col,data) public int get(int row, int col, double[] data) { int t = type(); @@ -1129,11 +1399,35 @@ public class Mat { throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); } + // javadoc:Mat::get(idx,data) + public int get(int[] idx, double[] data) { + int t = type(); + if (data == null || data.length % CvType.channels(t) != 0) + throw new java.lang.UnsupportedOperationException( + "Provided data element number (" + + (data == null ? 0 : data.length) + + ") should be multiple of the Mat channels count (" + + CvType.channels(t) + ")"); + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + if (CvType.depth(t) == CvType.CV_64F) { + return nGetDIdx(nativeObj, idx, data.length, data); + } + throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + } + // javadoc:Mat::get(row,col) public double[] get(int row, int col) { return nGet(nativeObj, row, col); } + // javadoc:Mat::get(idx) + public double[] get(int[] idx) { + if (idx.length != dims()) + throw new IllegalArgumentException("Incorrect number of indices"); + return nGetIdx(nativeObj, idx); + } + // javadoc:Mat::height() public int height() { return rows(); @@ -1155,6 +1449,9 @@ public class Mat { // C++: Mat::Mat(int rows, int cols, int type) private static native long n_Mat(int rows, int cols, int type); + // C++: Mat::Mat(int ndims, const int* sizes, int type) + private static native long n_Mat(int ndims, int[] sizes, int type); + // C++: Mat::Mat(int rows, int cols, int type, void* data) private static native long n_Mat(int rows, int cols, int type, ByteBuffer data); @@ -1167,11 +1464,17 @@ public class Mat { // C++: Mat::Mat(Size size, int type, Scalar s) private static native long n_Mat(double size_width, double size_height, int type, double s_val0, double s_val1, double s_val2, double s_val3); + // C++: Mat::Mat(int ndims, const int* sizes, int type, Scalar s) + private static native long n_Mat(int ndims, int[] sizes, int type, double s_val0, double s_val1, double s_val2, double s_val3); + // C++: Mat::Mat(Mat m, Range rowRange, Range colRange = Range::all()) private static native long n_Mat(long m_nativeObj, int rowRange_start, int rowRange_end, int colRange_start, int colRange_end); private static native long n_Mat(long m_nativeObj, int rowRange_start, int rowRange_end); + // C++: Mat::Mat(const Mat& m, const std::vector& ranges) + private static native long n_Mat(long m_nativeObj, Range[] ranges); + // C++: Mat Mat::adjustROI(int dtop, int dbottom, int dleft, int dright) private static native long n_adjustROI(long nativeObj, int dtop, int dbottom, int dleft, int dright); @@ -1226,6 +1529,12 @@ public class Mat { // C++: void Mat::create(Size size, int type) private static native void n_create(long nativeObj, double size_width, double size_height, int type); + // C++: void Mat::create(int ndims, const int* sizes, int type) + private static native void n_create(long nativeObj, int ndims, int[] sizes, int type); + + // C++: void Mat::copySize(const Mat& m) + private static native void n_copySize(long nativeObj, long m_nativeObj); + // C++: Mat Mat::cross(Mat m) private static native long n_cross(long nativeObj, long m_nativeObj); @@ -1284,6 +1593,9 @@ public class Mat { // C++: static Mat Mat::ones(Size size, int type) private static native long n_ones(double size_width, double size_height, int type); + // C++: static Mat Mat::ones(int ndims, const int* sizes, int type) + private static native long n_ones(int ndims, int[] sizes, int type); + // C++: void Mat::push_back(Mat m) private static native void n_push_back(long nativeObj, long m_nativeObj); @@ -1332,6 +1644,9 @@ public class Mat { // C++: Mat Mat::operator()(Range rowRange, Range colRange) private static native long n_submat_rr(long nativeObj, int rowRange_start, int rowRange_end, int colRange_start, int colRange_end); + // C++: Mat Mat::operator()(const std::vector& ranges) + private static native long n_submat_ranges(long nativeObj, Range[] ranges); + // C++: Mat Mat::operator()(Rect roi) private static native long n_submat(long nativeObj, int roi_x, int roi_y, int roi_width, int roi_height); @@ -1350,32 +1665,59 @@ public class Mat { // C++: static Mat Mat::zeros(Size size, int type) private static native long n_zeros(double size_width, double size_height, int type); + // C++: static Mat Mat::zeros(int ndims, const int* sizes, int type) + private static native long n_zeros(int ndims, int[] sizes, int type); + // native support for java finalize() private static native void n_delete(long nativeObj); private static native int nPutD(long self, int row, int col, int count, double[] data); + private static native int nPutDIdx(long self, int[] idx, int count, double[] data); + private static native int nPutF(long self, int row, int col, int count, float[] data); + private static native int nPutFIdx(long self, int[] idx, int count, float[] data); + private static native int nPutI(long self, int row, int col, int count, int[] data); + private static native int nPutIIdx(long self, int[] idx, int count, int[] data); + private static native int nPutS(long self, int row, int col, int count, short[] data); + private static native int nPutSIdx(long self, int[] idx, int count, short[] data); + private static native int nPutB(long self, int row, int col, int count, byte[] data); + private static native int nPutBIdx(long self, int[] idx, int count, byte[] data); + private static native int nPutBwOffset(long self, int row, int col, int count, int offset, byte[] data); + private static native int nPutBwIdxOffset(long self, int[] idx, int count, int offset, byte[] data); + private static native int nGetB(long self, int row, int col, int count, byte[] vals); + private static native int nGetBIdx(long self, int[] idx, int count, byte[] vals); + private static native int nGetS(long self, int row, int col, int count, short[] vals); + private static native int nGetSIdx(long self, int[] idx, int count, short[] vals); + private static native int nGetI(long self, int row, int col, int count, int[] vals); + private static native int nGetIIdx(long self, int[] idx, int count, int[] vals); + private static native int nGetF(long self, int row, int col, int count, float[] vals); + private static native int nGetFIdx(long self, int[] idx, int count, float[] vals); + private static native int nGetD(long self, int row, int col, int count, double[] vals); + private static native int nGetDIdx(long self, int[] idx, int count, double[] vals); + private static native double[] nGet(long self, int row, int col); + private static native double[] nGetIdx(long self, int[] idx); + private static native String nDump(long self); } diff --git a/modules/core/misc/java/test/MatTest.java b/modules/core/misc/java/test/MatTest.java index f0f4fdc200..4c8c52b09c 100644 --- a/modules/core/misc/java/test/MatTest.java +++ b/modules/core/misc/java/test/MatTest.java @@ -185,6 +185,16 @@ public class MatTest extends OpenCVTestCase { assertEquals(CvType.CV_16U, dst.type()); } + public void testCreateIntArrayInt() { + int[] dims = new int[] {5, 6, 7}; + dst.create(dims, CvType.CV_16U); + + assertEquals(5, dst.size(0)); + assertEquals(6, dst.size(1)); + assertEquals(7, dst.size(2)); + assertEquals(CvType.CV_16U, dst.type()); + } + public void testCross() { Mat answer = new Mat(1, 3, CvType.CV_32F); answer.put(0, 0, 7.0, 1.0, -5.0); @@ -569,6 +579,15 @@ public class MatTest extends OpenCVTestCase { assertMatEqual(truth, dst, EPS); } + public void testMatMatRangeArray() { + dst = new Mat(gray255_32f_3d, new Range[]{new Range(0, 5), new Range(0, 5), new Range(0, 5)}); + + truth = new Mat(new int[] {5, 5, 5}, CvType.CV_32FC1, new Scalar(255)); + + assertFalse(dst.empty()); + assertMatEqual(truth, dst, EPS); + } + public void testMatMatRect() { Mat m = new Mat(7, 6, CvType.CV_32SC1); m.put(0, 0, @@ -606,6 +625,13 @@ public class MatTest extends OpenCVTestCase { assertMatEqual(gray255_32f, dst, EPS); } + public void testMatIntArrayIntScalar() { + dst = new Mat(new int[]{10, 10, 10}, CvType.CV_32F, new Scalar(255)); + + assertFalse(dst.empty()); + assertMatEqual(gray255_32f_3d, dst, EPS); + } + public void testMulMat() { assertMatEqual(gray0, gray0.mul(gray255)); @@ -619,6 +645,16 @@ public class MatTest extends OpenCVTestCase { } + public void testMulMat3d() { + Mat m1 = new Mat(new int[] {2, 2, 2}, CvType.CV_32F, new Scalar(2)); + Mat m2 = new Mat(new int[] {2, 2, 2}, CvType.CV_32F, new Scalar(3)); + + dst = m1.mul(m2); + + truth = new Mat(new int[] {2, 2, 2}, CvType.CV_32F, new Scalar(6)); + assertMatEqual(truth, dst, EPS); + } + public void testMulMatDouble() { Mat m1 = new Mat(2, 2, CvType.CV_32F, new Scalar(2)); Mat m2 = new Mat(2, 2, CvType.CV_32F, new Scalar(3)); @@ -642,6 +678,12 @@ public class MatTest extends OpenCVTestCase { assertMatEqual(truth, dst); } + public void testOnesIntArrayInt() { + dst = Mat.ones(new int[]{2, 2, 2}, CvType.CV_16S); + truth = new Mat(new int[]{2, 2, 2}, CvType.CV_16S, new Scalar(1)); + assertMatEqual(truth, dst); + } + public void testPush_back() { Mat m1 = new Mat(2, 4, CvType.CV_32F, new Scalar(2)); Mat m2 = new Mat(3, 4, CvType.CV_32F, new Scalar(3)); @@ -699,6 +741,46 @@ public class MatTest extends OpenCVTestCase { } } + public void testPutIntArrayByteArray() { + Mat m = new Mat(new int[]{5, 5, 5}, CvType.CV_8UC3, new Scalar(1, 2, 3)); + Mat sm = m.submat(new Range[]{ new Range(0, 2), new Range(1, 3), new Range(2, 4)}); + byte[] buff = new byte[] { 0, 0, 0, 0, 0, 0 }; + byte[] buff0 = new byte[] { 10, 20, 30, 40, 50, 60 }; + byte[] buff1 = new byte[] { -1, -2, -3, -4, -5, -6 }; + + int bytesNum = m.put(new int[]{1, 2, 0}, buff0); + + assertEquals(6, bytesNum); + bytesNum = m.get(new int[]{1, 2, 0}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, buff0)); + + bytesNum = sm.put(new int[]{0, 0, 0}, buff1); + + assertEquals(6, bytesNum); + bytesNum = sm.get(new int[]{0, 0, 0}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, buff1)); + + bytesNum = m.get(new int[]{0, 1, 2}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, buff1)); + + Mat m1 = m.submat(new Range[]{ new Range(1,2), Range.all(), Range.all() }); + bytesNum = m1.get(new int[]{ 0, 2, 0}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, buff0)); + + try { + byte[] bytes2 = new byte[] { 10, 20, 30, 40, 50 }; + m.put(new int[]{ 2, 2, 0 }, bytes2); + fail("Expected UnsupportedOperationException (data.length % CvType.channels(t) != 0)"); + } catch (UnsupportedOperationException e) { + // expected + } + + } + public void testPutIntIntDoubleArray() { Mat m = new Mat(5, 5, CvType.CV_8UC3, new Scalar(1, 2, 3)); Mat sm = m.submat(2, 4, 3, 5); @@ -722,6 +804,29 @@ public class MatTest extends OpenCVTestCase { assertTrue(Arrays.equals(buff, new byte[]{-1, -2, -3, -4, -5, -6})); } + public void testPutIntArrayDoubleArray() { + Mat m = new Mat(new int[]{5, 5, 5}, CvType.CV_8UC3, new Scalar(1, 2, 3)); + Mat sm = m.submat(new Range[]{ new Range(0, 2), new Range(1, 3), new Range(2, 4)}); + byte[] buff = new byte[] { 0, 0, 0, 0, 0, 0 }; + + int bytesNum = m.put(new int[]{1, 2, 0}, 10, 20, 30, 40, 50, 60); + + assertEquals(6, bytesNum); + bytesNum = m.get(new int[]{1, 2, 0}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, new byte[]{10, 20, 30, 40, 50, 60})); + + bytesNum = sm.put(new int[]{0, 0, 0}, 255, 254, 253, 252, 251, 250); + + assertEquals(6, bytesNum); + bytesNum = sm.get(new int[]{0, 0, 0}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, new byte[]{-1, -2, -3, -4, -5, -6})); + bytesNum = m.get(new int[]{0, 1, 2}, buff); + assertEquals(6, bytesNum); + assertTrue(Arrays.equals(buff, new byte[]{-1, -2, -3, -4, -5, -6})); + } + public void testPutIntIntFloatArray() { Mat m = new Mat(5, 5, CvType.CV_32FC3, new Scalar(1, 2, 3)); float[] elements = new float[] { 10, 20, 30, 40, 50, 60 }; @@ -745,6 +850,29 @@ public class MatTest extends OpenCVTestCase { } } + public void testPutIntArrayFloatArray() { + Mat m = new Mat(new int[]{5, 5, 5}, CvType.CV_32FC3, new Scalar(1, 2, 3)); + float[] elements = new float[] { 10, 20, 30, 40, 50, 60 }; + + int bytesNum = m.put(new int[]{0, 4, 3}, elements); + + assertEquals(elements.length * 4, bytesNum); + Mat m1 = m.submat(new Range[]{ Range.all(), new Range(4, 5), Range.all() }); + float buff[] = new float[3]; + bytesNum = m1.get(new int[]{ 0, 0, 4 }, buff); + assertEquals(buff.length * 4, bytesNum); + assertTrue(Arrays.equals(new float[]{40, 50, 60}, buff)); + assertArrayEquals(new double[]{10, 20, 30}, m.get(new int[]{ 0, 4, 3 }), EPS); + + try { + float[] elements2 = new float[] { 10, 20, 30, 40, 50 }; + m.put(new int[]{4, 2, 2}, elements2); + fail("Expected UnsupportedOperationException (data.length % CvType.channels(t) != 0)"); + } catch (UnsupportedOperationException e) { + // expected + } + } + public void testPutIntIntIntArray() { Mat m = new Mat(5, 5, CvType.CV_32SC3, new Scalar(-1, -2, -3)); int[] elements = new int[] { 10, 20, 30, 40, 50, 60 }; @@ -768,6 +896,29 @@ public class MatTest extends OpenCVTestCase { } } + public void testPutIntArrayIntArray() { + Mat m = new Mat(new int[]{5, 5, 5}, CvType.CV_32SC3, new Scalar(-1, -2, -3)); + int[] elements = new int[] { 10, 20, 30, 40, 50, 60 }; + + int bytesNum = m.put(new int[]{ 0, 0, 4 }, elements); + + assertEquals(elements.length * 4, bytesNum); + Mat m1 = m.submat(new Range[]{ Range.all(), Range.all(), new Range(4, 5)}); + int buff[] = new int[3]; + bytesNum = m1.get(new int[]{ 0, 0, 0 }, buff); + assertEquals(buff.length * 4, bytesNum); + assertTrue(Arrays.equals(new int[]{ 10, 20, 30 }, buff)); + assertArrayEquals(new double[]{ 40, 50, 60 }, m.get(new int[]{ 0, 1, 0 }), EPS); + + try { + int[] elements2 = new int[] { 10, 20, 30, 40, 50 }; + m.put(new int[] { 2, 2, 0 }, elements2); + fail("Expected UnsupportedOperationException (data.length % CvType.channels(t) != 0)"); + } catch (UnsupportedOperationException e) { + // expected + } + } + public void testPutIntIntShortArray() { Mat m = new Mat(5, 5, CvType.CV_16SC3, new Scalar(-1, -2, -3)); short[] elements = new short[] { 10, 20, 30, 40, 50, 60 }; @@ -790,6 +941,28 @@ public class MatTest extends OpenCVTestCase { } } + public void testPutIntArrayShortArray() { + Mat m = new Mat(new int[]{ 5, 5, 5}, CvType.CV_16SC3, new Scalar(-1, -2, -3)); + short[] elements = new short[] { 10, 20, 30, 40, 50, 60 }; + + int bytesNum = m.put(new int[]{ 0, 2, 3 }, elements); + + assertEquals(elements.length * 2, bytesNum); + Mat m1 = m.submat(new Range[]{ Range.all(), Range.all(), new Range(3, 4)}); + short buff[] = new short[3]; + bytesNum = m1.get(new int[]{ 0, 2, 0 }, buff); + assertTrue(Arrays.equals(new short[]{10, 20, 30}, buff)); + assertArrayEquals(new double[]{40, 50, 60}, m.get(new int[]{ 0, 2, 4 }), EPS); + + try { + short[] elements2 = new short[] { 10, 20, 30, 40, 50 }; + m.put(new int[] { 2, 2, 0 }, elements2); + fail("Expected UnsupportedOperationException (data.length % CvType.channels(t) != 0)"); + } catch (UnsupportedOperationException e) { + // expected + } + } + public void testRelease() { assertFalse(gray0.empty()); assertTrue(gray0.rows() > 0); @@ -818,6 +991,7 @@ public class MatTest extends OpenCVTestCase { } public void testReshapeIntIntArray() { + // 2D -> 4D Mat src = new Mat(6, 5, CvType.CV_8UC3, new Scalar(0)); assertEquals(2, src.dims()); assertEquals(src.rows(), src.size(0)); @@ -828,6 +1002,34 @@ public class MatTest extends OpenCVTestCase { assertEquals(newShape.length, dst.dims()); for (int i = 0; i < newShape.length; ++i) assertEquals(newShape[i], dst.size(i)); + + // 3D -> 2D + src = new Mat(new int[]{4, 6, 7}, CvType.CV_8UC3, new Scalar(0)); + assertEquals(3, src.dims()); + assertEquals(4, src.size(0)); + assertEquals(6, src.size(1)); + assertEquals(7, src.size(2)); + + int[] newShape2 = {src.channels() * src.size(2), src.size(0) * src.size(1)}; + dst = src.reshape(1, newShape2); + assertEquals(newShape2.length, dst.dims()); + for (int i = 0; i < newShape2.length; ++i) + assertEquals(newShape2[i], dst.size(i)); + } + + public void testCopySize() { + Mat src = new Mat(new int[]{1, 1, 10, 10}, CvType.CV_8UC1, new Scalar(1)); + assertEquals(4, src.dims()); + assertEquals(1, src.size(0)); + assertEquals(1, src.size(1)); + assertEquals(10, src.size(2)); + assertEquals(10, src.size(3)); + Mat other = new Mat(new int[]{10, 10}, src.type()); + + src.copySize(other); + assertEquals(other.dims(), src.dims()); + for (int i = 0; i < other.dims(); ++i) + assertEquals(other.size(i), src.size(i)); } public void testRow() { @@ -949,6 +1151,16 @@ public class MatTest extends OpenCVTestCase { assertEquals(2, submat.cols()); } + public void testSubmatRangeArray() { + Mat submat = gray255_32f_3d.submat(new Range[]{ new Range(2, 4), new Range(2, 4), new Range(3, 6) }); + assertTrue(submat.isSubmatrix()); + assertFalse(submat.isContinuous()); + + assertEquals(2, submat.size(0)); + assertEquals(2, submat.size(1)); + assertEquals(3, submat.size(2)); + } + public void testSubmatRect() { Mat submat = gray255.submat(new Rect(5, 5, gray255.cols() / 2, gray255.rows() / 2)); assertTrue(submat.isSubmatrix()); @@ -1015,6 +1227,13 @@ public class MatTest extends OpenCVTestCase { assertMatEqual(truth, dst); } + public void testZerosIntArray() { + dst = Mat.zeros(new int[]{2, 3, 4}, CvType.CV_16S); + + truth = new Mat(new int[]{2, 3, 4}, CvType.CV_16S, new Scalar(0)); + assertMatEqual(truth, dst); + } + public void testMatFromByteBuffer() { ByteBuffer bbuf = ByteBuffer.allocateDirect(64*64); bbuf.putInt(0x01010101); diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp index 5ee7ab3696..0b3d673139 100644 --- a/modules/core/src/matrix_wrap.cpp +++ b/modules/core/src/matrix_wrap.cpp @@ -1288,17 +1288,12 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i, { CV_Assert( i < 0 ); Mat& m = *(Mat*)obj; - if( allowTransposed ) + if (allowTransposed && !m.empty() && + d == 2 && m.dims == 2 && + m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] && + m.isContinuous()) { - if( !m.isContinuous() ) - { - CV_Assert(!fixedType() && !fixedSize()); - m.release(); - } - - if( d == 2 && m.dims == 2 && m.data && - m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] ) - return; + return; } if(fixedType()) @@ -1306,13 +1301,13 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i, if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 ) mtype = m.type(); else - CV_Assert(CV_MAT_TYPE(mtype) == m.type()); + CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), ""); } if(fixedSize()) { - CV_Assert(m.dims == d); + CV_CheckEQ(m.dims, d, ""); for(int j = 0; j < d; ++j) - CV_Assert(m.size[j] == sizes[j]); + CV_CheckEQ(m.size[j], sizes[j], ""); } m.create(d, sizes, mtype); return; @@ -1322,17 +1317,12 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i, { CV_Assert( i < 0 ); UMat& m = *(UMat*)obj; - if( allowTransposed ) + if (allowTransposed && !m.empty() && + d == 2 && m.dims == 2 && + m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] && + m.isContinuous()) { - if( !m.isContinuous() ) - { - CV_Assert(!fixedType() && !fixedSize()); - m.release(); - } - - if( d == 2 && m.dims == 2 && !m.empty() && - m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] ) - return; + return; } if(fixedType()) @@ -1340,13 +1330,13 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i, if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 ) mtype = m.type(); else - CV_Assert(CV_MAT_TYPE(mtype) == m.type()); + CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), ""); } if(fixedSize()) { - CV_Assert(m.dims == d); + CV_CheckEQ(m.dims, d, ""); for(int j = 0; j < d; ++j) - CV_Assert(m.size[j] == sizes[j]); + CV_CheckEQ(m.size[j], sizes[j], ""); } m.create(d, sizes, mtype); return; diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index b98ec4ed34..46f10b694c 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -177,6 +177,13 @@ TEST(Core_OutputArray, FixedType) EXPECT_EQ(2, num_defaultResult); } +TEST(Core_OutputArrayCreate, _13772) +{ + cv::Mat1d mat; + cv::OutputArray o(mat); + ASSERT_NO_THROW(o.create(3, 5, CV_64F, -1, true)); +} + TEST(Core_String, find_last_of__with__empty_string) diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 815b84f651..54a53fd867 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -371,7 +371,7 @@ namespace cv { fused_layer_names.push_back(last_layer); } - void setYolo(int classes, const std::vector& mask, const std::vector& anchors) + void setYolo(int classes, const std::vector& mask, const std::vector& anchors, float thresh, float nms_threshold) { cv::dnn::LayerParams region_param; region_param.name = "Region-name"; @@ -382,6 +382,8 @@ namespace cv { region_param.set("classes", classes); region_param.set("anchors", numAnchors); region_param.set("logistic", true); + region_param.set("thresh", thresh); + region_param.set("nms_threshold", nms_threshold); std::vector usedAnchors(numAnchors * 2); for (int i = 0; i < numAnchors; ++i) @@ -646,6 +648,8 @@ namespace cv { { int classes = getParam(layer_params, "classes", -1); int num_of_anchors = getParam(layer_params, "num", -1); + float thresh = getParam(layer_params, "thresh", 0.2); + float nms_threshold = getParam(layer_params, "nms_threshold", 0.4); std::string anchors_values = getParam(layer_params, "anchors", std::string()); CV_Assert(!anchors_values.empty()); @@ -658,7 +662,7 @@ namespace cv { CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size()); setParams.setPermute(false); - setParams.setYolo(classes, mask_vec, anchors_vec); + setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold); } else { CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 6c4d46a2f2..f0ca658569 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -62,6 +62,8 @@ namespace dnn class BaseConvolutionLayerImpl : public ConvolutionLayer { public: + bool newWeightAndBias; + std::vector weightsMultipliers; BaseConvolutionLayerImpl(const LayerParams ¶ms) { setParamsFrom(params); @@ -85,6 +87,8 @@ public: CV_Assert(numOutput % ngroups == 0); CV_Assert(adjustPad.width < stride.width && adjustPad.height < stride.height); + + newWeightAndBias = false; } void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE @@ -135,6 +139,20 @@ public: (dilation.height == 1 && dilation.width == 1); } + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + Mat w, b; + top->getScaleShift(w, b); + if (!w.empty() || !b.empty()) + { + fuseWeights(w, b); + return true; + } + return false; + } + + virtual void fuseWeights(const Mat& w_, const Mat& b_) = 0; + virtual void applyHalideScheduler(Ptr& node, const std::vector &inputs, const std::vector &outputs, @@ -185,11 +203,9 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl public: enum { VEC_ALIGN = 8, DFT_TYPE = CV_32F }; Mat weightsMat; - std::vector weightsMultipliers; std::vector biasvec; std::vector reluslope; Ptr activ; - bool newWeightAndBias; bool fusedBias; #ifdef HAVE_OPENCL @@ -201,7 +217,6 @@ public: #endif ConvolutionLayerImpl(const LayerParams ¶ms) : BaseConvolutionLayerImpl(params) { - newWeightAndBias = false; fusedBias = false; #ifdef HAVE_OPENCL newActiv = false; @@ -349,19 +364,7 @@ public: return !activ.empty(); } - virtual bool tryFuse(Ptr& top) CV_OVERRIDE - { - Mat w, b; - top->getScaleShift(w, b); - if (!w.empty() || !b.empty()) - { - fuseWeights(w, b); - return true; - } - return false; - } - - void fuseWeights(const Mat& w_, const Mat& b_) + void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE { // Convolution weights have OIHW data layout. Parameters fusion in case of // (conv(I) + b1 ) * w + b2 @@ -1308,6 +1311,45 @@ public: pad.width = pad_l; pad.height = pad_t; + + weightsMultipliers.assign(numOutput, 1.0); + if (weightsMat.empty()) + { + transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat); + biasesMat = hasBias() ? blobs[1].reshape(1, numOutput) + : Mat::zeros(numOutput, 1, CV_32F); + } + } + + void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE + { + Mat w = w_.total() == 1 ? Mat(1, numOutput, CV_32F, Scalar(w_.at(0))) : w_; + Mat b = b_.total() == 1 ? Mat(1, numOutput, CV_32F, Scalar(b_.at(0))) : b_; + + CV_Assert_N(!weightsMat.empty(), + w.empty() || numOutput == w.total(), + b.empty() || numOutput == b.total()); + + if (!w.empty()) + { + transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat); + weightsMat = weightsMat.reshape(1, numOutput); + for (int i = 0; i < numOutput; ++i) + { + double wi = w.at(i); + weightsMultipliers[i] *= wi; + cv::multiply(weightsMat.row(i), weightsMultipliers[i], weightsMat.row(i)); + biasesMat.at(i) *= wi; + } + weightsMat = weightsMat.reshape(1, weightsMat.total() / blobs[0].size[0]); + } + + if (!b.empty()) + { + cv::add(biasesMat, b.reshape(1, numOutput), biasesMat); + } + + newWeightAndBias = !w.empty() || !b.empty(); } class MatMulInvoker : public ParallelLoopBody @@ -1575,11 +1617,19 @@ public: if (umat_weights.empty()) { - transpose(blobs[0].reshape(1, inpCn), umat_weights); - if (hasBias()) - blobs[1].reshape(1, outCn).copyTo(umat_biases); + if (newWeightAndBias) + { + weightsMat.copyTo(umat_weights); + biasesMat.copyTo(umat_biases); + } else - umat_biases = UMat::zeros(outCn, 1, CV_32F); + { + transpose(blobs[0].reshape(1, inpCn), umat_weights); + if (hasBias()) + blobs[1].reshape(1, outCn).copyTo(umat_biases); + else + umat_biases = UMat::zeros(outCn, 1, CV_32F); + } } String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type())); diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 3de33020eb..b788ce1343 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -305,9 +305,16 @@ TEST_P(DNNTestNetwork, DenseNet_121) TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) { if (backend == DNN_BACKEND_HALIDE || - (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)) throw SkipTestException(""); + +#if defined(INF_ENGINE_RELEASE) +#if INF_ENGINE_RELEASE <= 2018050000 + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL) + throw SkipTestException(""); +#endif +#endif + Mat img = imread(findDataFile("dnn/googlenet_1.png", false)); Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false); // Output image has values in range [-143.526, 148.539]. diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 4e00b10279..11e6ee49e8 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -394,6 +394,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy) TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) { checkBackend(); + +#if defined(INF_ENGINE_RELEASE) +#if INF_ENGINE_RELEASE <= 2018050000 + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL) + throw SkipTestException(""); +#endif +#endif + std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7", "dnn/fast_neural_style_instance_norm_feathers.t7"}; std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"}; diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index dd70d16da4..143926b7ba 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -95,6 +95,7 @@ enum ImwriteFlags { IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI IMWRITE_TIFF_YDPI = 258, //!< For TIFF, use to specify the Y direction DPI + IMWRITE_TIFF_COMPRESSION = 259, //!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default. IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000. }; diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index f4b360e27d..6112e40f62 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -750,12 +750,11 @@ bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vect } //Settings that matter to all images - // defaults for now, maybe base them on params in the future int compression = COMPRESSION_LZW; int predictor = PREDICTOR_HORIZONTAL; int resUnit = -1, dpiX = -1, dpiY = -1; - readParam(params, TIFFTAG_COMPRESSION, compression); + readParam(params, IMWRITE_TIFF_COMPRESSION, compression); readParam(params, TIFFTAG_PREDICTOR, predictor); readParam(params, IMWRITE_TIFF_RESUNIT, resUnit); readParam(params, IMWRITE_TIFF_XDPI, dpiX); diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index 1caadbbbad..0c7b3268df 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -1,3 +1,12 @@ set(the_description "Image Processing") ocv_add_dispatched_file(accum SSE4_1 AVX AVX2) +ocv_add_dispatched_file(bilateral_filter SSE2 AVX2) +ocv_add_dispatched_file(box_filter SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(filter SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(color_hsv SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(color_rgb SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_define_module(imgproc opencv_core WRAP java python js) diff --git a/modules/imgproc/perf/perf_filter2d.cpp b/modules/imgproc/perf/perf_filter2d.cpp index 6095eaad2f..249afd94c4 100644 --- a/modules/imgproc/perf/perf_filter2d.cpp +++ b/modules/imgproc/perf/perf_filter2d.cpp @@ -39,7 +39,7 @@ PERF_TEST_P( TestFilter2d, Filter2d, SANITY_CHECK(dst, 1); } -PERF_TEST_P(TestFilter2d, Filter2d_ovx, +PERF_TEST_P(TestFilter2d, DISABLED_Filter2d_ovx, Combine( Values(Size(320, 240), sz1080p), Values(3, 5), diff --git a/modules/imgproc/perf/perf_pyramids.cpp b/modules/imgproc/perf/perf_pyramids.cpp index 22c3fa9064..5dd5b9cece 100644 --- a/modules/imgproc/perf/perf_pyramids.cpp +++ b/modules/imgproc/perf/perf_pyramids.cpp @@ -26,7 +26,7 @@ PERF_TEST_P(Size_MatType, pyrDown, testing::Combine( SANITY_CHECK(dst, eps, error_type); } -PERF_TEST_P(Size_MatType, pyrDown_ovx, testing::Combine( +PERF_TEST_P(Size_MatType, DISABLED_pyrDown_ovx, testing::Combine( testing::Values(sz1080p, sz720p, szVGA, szQVGA, szODD), testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16SC1, CV_16SC3, CV_16SC4, CV_32FC1, CV_32FC3, CV_32FC4) ) diff --git a/modules/imgproc/perf/perf_warp.cpp b/modules/imgproc/perf/perf_warp.cpp index cb974e9f09..4e7de8d2fe 100644 --- a/modules/imgproc/perf/perf_warp.cpp +++ b/modules/imgproc/perf/perf_warp.cpp @@ -48,7 +48,7 @@ PERF_TEST_P( TestWarpAffine, WarpAffine, #endif } -PERF_TEST_P(TestWarpAffine, WarpAffine_ovx, +PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx, Combine( Values(szVGA, sz720p, sz1080p), InterType::all(), @@ -116,7 +116,7 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective, #endif } -PERF_TEST_P(TestWarpPerspective, WarpPerspective_ovx, +PERF_TEST_P(TestWarpPerspective, DISABLED_WarpPerspective_ovx, Combine( Values(szVGA, sz720p, sz1080p), InterType::all(), diff --git a/modules/imgproc/src/bilateral_filter.dispatch.cpp b/modules/imgproc/src/bilateral_filter.dispatch.cpp new file mode 100644 index 0000000000..a27ebb18f5 --- /dev/null +++ b/modules/imgproc/src/bilateral_filter.dispatch.cpp @@ -0,0 +1,427 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, 2018, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2014-2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#include + +#include "opencv2/core/hal/intrin.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "bilateral_filter.simd.hpp" +#include "bilateral_filter.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +/****************************************************************************************\ + Bilateral Filtering +\****************************************************************************************/ + +namespace cv { + +#ifdef HAVE_OPENCL + +static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, + double sigma_color, double sigma_space, + int borderType) +{ + CV_INSTRUMENT_REGION(); +#ifdef __ANDROID__ + if (ocl::Device::getDefault().isNVidia()) + return false; +#endif + + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int i, j, maxk, radius; + + if (depth != CV_8U || cn > 4) + return false; + + if (sigma_color <= 0) + sigma_color = 1; + if (sigma_space <= 0) + sigma_space = 1; + + double gauss_color_coeff = -0.5 / (sigma_color * sigma_color); + double gauss_space_coeff = -0.5 / (sigma_space * sigma_space); + + if ( d <= 0 ) + radius = cvRound(sigma_space * 1.5); + else + radius = d / 2; + radius = MAX(radius, 1); + d = radius * 2 + 1; + + UMat src = _src.getUMat(), dst = _dst.getUMat(), temp; + if (src.u == dst.u) + return false; + + copyMakeBorder(src, temp, radius, radius, radius, radius, borderType); + std::vector _space_weight(d * d); + std::vector _space_ofs(d * d); + float * const space_weight = &_space_weight[0]; + int * const space_ofs = &_space_ofs[0]; + + // initialize space-related bilateral filter coefficients + for( i = -radius, maxk = 0; i <= radius; i++ ) + for( j = -radius; j <= radius; j++ ) + { + double r = std::sqrt((double)i * i + (double)j * j); + if ( r > radius ) + continue; + space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff); + space_ofs[maxk++] = (int)(i * temp.step + j * cn); + } + + char cvt[3][40]; + String cnstr = cn > 1 ? format("%d", cn) : ""; + String kernelName("bilateral"); + size_t sizeDiv = 1; + if ((ocl::Device::getDefault().isIntel()) && + (ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU)) + { + //Intel GPU + if (dst.cols % 4 == 0 && cn == 1) // For single channel x4 sized images. + { + kernelName = "bilateral_float4"; + sizeDiv = 4; + } + } + ocl::Kernel k(kernelName.c_str(), ocl::imgproc::bilateral_oclsrc, + format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s" + " -D uchar_t=%s -D float_t=%s -D convert_float_t=%s -D convert_uchar_t=%s -D gauss_color_coeff=(float)%f", + radius, maxk, cn, ocl::typeToStr(CV_32SC(cn)), cnstr.c_str(), + ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), + ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)), + ocl::convertTypeStr(CV_32S, CV_32F, cn, cvt[1]), + ocl::convertTypeStr(CV_32F, CV_8U, cn, cvt[2]), gauss_color_coeff)); + if (k.empty()) + return false; + + Mat mspace_weight(1, d * d, CV_32FC1, space_weight); + Mat mspace_ofs(1, d * d, CV_32SC1, space_ofs); + UMat ucolor_weight, uspace_weight, uspace_ofs; + + mspace_weight.copyTo(uspace_weight); + mspace_ofs.copyTo(uspace_ofs); + + k.args(ocl::KernelArg::ReadOnlyNoSize(temp), ocl::KernelArg::WriteOnly(dst), + ocl::KernelArg::PtrReadOnly(uspace_weight), + ocl::KernelArg::PtrReadOnly(uspace_ofs)); + + size_t globalsize[2] = { (size_t)dst.cols / sizeDiv, (size_t)dst.rows }; + return k.run(2, globalsize, NULL, false); +} +#endif + + +static void +bilateralFilter_8u( const Mat& src, Mat& dst, int d, + double sigma_color, double sigma_space, + int borderType ) +{ + CV_INSTRUMENT_REGION(); + + int cn = src.channels(); + int i, j, maxk, radius; + + CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) && src.data != dst.data ); + + if( sigma_color <= 0 ) + sigma_color = 1; + if( sigma_space <= 0 ) + sigma_space = 1; + + double gauss_color_coeff = -0.5/(sigma_color*sigma_color); + double gauss_space_coeff = -0.5/(sigma_space*sigma_space); + + if( d <= 0 ) + radius = cvRound(sigma_space*1.5); + else + radius = d/2; + radius = MAX(radius, 1); + d = radius*2 + 1; + + Mat temp; + copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); + + std::vector _color_weight(cn*256); + std::vector _space_weight(d*d); + std::vector _space_ofs(d*d); + float* color_weight = &_color_weight[0]; + float* space_weight = &_space_weight[0]; + int* space_ofs = &_space_ofs[0]; + + // initialize color-related bilateral filter coefficients + + for( i = 0; i < 256*cn; i++ ) + color_weight[i] = (float)std::exp(i*i*gauss_color_coeff); + + // initialize space-related bilateral filter coefficients + for( i = -radius, maxk = 0; i <= radius; i++ ) + { + j = -radius; + + for( ; j <= radius; j++ ) + { + double r = std::sqrt((double)i*i + (double)j*j); + if( r > radius ) + continue; + space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); + space_ofs[maxk++] = (int)(i*temp.step + j*cn); + } + } + + CV_CPU_DISPATCH(bilateralFilterInvoker_8u, (dst, temp, radius, maxk, space_ofs, space_weight, color_weight), + CV_CPU_DISPATCH_MODES_ALL); +} + + +static void +bilateralFilter_32f( const Mat& src, Mat& dst, int d, + double sigma_color, double sigma_space, + int borderType ) +{ + CV_INSTRUMENT_REGION(); + + int cn = src.channels(); + int i, j, maxk, radius; + double minValSrc=-1, maxValSrc=1; + const int kExpNumBinsPerChannel = 1 << 12; + int kExpNumBins = 0; + float lastExpVal = 1.f; + float len, scale_index; + + CV_Assert( (src.type() == CV_32FC1 || src.type() == CV_32FC3) && src.data != dst.data ); + + if( sigma_color <= 0 ) + sigma_color = 1; + if( sigma_space <= 0 ) + sigma_space = 1; + + double gauss_color_coeff = -0.5/(sigma_color*sigma_color); + double gauss_space_coeff = -0.5/(sigma_space*sigma_space); + + if( d <= 0 ) + radius = cvRound(sigma_space*1.5); + else + radius = d/2; + radius = MAX(radius, 1); + d = radius*2 + 1; + // compute the min/max range for the input image (even if multichannel) + + minMaxLoc( src.reshape(1), &minValSrc, &maxValSrc ); + if(std::abs(minValSrc - maxValSrc) < FLT_EPSILON) + { + src.copyTo(dst); + return; + } + + // temporary copy of the image with borders for easy processing + Mat temp; + copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); + + // allocate lookup tables + std::vector _space_weight(d*d); + std::vector _space_ofs(d*d); + float* space_weight = &_space_weight[0]; + int* space_ofs = &_space_ofs[0]; + + // assign a length which is slightly more than needed + len = (float)(maxValSrc - minValSrc) * cn; + kExpNumBins = kExpNumBinsPerChannel * cn; + std::vector _expLUT(kExpNumBins+2); + float* expLUT = &_expLUT[0]; + + scale_index = kExpNumBins/len; + + // initialize the exp LUT + for( i = 0; i < kExpNumBins+2; i++ ) + { + if( lastExpVal > 0.f ) + { + double val = i / scale_index; + expLUT[i] = (float)std::exp(val * val * gauss_color_coeff); + lastExpVal = expLUT[i]; + } + else + expLUT[i] = 0.f; + } + + // initialize space-related bilateral filter coefficients + for( i = -radius, maxk = 0; i <= radius; i++ ) + for( j = -radius; j <= radius; j++ ) + { + double r = std::sqrt((double)i*i + (double)j*j); + if( r > radius || ( i == 0 && j == 0 ) ) + continue; + space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); + space_ofs[maxk++] = (int)(i*(temp.step/sizeof(float)) + j*cn); + } + + // parallel_for usage + CV_CPU_DISPATCH(bilateralFilterInvoker_32f, (cn, radius, maxk, space_ofs, temp, dst, scale_index, space_weight, expLUT), + CV_CPU_DISPATCH_MODES_ALL); +} + +#ifdef HAVE_IPP +#define IPP_BILATERAL_PARALLEL 1 + +#ifdef HAVE_IPP_IW +class ipp_bilateralFilterParallel: public ParallelLoopBody +{ +public: + ipp_bilateralFilterParallel(::ipp::IwiImage &_src, ::ipp::IwiImage &_dst, int _radius, Ipp32f _valSquareSigma, Ipp32f _posSquareSigma, ::ipp::IwiBorderType _borderType, bool *_ok): + src(_src), dst(_dst) + { + pOk = _ok; + + radius = _radius; + valSquareSigma = _valSquareSigma; + posSquareSigma = _posSquareSigma; + borderType = _borderType; + + *pOk = true; + } + ~ipp_bilateralFilterParallel() {} + + virtual void operator() (const Range& range) const CV_OVERRIDE + { + if(*pOk == false) + return; + + try + { + ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile); + } + catch(const ::ipp::IwException &) + { + *pOk = false; + return; + } + } +private: + ::ipp::IwiImage &src; + ::ipp::IwiImage &dst; + + int radius; + Ipp32f valSquareSigma; + Ipp32f posSquareSigma; + ::ipp::IwiBorderType borderType; + + bool *pOk; + const ipp_bilateralFilterParallel& operator= (const ipp_bilateralFilterParallel&); +}; +#endif + +static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, double sigmaSpace, int borderType) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + + int radius = IPP_MAX(((d <= 0)?cvRound(sigmaSpace*1.5):d/2), 1); + Ipp32f valSquareSigma = (Ipp32f)((sigmaColor <= 0)?1:sigmaColor*sigmaColor); + Ipp32f posSquareSigma = (Ipp32f)((sigmaSpace <= 0)?1:sigmaSpace*sigmaSpace); + + // Acquire data and begin processing + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + ::ipp::IwiBorderSize borderSize(radius); + ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); + if(!ippBorder) + return false; + + const int threads = ippiSuggestThreadsNum(iwDst, 2); + if(IPP_BILATERAL_PARALLEL && threads > 1) { + bool ok = true; + Range range(0, (int)iwDst.m_size.height); + ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok); + if(!ok) + return false; + + parallel_for_(range, invoker, threads*4); + + if(!ok) + return false; + } else { + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder); + } + } + catch (const ::ipp::IwException &) + { + return false; + } + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(d); CV_UNUSED(sigmaColor); CV_UNUSED(sigmaSpace); CV_UNUSED(borderType); + return false; +#endif +} +#endif + +void bilateralFilter( InputArray _src, OutputArray _dst, int d, + double sigmaColor, double sigmaSpace, + int borderType ) +{ + CV_INSTRUMENT_REGION(); + + _dst.create( _src.size(), _src.type() ); + + CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), + ocl_bilateralFilter_8u(_src, _dst, d, sigmaColor, sigmaSpace, borderType)) + + Mat src = _src.getMat(), dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, borderType)); + + if( src.depth() == CV_8U ) + bilateralFilter_8u( src, dst, d, sigmaColor, sigmaSpace, borderType ); + else if( src.depth() == CV_32F ) + bilateralFilter_32f( src, dst, d, sigmaColor, sigmaSpace, borderType ); + else + CV_Error( CV_StsUnsupportedFormat, + "Bilateral filtering is only implemented for 8u and 32f images" ); +} + +} // namespace diff --git a/modules/imgproc/src/bilateral_filter.cpp b/modules/imgproc/src/bilateral_filter.simd.hpp similarity index 83% rename from modules/imgproc/src/bilateral_filter.cpp rename to modules/imgproc/src/bilateral_filter.simd.hpp index e9181f2182..65abcd4e40 100644 --- a/modules/imgproc/src/bilateral_filter.cpp +++ b/modules/imgproc/src/bilateral_filter.simd.hpp @@ -43,18 +43,25 @@ #include "precomp.hpp" -#include - #include "opencv2/core/hal/intrin.hpp" -#include "opencl_kernels_imgproc.hpp" /****************************************************************************************\ Bilateral Filtering \****************************************************************************************/ -namespace cv -{ +namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +void bilateralFilterInvoker_8u( + Mat& dst, const Mat& temp, int radius, int maxk, + int* space_ofs, float *space_weight, float *color_weight); +void bilateralFilterInvoker_32f( + int cn, int radius, int maxk, int *space_ofs, + const Mat& temp, Mat& dst, float scale_index, float *space_weight, float *expLUT); +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +namespace { class BilateralFilter_8u_Invoker : public ParallelLoopBody { @@ -68,6 +75,8 @@ public: virtual void operator() (const Range& range) const CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int i, j, cn = dest->channels(), k; Size size = dest->size(); @@ -536,161 +545,20 @@ private: float *space_weight, *color_weight; }; -#ifdef HAVE_OPENCL +} // namespace anon -static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, - double sigma_color, double sigma_space, - int borderType) +void bilateralFilterInvoker_8u( + Mat& dst, const Mat& temp, int radius, int maxk, + int* space_ofs, float *space_weight, float *color_weight) { -#ifdef __ANDROID__ - if (ocl::Device::getDefault().isNVidia()) - return false; -#endif - - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - int i, j, maxk, radius; - - if (depth != CV_8U || cn > 4) - return false; - - if (sigma_color <= 0) - sigma_color = 1; - if (sigma_space <= 0) - sigma_space = 1; - - double gauss_color_coeff = -0.5 / (sigma_color * sigma_color); - double gauss_space_coeff = -0.5 / (sigma_space * sigma_space); - - if ( d <= 0 ) - radius = cvRound(sigma_space * 1.5); - else - radius = d / 2; - radius = MAX(radius, 1); - d = radius * 2 + 1; - - UMat src = _src.getUMat(), dst = _dst.getUMat(), temp; - if (src.u == dst.u) - return false; - - copyMakeBorder(src, temp, radius, radius, radius, radius, borderType); - std::vector _space_weight(d * d); - std::vector _space_ofs(d * d); - float * const space_weight = &_space_weight[0]; - int * const space_ofs = &_space_ofs[0]; - - // initialize space-related bilateral filter coefficients - for( i = -radius, maxk = 0; i <= radius; i++ ) - for( j = -radius; j <= radius; j++ ) - { - double r = std::sqrt((double)i * i + (double)j * j); - if ( r > radius ) - continue; - space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff); - space_ofs[maxk++] = (int)(i * temp.step + j * cn); - } - - char cvt[3][40]; - String cnstr = cn > 1 ? format("%d", cn) : ""; - String kernelName("bilateral"); - size_t sizeDiv = 1; - if ((ocl::Device::getDefault().isIntel()) && - (ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU)) - { - //Intel GPU - if (dst.cols % 4 == 0 && cn == 1) // For single channel x4 sized images. - { - kernelName = "bilateral_float4"; - sizeDiv = 4; - } - } - ocl::Kernel k(kernelName.c_str(), ocl::imgproc::bilateral_oclsrc, - format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s" - " -D uchar_t=%s -D float_t=%s -D convert_float_t=%s -D convert_uchar_t=%s -D gauss_color_coeff=(float)%f", - radius, maxk, cn, ocl::typeToStr(CV_32SC(cn)), cnstr.c_str(), - ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), - ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)), - ocl::convertTypeStr(CV_32S, CV_32F, cn, cvt[1]), - ocl::convertTypeStr(CV_32F, CV_8U, cn, cvt[2]), gauss_color_coeff)); - if (k.empty()) - return false; - - Mat mspace_weight(1, d * d, CV_32FC1, space_weight); - Mat mspace_ofs(1, d * d, CV_32SC1, space_ofs); - UMat ucolor_weight, uspace_weight, uspace_ofs; - - mspace_weight.copyTo(uspace_weight); - mspace_ofs.copyTo(uspace_ofs); - - k.args(ocl::KernelArg::ReadOnlyNoSize(temp), ocl::KernelArg::WriteOnly(dst), - ocl::KernelArg::PtrReadOnly(uspace_weight), - ocl::KernelArg::PtrReadOnly(uspace_ofs)); - - size_t globalsize[2] = { (size_t)dst.cols / sizeDiv, (size_t)dst.rows }; - return k.run(2, globalsize, NULL, false); -} - -#endif -static void -bilateralFilter_8u( const Mat& src, Mat& dst, int d, - double sigma_color, double sigma_space, - int borderType ) -{ - int cn = src.channels(); - int i, j, maxk, radius; - Size size = src.size(); - - CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) && src.data != dst.data ); - - if( sigma_color <= 0 ) - sigma_color = 1; - if( sigma_space <= 0 ) - sigma_space = 1; - - double gauss_color_coeff = -0.5/(sigma_color*sigma_color); - double gauss_space_coeff = -0.5/(sigma_space*sigma_space); - - if( d <= 0 ) - radius = cvRound(sigma_space*1.5); - else - radius = d/2; - radius = MAX(radius, 1); - d = radius*2 + 1; - - Mat temp; - copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); - - std::vector _color_weight(cn*256); - std::vector _space_weight(d*d); - std::vector _space_ofs(d*d); - float* color_weight = &_color_weight[0]; - float* space_weight = &_space_weight[0]; - int* space_ofs = &_space_ofs[0]; - - // initialize color-related bilateral filter coefficients - - for( i = 0; i < 256*cn; i++ ) - color_weight[i] = (float)std::exp(i*i*gauss_color_coeff); - - // initialize space-related bilateral filter coefficients - for( i = -radius, maxk = 0; i <= radius; i++ ) - { - j = -radius; - - for( ; j <= radius; j++ ) - { - double r = std::sqrt((double)i*i + (double)j*j); - if( r > radius ) - continue; - space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); - space_ofs[maxk++] = (int)(i*temp.step + j*cn); - } - } - + CV_INSTRUMENT_REGION(); BilateralFilter_8u_Invoker body(dst, temp, radius, maxk, space_ofs, space_weight, color_weight); - parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16)); + parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16)); } +namespace { + class BilateralFilter_32f_Invoker : public ParallelLoopBody { @@ -705,6 +573,8 @@ public: virtual void operator() (const Range& range) const CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int i, j, k; Size size = dest->size(); @@ -1153,216 +1023,18 @@ private: float scale_index, *space_weight, *expLUT; }; +} // namespace anon -static void -bilateralFilter_32f( const Mat& src, Mat& dst, int d, - double sigma_color, double sigma_space, - int borderType ) -{ - int cn = src.channels(); - int i, j, maxk, radius; - double minValSrc=-1, maxValSrc=1; - const int kExpNumBinsPerChannel = 1 << 12; - int kExpNumBins = 0; - float lastExpVal = 1.f; - float len, scale_index; - Size size = src.size(); - - CV_Assert( (src.type() == CV_32FC1 || src.type() == CV_32FC3) && src.data != dst.data ); - - if( sigma_color <= 0 ) - sigma_color = 1; - if( sigma_space <= 0 ) - sigma_space = 1; - - double gauss_color_coeff = -0.5/(sigma_color*sigma_color); - double gauss_space_coeff = -0.5/(sigma_space*sigma_space); - - if( d <= 0 ) - radius = cvRound(sigma_space*1.5); - else - radius = d/2; - radius = MAX(radius, 1); - d = radius*2 + 1; - // compute the min/max range for the input image (even if multichannel) - - minMaxLoc( src.reshape(1), &minValSrc, &maxValSrc ); - if(std::abs(minValSrc - maxValSrc) < FLT_EPSILON) - { - src.copyTo(dst); - return; - } - - // temporary copy of the image with borders for easy processing - Mat temp; - copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); - - // allocate lookup tables - std::vector _space_weight(d*d); - std::vector _space_ofs(d*d); - float* space_weight = &_space_weight[0]; - int* space_ofs = &_space_ofs[0]; - - // assign a length which is slightly more than needed - len = (float)(maxValSrc - minValSrc) * cn; - kExpNumBins = kExpNumBinsPerChannel * cn; - std::vector _expLUT(kExpNumBins+2); - float* expLUT = &_expLUT[0]; - - scale_index = kExpNumBins/len; - - // initialize the exp LUT - for( i = 0; i < kExpNumBins+2; i++ ) - { - if( lastExpVal > 0.f ) - { - double val = i / scale_index; - expLUT[i] = (float)std::exp(val * val * gauss_color_coeff); - lastExpVal = expLUT[i]; - } - else - expLUT[i] = 0.f; - } - - // initialize space-related bilateral filter coefficients - for( i = -radius, maxk = 0; i <= radius; i++ ) - for( j = -radius; j <= radius; j++ ) - { - double r = std::sqrt((double)i*i + (double)j*j); - if( r > radius || ( i == 0 && j == 0 ) ) - continue; - space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); - space_ofs[maxk++] = (int)(i*(temp.step/sizeof(float)) + j*cn); - } - - // parallel_for usage - - BilateralFilter_32f_Invoker body(cn, radius, maxk, space_ofs, temp, dst, scale_index, space_weight, expLUT); - parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16)); -} - -#ifdef HAVE_IPP -#define IPP_BILATERAL_PARALLEL 1 - -#ifdef HAVE_IPP_IW -class ipp_bilateralFilterParallel: public ParallelLoopBody -{ -public: - ipp_bilateralFilterParallel(::ipp::IwiImage &_src, ::ipp::IwiImage &_dst, int _radius, Ipp32f _valSquareSigma, Ipp32f _posSquareSigma, ::ipp::IwiBorderType _borderType, bool *_ok): - src(_src), dst(_dst) - { - pOk = _ok; - - radius = _radius; - valSquareSigma = _valSquareSigma; - posSquareSigma = _posSquareSigma; - borderType = _borderType; - - *pOk = true; - } - ~ipp_bilateralFilterParallel() {} - - virtual void operator() (const Range& range) const CV_OVERRIDE - { - if(*pOk == false) - return; - - try - { - ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile); - } - catch(const ::ipp::IwException &) - { - *pOk = false; - return; - } - } -private: - ::ipp::IwiImage &src; - ::ipp::IwiImage &dst; - - int radius; - Ipp32f valSquareSigma; - Ipp32f posSquareSigma; - ::ipp::IwiBorderType borderType; - - bool *pOk; - const ipp_bilateralFilterParallel& operator= (const ipp_bilateralFilterParallel&); -}; -#endif - -static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, double sigmaSpace, int borderType) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - - int radius = IPP_MAX(((d <= 0)?cvRound(sigmaSpace*1.5):d/2), 1); - Ipp32f valSquareSigma = (Ipp32f)((sigmaColor <= 0)?1:sigmaColor*sigmaColor); - Ipp32f posSquareSigma = (Ipp32f)((sigmaSpace <= 0)?1:sigmaSpace*sigmaSpace); - - // Acquire data and begin processing - try - { - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - ::ipp::IwiBorderSize borderSize(radius); - ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder) - return false; - - const int threads = ippiSuggestThreadsNum(iwDst, 2); - if(IPP_BILATERAL_PARALLEL && threads > 1) { - bool ok = true; - Range range(0, (int)iwDst.m_size.height); - ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok); - if(!ok) - return false; - - parallel_for_(range, invoker, threads*4); - - if(!ok) - return false; - } else { - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder); - } - } - catch (const ::ipp::IwException &) - { - return false; - } - return true; -#else - CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(d); CV_UNUSED(sigmaColor); CV_UNUSED(sigmaSpace); CV_UNUSED(borderType); - return false; -#endif -} -#endif - -} - -void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d, - double sigmaColor, double sigmaSpace, - int borderType ) +void bilateralFilterInvoker_32f( + int cn, int radius, int maxk, int *space_ofs, + const Mat& temp, Mat& dst, float scale_index, float *space_weight, float *expLUT) { CV_INSTRUMENT_REGION(); - _dst.create( _src.size(), _src.type() ); - - CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), - ocl_bilateralFilter_8u(_src, _dst, d, sigmaColor, sigmaSpace, borderType)) - - Mat src = _src.getMat(), dst = _dst.getMat(); - - CV_IPP_RUN_FAST(ipp_bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, borderType)); - - if( src.depth() == CV_8U ) - bilateralFilter_8u( src, dst, d, sigmaColor, sigmaSpace, borderType ); - else if( src.depth() == CV_32F ) - bilateralFilter_32f( src, dst, d, sigmaColor, sigmaSpace, borderType ); - else - CV_Error( CV_StsUnsupportedFormat, - "Bilateral filtering is only implemented for 8u and 32f images" ); + BilateralFilter_32f_Invoker body(cn, radius, maxk, space_ofs, temp, dst, scale_index, space_weight, expLUT); + parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16)); } -/* End of file. */ +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/imgproc/src/box_filter.dispatch.cpp b/modules/imgproc/src/box_filter.dispatch.cpp new file mode 100644 index 0000000000..154ccfd09e --- /dev/null +++ b/modules/imgproc/src/box_filter.dispatch.cpp @@ -0,0 +1,557 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, 2018, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2014-2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#include + +#include "opencv2/core/hal/intrin.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "opencv2/core/openvx/ovx_defs.hpp" + +#include "box_filter.simd.hpp" +#include "box_filter.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + + +namespace cv { + +#ifdef HAVE_OPENCL + +static bool ocl_boxFilter3x3_8UC1( InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor, int borderType, bool normalize ) +{ + const ocl::Device & dev = ocl::Device::getDefault(); + int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + + if (ddepth < 0) + ddepth = sdepth; + + if (anchor.x < 0) + anchor.x = ksize.width / 2; + if (anchor.y < 0) + anchor.y = ksize.height / 2; + + if ( !(dev.isIntel() && (type == CV_8UC1) && + (_src.offset() == 0) && (_src.step() % 4 == 0) && + (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0) && + (anchor.x == 1) && (anchor.y == 1) && + (ksize.width == 3) && (ksize.height == 3)) ) + return false; + + float alpha = 1.0f / (ksize.height * ksize.width); + Size size = _src.size(); + size_t globalsize[2] = { 0, 0 }; + size_t localsize[2] = { 0, 0 }; + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; + + globalsize[0] = size.width / 16; + globalsize[1] = size.height / 2; + + char build_opts[1024]; + sprintf(build_opts, "-D %s %s", borderMap[borderType], normalize ? "-D NORMALIZE" : ""); + + ocl::Kernel kernel("boxFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::boxFilter3x3_oclsrc, build_opts); + if (kernel.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(size, CV_MAKETYPE(ddepth, cn)); + if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) + return false; + UMat dst = _dst.getUMat(); + + int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); + idxArg = kernel.set(idxArg, (int)src.step); + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); + idxArg = kernel.set(idxArg, (int)dst.step); + idxArg = kernel.set(idxArg, (int)dst.rows); + idxArg = kernel.set(idxArg, (int)dst.cols); + if (normalize) + idxArg = kernel.set(idxArg, (float)alpha); + + return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); +} + +static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor, int borderType, bool normalize, bool sqr = false ) +{ + const ocl::Device & dev = ocl::Device::getDefault(); + int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type); + bool doubleSupport = dev.doubleFPConfig() > 0; + + if (ddepth < 0) + ddepth = sdepth; + + if (cn > 4 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) || + _src.offset() % esz != 0 || _src.step() % esz != 0) + return false; + + if (anchor.x < 0) + anchor.x = ksize.width / 2; + if (anchor.y < 0) + anchor.y = ksize.height / 2; + + int computeUnits = ocl::Device::getDefault().maxComputeUnits(); + float alpha = 1.0f / (ksize.height * ksize.width); + Size size = _src.size(), wholeSize; + bool isolated = (borderType & BORDER_ISOLATED) != 0; + borderType &= ~BORDER_ISOLATED; + int wdepth = std::max(CV_32F, std::max(ddepth, sdepth)), + wtype = CV_MAKE_TYPE(wdepth, cn), dtype = CV_MAKE_TYPE(ddepth, cn); + + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; + size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }; + size_t localsize_general[2] = { 0, 1 }, * localsize = NULL; + + UMat src = _src.getUMat(); + if (!isolated) + { + Point ofs; + src.locateROI(wholeSize, ofs); + } + + int h = isolated ? size.height : wholeSize.height; + int w = isolated ? size.width : wholeSize.width; + + size_t maxWorkItemSizes[32]; + ocl::Device::getDefault().maxWorkItemSizes(maxWorkItemSizes); + int tryWorkItems = (int)maxWorkItemSizes[0]; + + ocl::Kernel kernel; + + if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) && + ((ksize.width < 5 && ksize.height < 5 && esz <= 4) || + (ksize.width == 5 && ksize.height == 5 && cn == 1))) + { + if (w < ksize.width || h < ksize.height) + return false; + + // Figure out what vector size to use for loading the pixels. + int pxLoadNumPixels = cn != 1 || size.width % 4 ? 1 : 4; + int pxLoadVecSize = cn * pxLoadNumPixels; + + // Figure out how many pixels per work item to compute in X and Y + // directions. Too many and we run out of registers. + int pxPerWorkItemX = 1, pxPerWorkItemY = 1; + if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4) + { + pxPerWorkItemX = size.width % 8 ? size.width % 4 ? size.width % 2 ? 1 : 2 : 4 : 8; + pxPerWorkItemY = size.height % 2 ? 1 : 2; + } + else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4)) + { + pxPerWorkItemX = size.width % 2 ? 1 : 2; + pxPerWorkItemY = size.height % 2 ? 1 : 2; + } + globalsize[0] = size.width / pxPerWorkItemX; + globalsize[1] = size.height / pxPerWorkItemY; + + // Need some padding in the private array for pixels + int privDataWidth = roundUp(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels); + + // Make the global size a nice round number so the runtime can pick + // from reasonable choices for the workgroup size + const int wgRound = 256; + globalsize[0] = roundUp(globalsize[0], wgRound); + + char build_options[1024], cvt[2][40]; + sprintf(build_options, "-D cn=%d " + "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " + "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d " + "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s " + "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d " + "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " + "-D convertToWT=%s -D convertToDstT=%s%s%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D OP_BOX_FILTER", + cn, anchor.x, anchor.y, ksize.width, ksize.height, + pxLoadVecSize, pxLoadNumPixels, + pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType], + isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", + privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1, + ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), + ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), + ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), + normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "", + ocl::typeToStr(CV_MAKE_TYPE(wdepth, pxLoadVecSize)) //PX_LOAD_FLOAT_VEC_CONV + ); + + + if (!kernel.create("filterSmall", cv::ocl::imgproc::filterSmall_oclsrc, build_options)) + return false; + } + else + { + localsize = localsize_general; + for ( ; ; ) + { + int BLOCK_SIZE_X = tryWorkItems, BLOCK_SIZE_Y = std::min(ksize.height * 10, size.height); + + while (BLOCK_SIZE_X > 32 && BLOCK_SIZE_X >= ksize.width * 2 && BLOCK_SIZE_X > size.width * 2) + BLOCK_SIZE_X /= 2; + while (BLOCK_SIZE_Y < BLOCK_SIZE_X / 8 && BLOCK_SIZE_Y * computeUnits * 32 < size.height) + BLOCK_SIZE_Y *= 2; + + if (ksize.width > BLOCK_SIZE_X || w < ksize.width || h < ksize.height) + return false; + + char cvt[2][50]; + String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s" + " -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s" + " -D ST1=%s -D DT1=%s -D cn=%d", + BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)), + ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), + ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]), + anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType], + isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "", + normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "", + ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn); + + localsize[0] = BLOCK_SIZE_X; + globalsize[0] = divUp(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X; + globalsize[1] = divUp(size.height, BLOCK_SIZE_Y); + + kernel.create("boxFilter", cv::ocl::imgproc::boxFilter_oclsrc, opts); + if (kernel.empty()) + return false; + + size_t kernelWorkGroupSize = kernel.workGroupSize(); + if (localsize[0] <= kernelWorkGroupSize) + break; + if (BLOCK_SIZE_X < (int)kernelWorkGroupSize) + return false; + + tryWorkItems = (int)kernelWorkGroupSize; + } + } + + _dst.create(size, CV_MAKETYPE(ddepth, cn)); + UMat dst = _dst.getUMat(); + + int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); + idxArg = kernel.set(idxArg, (int)src.step); + int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); + int srcOffsetY = (int)(src.offset / src.step); + int srcEndX = isolated ? srcOffsetX + size.width : wholeSize.width; + int srcEndY = isolated ? srcOffsetY + size.height : wholeSize.height; + idxArg = kernel.set(idxArg, srcOffsetX); + idxArg = kernel.set(idxArg, srcOffsetY); + idxArg = kernel.set(idxArg, srcEndX); + idxArg = kernel.set(idxArg, srcEndY); + idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst)); + if (normalize) + idxArg = kernel.set(idxArg, (float)alpha); + + return kernel.run(2, globalsize, localsize, false); +} + +#endif + +Ptr getRowSumFilter(int srcType, int sumType, int ksize, int anchor) +{ + CV_INSTRUMENT_REGION(); + + CV_CPU_DISPATCH(getRowSumFilter, (srcType, sumType, ksize, anchor), + CV_CPU_DISPATCH_MODES_ALL); +} + + +Ptr getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale) +{ + CV_INSTRUMENT_REGION(); + + CV_CPU_DISPATCH(getColumnSumFilter, (sumType, dstType, ksize, anchor, scale), + CV_CPU_DISPATCH_MODES_ALL); +} + + +Ptr createBoxFilter(int srcType, int dstType, Size ksize, + Point anchor, bool normalize, int borderType) +{ + CV_INSTRUMENT_REGION(); + + CV_CPU_DISPATCH(createBoxFilter, (srcType, dstType, ksize, anchor, normalize, borderType), + CV_CPU_DISPATCH_MODES_ALL); +} + +#ifdef HAVE_OPENVX + namespace ovx { + template <> inline bool skipSmallImages(int w, int h) { return w*h < 640 * 480; } + } + static bool openvx_boxfilter(InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor, + bool normalize, int borderType) + { + if (ddepth < 0) + ddepth = CV_8UC1; + if (_src.type() != CV_8UC1 || ddepth != CV_8U || !normalize || + _src.cols() < 3 || _src.rows() < 3 || + ksize.width != 3 || ksize.height != 3 || + (anchor.x >= 0 && anchor.x != 1) || + (anchor.y >= 0 && anchor.y != 1) || + ovx::skipSmallImages(_src.cols(), _src.rows())) + return false; + + Mat src = _src.getMat(); + + if ((borderType & BORDER_ISOLATED) == 0 && src.isSubmatrix()) + return false; //Process isolated borders only + vx_enum border; + switch (borderType & ~BORDER_ISOLATED) + { + case BORDER_CONSTANT: + border = VX_BORDER_CONSTANT; + break; + case BORDER_REPLICATE: + border = VX_BORDER_REPLICATE; + break; + default: + return false; + } + + _dst.create(src.size(), CV_8UC1); + Mat dst = _dst.getMat(); + + try + { + ivx::Context ctx = ovx::getOpenVXContext(); + + Mat a; + if (dst.data != src.data) + a = src; + else + src.copyTo(a); + + ivx::Image + ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), + ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); + + //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments + //since OpenVX standard says nothing about thread-safety for now + ivx::border_t prevBorder = ctx.immediateBorder(); + ctx.setImmediateBorder(border, (vx_uint8)(0)); + ivx::IVX_CHECK_STATUS(vxuBox3x3(ctx, ia, ib)); + ctx.setImmediateBorder(prevBorder); + } + catch (const ivx::RuntimeError & e) + { + VX_DbgThrow(e.what()); + } + catch (const ivx::WrapperError & e) + { + VX_DbgThrow(e.what()); + } + + return true; + } +#endif + +#if defined(HAVE_IPP) +static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + +#if IPP_VERSION_X100 < 201801 + // Problem with SSE42 optimization for 16s and some 8u modes + if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5)))) + return false; + + // Other optimizations has some degradations too + if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5)))) + return false; +#endif + + if(!normalize) + return false; + + if(!ippiCheckAnchor(anchor, ksize)) + return false; + + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + ::ipp::IwiSize iwKSize = ippiGetSize(ksize); + ::ipp::IwiBorderSize borderSize(iwKSize); + ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); + if(!ippBorder) + return false; + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder); + } + catch (const ::ipp::IwException &) + { + return false; + } + + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType); + return false; +#endif +} +#endif + + +void boxFilter(InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor, + bool normalize, int borderType) +{ + CV_INSTRUMENT_REGION(); + + CV_OCL_RUN(_dst.isUMat() && + (borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || + borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101), + ocl_boxFilter3x3_8UC1(_src, _dst, ddepth, ksize, anchor, borderType, normalize)) + + CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize)) + + Mat src = _src.getMat(); + int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); + if( ddepth < 0 ) + ddepth = sdepth; + _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) ); + Mat dst = _dst.getMat(); + if( borderType != BORDER_CONSTANT && normalize && (borderType & BORDER_ISOLATED) != 0 ) + { + if( src.rows == 1 ) + ksize.height = 1; + if( src.cols == 1 ) + ksize.width = 1; + } + + Point ofs; + Size wsz(src.cols, src.rows); + if(!(borderType&BORDER_ISOLATED)) + src.locateROI( wsz, ofs ); + + CALL_HAL(boxFilter, cv_hal_boxFilter, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn, + ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, + anchor.x, anchor.y, normalize, borderType&~BORDER_ISOLATED); + + CV_OVX_RUN(true, + openvx_boxfilter(src, dst, ddepth, ksize, anchor, normalize, borderType)) + + CV_IPP_RUN_FAST(ipp_boxfilter(src, dst, ksize, anchor, normalize, borderType)); + + borderType = (borderType&~BORDER_ISOLATED); + + Ptr f = createBoxFilter( src.type(), dst.type(), + ksize, anchor, normalize, borderType ); + + f->apply( src, dst, wsz, ofs ); +} + + +void blur(InputArray src, OutputArray dst, + Size ksize, Point anchor, int borderType) +{ + CV_INSTRUMENT_REGION(); + + boxFilter( src, dst, -1, ksize, anchor, true, borderType ); +} + + +/****************************************************************************************\ + Squared Box Filter +\****************************************************************************************/ + +static Ptr getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor) +{ + CV_INSTRUMENT_REGION(); + + CV_CPU_DISPATCH(getSqrRowSumFilter, (srcType, sumType, ksize, anchor), + CV_CPU_DISPATCH_MODES_ALL); +} + +void sqrBoxFilter(InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor, + bool normalize, int borderType) +{ + CV_INSTRUMENT_REGION(); + + int srcType = _src.type(), sdepth = CV_MAT_DEPTH(srcType), cn = CV_MAT_CN(srcType); + Size size = _src.size(); + + if( ddepth < 0 ) + ddepth = sdepth < CV_32F ? CV_32F : CV_64F; + + if( borderType != BORDER_CONSTANT && normalize ) + { + if( size.height == 1 ) + ksize.height = 1; + if( size.width == 1 ) + ksize.width = 1; + } + + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, + ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize, true)) + + int sumDepth = CV_64F; + if( sdepth == CV_8U ) + sumDepth = CV_32S; + int sumType = CV_MAKETYPE( sumDepth, cn ), dstType = CV_MAKETYPE(ddepth, cn); + + Mat src = _src.getMat(); + _dst.create( size, dstType ); + Mat dst = _dst.getMat(); + + Ptr rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x ); + Ptr columnFilter = getColumnSumFilter(sumType, + dstType, ksize.height, anchor.y, + normalize ? 1./(ksize.width*ksize.height) : 1); + + Ptr f = makePtr(Ptr(), rowFilter, columnFilter, + srcType, dstType, sumType, borderType ); + Point ofs; + Size wsz(src.cols, src.rows); + src.locateROI( wsz, ofs ); + + f->apply( src, dst, wsz, ofs ); +} + +} // namespace diff --git a/modules/imgproc/src/box_filter.cpp b/modules/imgproc/src/box_filter.simd.hpp similarity index 69% rename from modules/imgproc/src/box_filter.cpp rename to modules/imgproc/src/box_filter.simd.hpp index e4405b4b69..4a4d205216 100644 --- a/modules/imgproc/src/box_filter.cpp +++ b/modules/imgproc/src/box_filter.simd.hpp @@ -42,21 +42,25 @@ //M*/ #include "precomp.hpp" - -#include - #include "opencv2/core/hal/intrin.hpp" -#include "opencl_kernels_imgproc.hpp" -#include "opencv2/core/openvx/ovx_defs.hpp" +namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +Ptr getRowSumFilter(int srcType, int sumType, int ksize, int anchor); +Ptr getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale); +Ptr createBoxFilter(int srcType, int dstType, Size ksize, + Point anchor, bool normalize, int borderType); -namespace cv -{ +Ptr getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY /****************************************************************************************\ Box Filter \****************************************************************************************/ +namespace { template struct RowSum : public BaseRowFilter @@ -70,6 +74,8 @@ struct RowSum : virtual void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + const T* S = (const T*)src; ST* D = (ST*)dst; int i = 0, k, ksz_cn = ksize*cn; @@ -183,6 +189,8 @@ struct ColumnSum : virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int i; ST* SUM; bool haveScale = scale != 1; @@ -281,6 +289,8 @@ struct ColumnSum : virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int* SUM; bool haveScale = scale != 1; double _scale = scale; @@ -408,9 +418,6 @@ struct ColumnSum : } dst += dststep; } -#if CV_SIMD - vx_cleanup(); -#endif } double scale; @@ -452,6 +459,8 @@ public BaseColumnFilter virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + const int ds = divScale; const int dd = divDelta; ushort* SUM; @@ -586,9 +595,6 @@ public BaseColumnFilter } dst += dststep; } -#if CV_SIMD - vx_cleanup(); -#endif } double scale; @@ -616,6 +622,8 @@ struct ColumnSum : virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int i; int* SUM; bool haveScale = scale != 1; @@ -739,9 +747,6 @@ struct ColumnSum : } dst += dststep; } -#if CV_SIMD - vx_cleanup(); -#endif } double scale; @@ -767,6 +772,8 @@ struct ColumnSum : virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int* SUM; bool haveScale = scale != 1; double _scale = scale; @@ -888,9 +895,6 @@ struct ColumnSum : } dst += dststep; } -#if CV_SIMD - vx_cleanup(); -#endif } double scale; @@ -915,6 +919,8 @@ struct ColumnSum : virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int* SUM; bool haveScale = scale != 1; double _scale = scale; @@ -1022,9 +1028,6 @@ struct ColumnSum : } dst += dststep; } -#if CV_SIMD - vx_cleanup(); -#endif } double scale; @@ -1050,6 +1053,8 @@ struct ColumnSum : virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int* SUM; bool haveScale = scale != 1; double _scale = scale; @@ -1154,9 +1159,6 @@ struct ColumnSum : } dst += dststep; } -#if CV_SIMD - vx_cleanup(); -#endif } double scale; @@ -1164,243 +1166,13 @@ struct ColumnSum : std::vector sum; }; -#ifdef HAVE_OPENCL +} // namespace anon -static bool ocl_boxFilter3x3_8UC1( InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, int borderType, bool normalize ) + +Ptr getRowSumFilter(int srcType, int sumType, int ksize, int anchor) { - const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_INSTRUMENT_REGION(); - if (ddepth < 0) - ddepth = sdepth; - - if (anchor.x < 0) - anchor.x = ksize.width / 2; - if (anchor.y < 0) - anchor.y = ksize.height / 2; - - if ( !(dev.isIntel() && (type == CV_8UC1) && - (_src.offset() == 0) && (_src.step() % 4 == 0) && - (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0) && - (anchor.x == 1) && (anchor.y == 1) && - (ksize.width == 3) && (ksize.height == 3)) ) - return false; - - float alpha = 1.0f / (ksize.height * ksize.width); - Size size = _src.size(); - size_t globalsize[2] = { 0, 0 }; - size_t localsize[2] = { 0, 0 }; - const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; - - globalsize[0] = size.width / 16; - globalsize[1] = size.height / 2; - - char build_opts[1024]; - sprintf(build_opts, "-D %s %s", borderMap[borderType], normalize ? "-D NORMALIZE" : ""); - - ocl::Kernel kernel("boxFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::boxFilter3x3_oclsrc, build_opts); - if (kernel.empty()) - return false; - - UMat src = _src.getUMat(); - _dst.create(size, CV_MAKETYPE(ddepth, cn)); - if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) - return false; - UMat dst = _dst.getUMat(); - - int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); - idxArg = kernel.set(idxArg, (int)src.step); - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); - idxArg = kernel.set(idxArg, (int)dst.step); - idxArg = kernel.set(idxArg, (int)dst.rows); - idxArg = kernel.set(idxArg, (int)dst.cols); - if (normalize) - idxArg = kernel.set(idxArg, (float)alpha); - - return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); -} - -static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, int borderType, bool normalize, bool sqr = false ) -{ - const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type); - bool doubleSupport = dev.doubleFPConfig() > 0; - - if (ddepth < 0) - ddepth = sdepth; - - if (cn > 4 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) || - _src.offset() % esz != 0 || _src.step() % esz != 0) - return false; - - if (anchor.x < 0) - anchor.x = ksize.width / 2; - if (anchor.y < 0) - anchor.y = ksize.height / 2; - - int computeUnits = ocl::Device::getDefault().maxComputeUnits(); - float alpha = 1.0f / (ksize.height * ksize.width); - Size size = _src.size(), wholeSize; - bool isolated = (borderType & BORDER_ISOLATED) != 0; - borderType &= ~BORDER_ISOLATED; - int wdepth = std::max(CV_32F, std::max(ddepth, sdepth)), - wtype = CV_MAKE_TYPE(wdepth, cn), dtype = CV_MAKE_TYPE(ddepth, cn); - - const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; - size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }; - size_t localsize_general[2] = { 0, 1 }, * localsize = NULL; - - UMat src = _src.getUMat(); - if (!isolated) - { - Point ofs; - src.locateROI(wholeSize, ofs); - } - - int h = isolated ? size.height : wholeSize.height; - int w = isolated ? size.width : wholeSize.width; - - size_t maxWorkItemSizes[32]; - ocl::Device::getDefault().maxWorkItemSizes(maxWorkItemSizes); - int tryWorkItems = (int)maxWorkItemSizes[0]; - - ocl::Kernel kernel; - - if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) && - ((ksize.width < 5 && ksize.height < 5 && esz <= 4) || - (ksize.width == 5 && ksize.height == 5 && cn == 1))) - { - if (w < ksize.width || h < ksize.height) - return false; - - // Figure out what vector size to use for loading the pixels. - int pxLoadNumPixels = cn != 1 || size.width % 4 ? 1 : 4; - int pxLoadVecSize = cn * pxLoadNumPixels; - - // Figure out how many pixels per work item to compute in X and Y - // directions. Too many and we run out of registers. - int pxPerWorkItemX = 1, pxPerWorkItemY = 1; - if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4) - { - pxPerWorkItemX = size.width % 8 ? size.width % 4 ? size.width % 2 ? 1 : 2 : 4 : 8; - pxPerWorkItemY = size.height % 2 ? 1 : 2; - } - else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4)) - { - pxPerWorkItemX = size.width % 2 ? 1 : 2; - pxPerWorkItemY = size.height % 2 ? 1 : 2; - } - globalsize[0] = size.width / pxPerWorkItemX; - globalsize[1] = size.height / pxPerWorkItemY; - - // Need some padding in the private array for pixels - int privDataWidth = roundUp(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels); - - // Make the global size a nice round number so the runtime can pick - // from reasonable choices for the workgroup size - const int wgRound = 256; - globalsize[0] = roundUp(globalsize[0], wgRound); - - char build_options[1024], cvt[2][40]; - sprintf(build_options, "-D cn=%d " - "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " - "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d " - "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s " - "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d " - "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " - "-D convertToWT=%s -D convertToDstT=%s%s%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D OP_BOX_FILTER", - cn, anchor.x, anchor.y, ksize.width, ksize.height, - pxLoadVecSize, pxLoadNumPixels, - pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType], - isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", - privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1, - ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), - ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), - ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), - normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "", - ocl::typeToStr(CV_MAKE_TYPE(wdepth, pxLoadVecSize)) //PX_LOAD_FLOAT_VEC_CONV - ); - - - if (!kernel.create("filterSmall", cv::ocl::imgproc::filterSmall_oclsrc, build_options)) - return false; - } - else - { - localsize = localsize_general; - for ( ; ; ) - { - int BLOCK_SIZE_X = tryWorkItems, BLOCK_SIZE_Y = std::min(ksize.height * 10, size.height); - - while (BLOCK_SIZE_X > 32 && BLOCK_SIZE_X >= ksize.width * 2 && BLOCK_SIZE_X > size.width * 2) - BLOCK_SIZE_X /= 2; - while (BLOCK_SIZE_Y < BLOCK_SIZE_X / 8 && BLOCK_SIZE_Y * computeUnits * 32 < size.height) - BLOCK_SIZE_Y *= 2; - - if (ksize.width > BLOCK_SIZE_X || w < ksize.width || h < ksize.height) - return false; - - char cvt[2][50]; - String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s" - " -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s" - " -D ST1=%s -D DT1=%s -D cn=%d", - BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)), - ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), - ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]), - anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType], - isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "", - normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "", - ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn); - - localsize[0] = BLOCK_SIZE_X; - globalsize[0] = divUp(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X; - globalsize[1] = divUp(size.height, BLOCK_SIZE_Y); - - kernel.create("boxFilter", cv::ocl::imgproc::boxFilter_oclsrc, opts); - if (kernel.empty()) - return false; - - size_t kernelWorkGroupSize = kernel.workGroupSize(); - if (localsize[0] <= kernelWorkGroupSize) - break; - if (BLOCK_SIZE_X < (int)kernelWorkGroupSize) - return false; - - tryWorkItems = (int)kernelWorkGroupSize; - } - } - - _dst.create(size, CV_MAKETYPE(ddepth, cn)); - UMat dst = _dst.getUMat(); - - int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); - idxArg = kernel.set(idxArg, (int)src.step); - int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); - int srcOffsetY = (int)(src.offset / src.step); - int srcEndX = isolated ? srcOffsetX + size.width : wholeSize.width; - int srcEndY = isolated ? srcOffsetY + size.height : wholeSize.height; - idxArg = kernel.set(idxArg, srcOffsetX); - idxArg = kernel.set(idxArg, srcOffsetY); - idxArg = kernel.set(idxArg, srcEndX); - idxArg = kernel.set(idxArg, srcEndY); - idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst)); - if (normalize) - idxArg = kernel.set(idxArg, (float)alpha); - - return kernel.run(2, globalsize, localsize, false); -} - -#endif - -} - - -cv::Ptr cv::getRowSumFilter(int srcType, int sumType, int ksize, int anchor) -{ int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType); CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) ); @@ -1434,9 +1206,10 @@ cv::Ptr cv::getRowSumFilter(int srcType, int sumType, int ksi } -cv::Ptr cv::getColumnSumFilter(int sumType, int dstType, int ksize, - int anchor, double scale) +Ptr getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale) { + CV_INSTRUMENT_REGION(); + int sdepth = CV_MAT_DEPTH(sumType), ddepth = CV_MAT_DEPTH(dstType); CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(dstType) ); @@ -1474,9 +1247,11 @@ cv::Ptr cv::getColumnSumFilter(int sumType, int dstType, i } -cv::Ptr cv::createBoxFilter( int srcType, int dstType, Size ksize, - Point anchor, bool normalize, int borderType ) +Ptr createBoxFilter(int srcType, int dstType, Size ksize, + Point anchor, bool normalize, int borderType) { + CV_INSTRUMENT_REGION(); + int sdepth = CV_MAT_DEPTH(srcType); int cn = CV_MAT_CN(srcType), sumType = CV_64F; if( sdepth == CV_8U && CV_MAT_DEPTH(dstType) == CV_8U && @@ -1496,201 +1271,11 @@ cv::Ptr cv::createBoxFilter( int srcType, int dstType, Size ks srcType, dstType, sumType, borderType ); } -#ifdef HAVE_OPENVX -namespace cv -{ - namespace ovx { - template <> inline bool skipSmallImages(int w, int h) { return w*h < 640 * 480; } - } - static bool openvx_boxfilter(InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, - bool normalize, int borderType) - { - if (ddepth < 0) - ddepth = CV_8UC1; - if (_src.type() != CV_8UC1 || ddepth != CV_8U || !normalize || - _src.cols() < 3 || _src.rows() < 3 || - ksize.width != 3 || ksize.height != 3 || - (anchor.x >= 0 && anchor.x != 1) || - (anchor.y >= 0 && anchor.y != 1) || - ovx::skipSmallImages(_src.cols(), _src.rows())) - return false; - - Mat src = _src.getMat(); - - if ((borderType & BORDER_ISOLATED) == 0 && src.isSubmatrix()) - return false; //Process isolated borders only - vx_enum border; - switch (borderType & ~BORDER_ISOLATED) - { - case BORDER_CONSTANT: - border = VX_BORDER_CONSTANT; - break; - case BORDER_REPLICATE: - border = VX_BORDER_REPLICATE; - break; - default: - return false; - } - - _dst.create(src.size(), CV_8UC1); - Mat dst = _dst.getMat(); - - try - { - ivx::Context ctx = ovx::getOpenVXContext(); - - Mat a; - if (dst.data != src.data) - a = src; - else - src.copyTo(a); - - ivx::Image - ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), - ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); - - //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments - //since OpenVX standard says nothing about thread-safety for now - ivx::border_t prevBorder = ctx.immediateBorder(); - ctx.setImmediateBorder(border, (vx_uint8)(0)); - ivx::IVX_CHECK_STATUS(vxuBox3x3(ctx, ia, ib)); - ctx.setImmediateBorder(prevBorder); - } - catch (const ivx::RuntimeError & e) - { - VX_DbgThrow(e.what()); - } - catch (const ivx::WrapperError & e) - { - VX_DbgThrow(e.what()); - } - - return true; - } -} -#endif - -#if defined(HAVE_IPP) && OPENCV_IPP_REDUCE_SIZE == 0 -namespace cv -{ -static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - -#if IPP_VERSION_X100 < 201801 - // Problem with SSE42 optimization for 16s and some 8u modes - if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5)))) - return false; - - // Other optimizations has some degradations too - if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5)))) - return false; -#endif - - if(!normalize) - return false; - - if(!ippiCheckAnchor(anchor, ksize)) - return false; - - try - { - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - ::ipp::IwiSize iwKSize = ippiGetSize(ksize); - ::ipp::IwiBorderSize borderSize(iwKSize); - ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder) - return false; - - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder); - } - catch (const ::ipp::IwException &) - { - return false; - } - - return true; -#else - CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType); - return false; -#endif -} -} -#endif - - -void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, - bool normalize, int borderType ) -{ - CV_INSTRUMENT_REGION(); - - CV_OCL_RUN(_dst.isUMat() && - (borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || - borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101), - ocl_boxFilter3x3_8UC1(_src, _dst, ddepth, ksize, anchor, borderType, normalize)) - - CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize)) - - Mat src = _src.getMat(); - int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); - if( ddepth < 0 ) - ddepth = sdepth; - _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) ); - Mat dst = _dst.getMat(); - if( borderType != BORDER_CONSTANT && normalize && (borderType & BORDER_ISOLATED) != 0 ) - { - if( src.rows == 1 ) - ksize.height = 1; - if( src.cols == 1 ) - ksize.width = 1; - } - - Point ofs; - Size wsz(src.cols, src.rows); - if(!(borderType&BORDER_ISOLATED)) - src.locateROI( wsz, ofs ); - - CALL_HAL(boxFilter, cv_hal_boxFilter, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn, - ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, - anchor.x, anchor.y, normalize, borderType&~BORDER_ISOLATED); - - CV_OVX_RUN(true, - openvx_boxfilter(src, dst, ddepth, ksize, anchor, normalize, borderType)) - -#if OPENCV_IPP_REDUCE_SIZE == 0 - CV_IPP_RUN_FAST(ipp_boxfilter(src, dst, ksize, anchor, normalize, borderType)); -#endif - - borderType = (borderType&~BORDER_ISOLATED); - - Ptr f = createBoxFilter( src.type(), dst.type(), - ksize, anchor, normalize, borderType ); - - f->apply( src, dst, wsz, ofs ); -} - - -void cv::blur( InputArray src, OutputArray dst, - Size ksize, Point anchor, int borderType ) -{ - CV_INSTRUMENT_REGION(); - - boxFilter( src, dst, -1, ksize, anchor, true, borderType ); -} - /****************************************************************************************\ Squared Box Filter \****************************************************************************************/ - -namespace cv -{ +namespace { template struct SqrRowSum : @@ -1705,6 +1290,8 @@ struct SqrRowSum : virtual void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + const T* S = (const T*)src; ST* D = (ST*)dst; int i = 0, k, ksz_cn = ksize*cn; @@ -1729,7 +1316,9 @@ struct SqrRowSum : } }; -static Ptr getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor) +} // namespace anon + +Ptr getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor) { int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType); CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) ); @@ -1755,52 +1344,6 @@ static Ptr getSqrRowSumFilter(int srcType, int sumType, int ksize srcType, sumType)); } -} - -void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, - bool normalize, int borderType ) -{ - CV_INSTRUMENT_REGION(); - - int srcType = _src.type(), sdepth = CV_MAT_DEPTH(srcType), cn = CV_MAT_CN(srcType); - Size size = _src.size(); - - if( ddepth < 0 ) - ddepth = sdepth < CV_32F ? CV_32F : CV_64F; - - if( borderType != BORDER_CONSTANT && normalize ) - { - if( size.height == 1 ) - ksize.height = 1; - if( size.width == 1 ) - ksize.width = 1; - } - - CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, - ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize, true)) - - int sumDepth = CV_64F; - if( sdepth == CV_8U ) - sumDepth = CV_32S; - int sumType = CV_MAKETYPE( sumDepth, cn ), dstType = CV_MAKETYPE(ddepth, cn); - - Mat src = _src.getMat(); - _dst.create( size, dstType ); - Mat dst = _dst.getMat(); - - Ptr rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x ); - Ptr columnFilter = getColumnSumFilter(sumType, - dstType, ksize.height, anchor.y, - normalize ? 1./(ksize.width*ksize.height) : 1); - - Ptr f = makePtr(Ptr(), rowFilter, columnFilter, - srcType, dstType, sumType, borderType ); - Point ofs; - Size wsz(src.cols, src.rows); - src.locateROI( wsz, ofs ); - - f->apply( src, dst, wsz, ofs ); -} - -/* End of file. */ +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 4370339bd4..cc4dd13c92 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html #include "precomp.hpp" +#include "opencl_kernels_imgproc.hpp" #include "color.hpp" namespace cv diff --git a/modules/imgproc/src/color.hpp b/modules/imgproc/src/color.hpp index a327aafefd..8c1f19fa8a 100644 --- a/modules/imgproc/src/color.hpp +++ b/modules/imgproc/src/color.hpp @@ -3,63 +3,17 @@ // of this distribution and at http://opencv.org/license.html #include "opencv2/imgproc.hpp" -#include "opencv2/core/utility.hpp" -#include -#include "opencl_kernels_imgproc.hpp" #include "hal_replacement.hpp" -#include "opencv2/core/hal/intrin.hpp" -#include "opencv2/core/softfloat.hpp" -#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) - -namespace cv -{ - -//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601 -const float B2YF = 0.114f; -const float G2YF = 0.587f; -const float R2YF = 0.299f; - -enum -{ - gray_shift = 15, - yuv_shift = 14, - xyz_shift = 12, - R2Y = 4899, // == R2YF*16384 - G2Y = 9617, // == G2YF*16384 - B2Y = 1868, // == B2YF*16384 - RY15 = 9798, // == R2YF*32768 + 0.5 - GY15 = 19235, // == G2YF*32768 + 0.5 - BY15 = 3735, // == B2YF*32768 + 0.5 - BLOCK_SIZE = 256 -}; - -template struct ColorChannel -{ - typedef float worktype_f; - static _Tp max() { return std::numeric_limits<_Tp>::max(); } - static _Tp half() { return (_Tp)(max()/2 + 1); } -}; - -template<> struct ColorChannel -{ - typedef float worktype_f; - static float max() { return 1.f; } - static float half() { return 0.5f; } -}; - -/*template<> struct ColorChannel -{ - typedef double worktype_f; - static double max() { return 1.; } - static double half() { return 0.5; } -};*/ +namespace cv { // // Helper functions // -namespace { +namespace impl { + +#include "color.simd_helpers.hpp" inline bool isHSV(int code) { @@ -213,40 +167,9 @@ inline int uIndex(int code) } } // namespace:: +using namespace impl; -template -struct Set -{ - static bool contains(int i) - { - return (i == i0 || i == i1 || i == i2); - } -}; - -template -struct Set -{ - static bool contains(int i) - { - return (i == i0 || i == i1); - } -}; - -template -struct Set -{ - static bool contains(int i) - { - return (i == i0); - } -}; - -enum SizePolicy -{ - TO_YUV, FROM_YUV, NONE -}; - -template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE > +/*template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE > struct CvtHelper { CvtHelper(InputArray _src, OutputArray _dst, int dcn) @@ -286,7 +209,7 @@ struct CvtHelper Mat src, dst; int depth, scn; Size dstSz; -}; +};*/ #ifdef HAVE_OPENCL @@ -384,49 +307,7 @@ struct OclHelper #endif -///////////////////////////// Top-level template function //////////////////////////////// -template -class CvtColorLoop_Invoker : public ParallelLoopBody -{ - typedef typename Cvt::channel_type _Tp; -public: - - CvtColorLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt) : - ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_), - width(width_), cvt(_cvt) - { - } - - virtual void operator()(const Range& range) const CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - - const uchar* yS = src_data + static_cast(range.start) * src_step; - uchar* yD = dst_data + static_cast(range.start) * dst_step; - - for( int i = range.start; i < range.end; ++i, yS += src_step, yD += dst_step ) - cvt(reinterpret_cast(yS), reinterpret_cast<_Tp*>(yD), width); - } - -private: - const uchar * src_data; - const size_t src_step; - uchar * dst_data; - const size_t dst_step; - const int width; - const Cvt& cvt; - - const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&); -}; - -template -void CvtColorLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt) -{ - parallel_for_(Range(0, height), - CvtColorLoop_Invoker(src_data, src_step, dst_data, dst_step, width, cvt), - (width * height) / static_cast(1<<16)); -} #if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700) # define NEED_IPP 1 diff --git a/modules/imgproc/src/color.simd_helpers.hpp b/modules/imgproc/src/color.simd_helpers.hpp new file mode 100644 index 0000000000..47f00fda20 --- /dev/null +++ b/modules/imgproc/src/color.simd_helpers.hpp @@ -0,0 +1,171 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) + +namespace { + +//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601 +static const float B2YF = 0.114f; +static const float G2YF = 0.587f; +static const float R2YF = 0.299f; + +enum +{ + gray_shift = 15, + yuv_shift = 14, + xyz_shift = 12, + R2Y = 4899, // == R2YF*16384 + G2Y = 9617, // == G2YF*16384 + B2Y = 1868, // == B2YF*16384 + RY15 = 9798, // == R2YF*32768 + 0.5 + GY15 = 19235, // == G2YF*32768 + 0.5 + BY15 = 3735, // == B2YF*32768 + 0.5 + BLOCK_SIZE = 256 +}; + +template struct ColorChannel +{ + typedef float worktype_f; + static inline _Tp max() { return std::numeric_limits<_Tp>::max(); } + static inline _Tp half() { return (_Tp)(max()/2 + 1); } +}; + +template<> struct ColorChannel +{ + typedef float worktype_f; + static inline float max() { return 1.f; } + static inline float half() { return 0.5f; } +}; + +/*template<> struct ColorChannel +{ + typedef double worktype_f; + static double max() { return 1.; } + static double half() { return 0.5; } +};*/ + + +template +struct Set +{ + static inline bool contains(int i) + { + return (i == i0 || i == i1 || i == i2); + } +}; + +template +struct Set +{ + static inline bool contains(int i) + { + return (i == i0 || i == i1); + } +}; + +template +struct Set +{ + static inline bool contains(int i) + { + return (i == i0); + } +}; + +enum SizePolicy +{ + TO_YUV, FROM_YUV, NONE +}; + +template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE > +struct CvtHelper +{ + CvtHelper(InputArray _src, OutputArray _dst, int dcn) + { + CV_Assert(!_src.empty()); + + int stype = _src.type(); + scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype); + + CV_Check(scn, VScn::contains(scn), "Invalid number of channels in input image"); + CV_Check(dcn, VDcn::contains(dcn), "Invalid number of channels in output image"); + CV_CheckDepth(depth, VDepth::contains(depth), "Unsupported depth of input image"); + + if (_src.getObj() == _dst.getObj()) // inplace processing (#6653) + _src.copyTo(src); + else + src = _src.getMat(); + Size sz = src.size(); + switch (sizePolicy) + { + case TO_YUV: + CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0); + dstSz = Size(sz.width, sz.height / 2 * 3); + break; + case FROM_YUV: + CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0); + dstSz = Size(sz.width, sz.height * 2 / 3); + break; + case NONE: + default: + dstSz = sz; + break; + } + _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); + dst = _dst.getMat(); + } + Mat src, dst; + int depth, scn; + Size dstSz; +}; + + +///////////////////////////// Top-level template function //////////////////////////////// + +template +class CvtColorLoop_Invoker : public ParallelLoopBody +{ + typedef typename Cvt::channel_type _Tp; +public: + + CvtColorLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt) : + ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_), + width(width_), cvt(_cvt) + { + } + + virtual void operator()(const Range& range) const CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + + const uchar* yS = src_data + static_cast(range.start) * src_step; + uchar* yD = dst_data + static_cast(range.start) * dst_step; + + for( int i = range.start; i < range.end; ++i, yS += src_step, yD += dst_step ) + cvt(reinterpret_cast(yS), reinterpret_cast<_Tp*>(yD), width); + } + +private: + const uchar * src_data; + const size_t src_step; + uchar * dst_data; + const size_t dst_step; + const int width; + const Cvt& cvt; + + CvtColorLoop_Invoker(const CvtColorLoop_Invoker&); // = delete; + const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&); // = delete; +}; + +template static inline +void CvtColorLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt) +{ + CV_AVX_GUARD + parallel_for_(Range(0, height), + CvtColorLoop_Invoker(src_data, src_step, dst_data, dst_step, width, cvt), + (width * height) / static_cast(1<<16)); +} + +} //namespace diff --git a/modules/imgproc/src/color_hsv.dispatch.cpp b/modules/imgproc/src/color_hsv.dispatch.cpp new file mode 100644 index 0000000000..f1678f5deb --- /dev/null +++ b/modules/imgproc/src/color_hsv.dispatch.cpp @@ -0,0 +1,358 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" + +#include "opencl_kernels_imgproc.hpp" + +#include "color.hpp" + +#include "color_hsv.simd.hpp" +#include "color_hsv.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv { + +// +// IPP functions +// + +#if NEED_IPP + +#if !IPP_DISABLE_RGB_HSV +static ippiGeneralFunc ippiRGB2HSVTab[] = +{ + (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0, + 0, 0, 0, 0 +}; +#endif + +static ippiGeneralFunc ippiHSV2RGBTab[] = +{ + (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0, + 0, 0, 0, 0 +}; + +static ippiGeneralFunc ippiRGB2HLSTab[] = +{ + (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0, + 0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0 +}; + +static ippiGeneralFunc ippiHLS2RGBTab[] = +{ + (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0, + 0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0 +}; + +#endif + +// +// HAL functions +// + +namespace hal +{ + +// 8u, 32f +void cvtBGRtoHSV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGRtoHSV, cv_hal_cvtBGRtoHSV, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isFullRange, isHSV); + +#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 + CV_IPP_CHECK() + { + if(depth == CV_8U && isFullRange) + { + if (isHSV) + { +#if !IPP_DISABLE_RGB_HSV // breaks OCL accuracy tests + if(scn == 3 && !swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKE_TYPE(depth, scn), dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(scn == 4 && !swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(scn == 4 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) ) + return; + } +#endif + } + else + { + if(scn == 3 && !swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKE_TYPE(depth, scn), dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(scn == 4 && !swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(scn == 3 && swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKE_TYPE(depth, scn), dst_data, dst_step, width, height, + IPPGeneralFunctor(ippiRGB2HLSTab[depth])) ) + return; + } + else if(scn == 4 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) ) + return; + } + } + } + } +#endif + + CV_CPU_DISPATCH(cvtBGRtoHSV, (src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isFullRange, isHSV), + CV_CPU_DISPATCH_MODES_ALL); +} + +// 8u, 32f +void cvtHSVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtHSVtoBGR, cv_hal_cvtHSVtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isFullRange, isHSV); + +#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 + CV_IPP_CHECK() + { + if (depth == CV_8U && isFullRange) + { + if (isHSV) + { + if(dcn == 3 && !swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, + IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(dcn == 4 && !swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(dcn == 3 && swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, + IPPGeneralFunctor(ippiHSV2RGBTab[depth])) ) + return; + } + else if(dcn == 4 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) + return; + } + } + else + { + if(dcn == 3 && !swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, + IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(dcn == 4 && !swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if(dcn == 3 && swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, + IPPGeneralFunctor(ippiHLS2RGBTab[depth])) ) + return; + } + else if(dcn == 4 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) + return; + } + } + } + } +#endif + + CV_CPU_DISPATCH(cvtHSVtoBGR, (src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isFullRange, isHSV), + CV_CPU_DISPATCH_MODES_ALL); +} + +} // namespace hal + +// +// OCL calls +// + +#ifdef HAVE_OPENCL + +bool oclCvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full ) +{ + OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 255); + + if(!h.createKernel("HSV2RGB", ocl::imgproc::color_hsv_oclsrc, + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", dcn, bidx, hrange, 6.f/hrange))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full ) +{ + OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 255); + + if(!h.createKernel("HLS2RGB", ocl::imgproc::color_hsv_oclsrc, + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", dcn, bidx, hrange, 6.f/hrange))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR2HLS( InputArray _src, OutputArray _dst, int bidx, bool full ) +{ + OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + float hscale = (_src.depth() == CV_32F ? 360.f : (!full ? 180.f : 256.f))/360.f; + + if(!h.createKernel("RGB2HLS", ocl::imgproc::color_hsv_oclsrc, + format("-D hscale=%ff -D bidx=%d -D dcn=3", hscale, bidx))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full ) +{ + OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 256); + + cv::String options = (_src.depth() == CV_8U ? + format("-D hrange=%d -D bidx=%d -D dcn=3", hrange, bidx) : + format("-D hscale=%ff -D bidx=%d -D dcn=3", hrange*(1.f/360.f), bidx)); + + if(!h.createKernel("RGB2HSV", ocl::imgproc::color_hsv_oclsrc, options)) + { + return false; + } + + if(_src.depth() == CV_8U) + { + static UMat sdiv_data; + static UMat hdiv_data180; + static UMat hdiv_data256; + static int sdiv_table[256]; + static int hdiv_table180[256]; + static int hdiv_table256[256]; + static volatile bool initialized180 = false, initialized256 = false; + volatile bool & initialized = hrange == 180 ? initialized180 : initialized256; + + if (!initialized) + { + int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12; + UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256; + + sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; + + int v = 255 << hsv_shift; + if (!initialized180 && !initialized256) + { + for(int i = 1; i < 256; i++ ) + sdiv_table[i] = saturate_cast(v/(1.*i)); + Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data); + } + + v = hrange << hsv_shift; + for (int i = 1; i < 256; i++ ) + hdiv_table[i] = saturate_cast(v/(6.*i)); + + Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data); + initialized = true; + } + + h.setArg(ocl::KernelArg::PtrReadOnly(sdiv_data)); + h.setArg(hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) : + ocl::KernelArg::PtrReadOnly(hdiv_data180)); + } + + return h.run(); +} + +#endif + +// +// HAL calls +// + +void cvtColorBGR2HLS( InputArray _src, OutputArray _dst, bool swapb, bool fullRange ) +{ + CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + hal::cvtBGRtoHSV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, h.scn, swapb, fullRange, false); +} + +void cvtColorBGR2HSV( InputArray _src, OutputArray _dst, bool swapb, bool fullRange ) +{ + CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + hal::cvtBGRtoHSV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, h.scn, swapb, fullRange, true); +} + +void cvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtHSVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, dcn, swapb, fullRange, false); +} + +void cvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtHSVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, dcn, swapb, fullRange, true); +} + + +} // namespace cv diff --git a/modules/imgproc/src/color_hsv.cpp b/modules/imgproc/src/color_hsv.simd.hpp similarity index 78% rename from modules/imgproc/src/color_hsv.cpp rename to modules/imgproc/src/color_hsv.simd.hpp index f0a4c87558..30ae7064bc 100644 --- a/modules/imgproc/src/color_hsv.cpp +++ b/modules/imgproc/src/color_hsv.simd.hpp @@ -3,11 +3,31 @@ // of this distribution and at http://opencv.org/license.html #include "precomp.hpp" -#include "color.hpp" +#include "opencv2/core/hal/intrin.hpp" -namespace cv -{ +namespace cv { +namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +void cvtBGRtoHSV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV); +void cvtHSVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#if defined(CV_CPU_BASELINE_MODE) +// included in color.hpp +#else +#include "color.simd_helpers.hpp" +#endif + +namespace { ////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// @@ -1192,46 +1212,7 @@ struct HLS2RGB_b #endif }; -// -// IPP functions -// - -#if NEED_IPP - -#if !IPP_DISABLE_RGB_HSV -static ippiGeneralFunc ippiRGB2HSVTab[] = -{ - (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0, - 0, 0, 0, 0 -}; -#endif - -static ippiGeneralFunc ippiHSV2RGBTab[] = -{ - (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0, - 0, 0, 0, 0 -}; - -static ippiGeneralFunc ippiRGB2HLSTab[] = -{ - (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0, - 0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0 -}; - -static ippiGeneralFunc ippiHLS2RGBTab[] = -{ - (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0, - 0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0 -}; - -#endif - -// -// HAL functions -// - -namespace hal -{ +} // namespace anon // 8u, 32f void cvtBGRtoHSV(const uchar * src_data, size_t src_step, @@ -1241,67 +1222,6 @@ void cvtBGRtoHSV(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGRtoHSV, cv_hal_cvtBGRtoHSV, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isFullRange, isHSV); - -#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 - CV_IPP_CHECK() - { - if(depth == CV_8U && isFullRange) - { - if (isHSV) - { -#if !IPP_DISABLE_RGB_HSV // breaks OCL accuracy tests - if(scn == 3 && !swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKE_TYPE(depth, scn), dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(scn == 4 && !swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(scn == 4 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) ) - return; - } -#endif - } - else - { - if(scn == 3 && !swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKE_TYPE(depth, scn), dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(scn == 4 && !swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(scn == 3 && swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKE_TYPE(depth, scn), dst_data, dst_step, width, height, - IPPGeneralFunctor(ippiRGB2HLSTab[depth])) ) - return; - } - else if(scn == 4 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) ) - return; - } - } - } - } -#endif - int hrange = depth == CV_32F ? 360 : isFullRange ? 256 : 180; int blueIdx = swapBlue ? 2 : 0; if(isHSV) @@ -1322,77 +1242,12 @@ void cvtBGRtoHSV(const uchar * src_data, size_t src_step, // 8u, 32f void cvtHSVtoBGR(const uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int width, int height, - int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV) + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV) { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtHSVtoBGR, cv_hal_cvtHSVtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isFullRange, isHSV); - -#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 - CV_IPP_CHECK() - { - if (depth == CV_8U && isFullRange) - { - if (isHSV) - { - if(dcn == 3 && !swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, - IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(dcn == 4 && !swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(dcn == 3 && swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, - IPPGeneralFunctor(ippiHSV2RGBTab[depth])) ) - return; - } - else if(dcn == 4 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) - return; - } - } - else - { - if(dcn == 3 && !swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, - IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(dcn == 4 && !swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) - return; - } - else if(dcn == 3 && swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, 3), dst_data, dst_step, width, height, - IPPGeneralFunctor(ippiHLS2RGBTab[depth])) ) - return; - } - else if(dcn == 4 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) - return; - } - } - } - } -#endif - int hrange = depth == CV_32F ? 360 : isFullRange ? 255 : 180; int blueIdx = swapBlue ? 2 : 0; if(isHSV) @@ -1411,155 +1266,6 @@ void cvtHSVtoBGR(const uchar * src_data, size_t src_step, } } -} // namespace hal - -// -// OCL calls -// - -#ifdef HAVE_OPENCL - -bool oclCvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full ) -{ - OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 255); - - if(!h.createKernel("HSV2RGB", ocl::imgproc::color_hsv_oclsrc, - format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", dcn, bidx, hrange, 6.f/hrange))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full ) -{ - OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 255); - - if(!h.createKernel("HLS2RGB", ocl::imgproc::color_hsv_oclsrc, - format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", dcn, bidx, hrange, 6.f/hrange))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR2HLS( InputArray _src, OutputArray _dst, int bidx, bool full ) -{ - OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - float hscale = (_src.depth() == CV_32F ? 360.f : (!full ? 180.f : 256.f))/360.f; - - if(!h.createKernel("RGB2HLS", ocl::imgproc::color_hsv_oclsrc, - format("-D hscale=%ff -D bidx=%d -D dcn=3", hscale, bidx))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full ) -{ - OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 256); - - cv::String options = (_src.depth() == CV_8U ? - format("-D hrange=%d -D bidx=%d -D dcn=3", hrange, bidx) : - format("-D hscale=%ff -D bidx=%d -D dcn=3", hrange*(1.f/360.f), bidx)); - - if(!h.createKernel("RGB2HSV", ocl::imgproc::color_hsv_oclsrc, options)) - { - return false; - } - - if(_src.depth() == CV_8U) - { - static UMat sdiv_data; - static UMat hdiv_data180; - static UMat hdiv_data256; - static int sdiv_table[256]; - static int hdiv_table180[256]; - static int hdiv_table256[256]; - static volatile bool initialized180 = false, initialized256 = false; - volatile bool & initialized = hrange == 180 ? initialized180 : initialized256; - - if (!initialized) - { - int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12; - UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256; - - sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; - - int v = 255 << hsv_shift; - if (!initialized180 && !initialized256) - { - for(int i = 1; i < 256; i++ ) - sdiv_table[i] = saturate_cast(v/(1.*i)); - Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data); - } - - v = hrange << hsv_shift; - for (int i = 1; i < 256; i++ ) - hdiv_table[i] = saturate_cast(v/(6.*i)); - - Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data); - initialized = true; - } - - h.setArg(ocl::KernelArg::PtrReadOnly(sdiv_data)); - h.setArg(hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) : - ocl::KernelArg::PtrReadOnly(hdiv_data180)); - } - - return h.run(); -} - #endif - -// -// HAL calls -// - -void cvtColorBGR2HLS( InputArray _src, OutputArray _dst, bool swapb, bool fullRange ) -{ - CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - hal::cvtBGRtoHSV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, h.scn, swapb, fullRange, false); -} - -void cvtColorBGR2HSV( InputArray _src, OutputArray _dst, bool swapb, bool fullRange ) -{ - CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - hal::cvtBGRtoHSV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, h.scn, swapb, fullRange, true); -} - -void cvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtHSVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, dcn, swapb, fullRange, false); -} - -void cvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtHSVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, dcn, swapb, fullRange, true); -} - - -} // namespace cv +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // namespace diff --git a/modules/imgproc/src/color_lab.cpp b/modules/imgproc/src/color_lab.cpp index 0fff89358c..cb5c0fdf53 100644 --- a/modules/imgproc/src/color_lab.cpp +++ b/modules/imgproc/src/color_lab.cpp @@ -9,6 +9,10 @@ \**********************************************************************************/ #include "precomp.hpp" +#include "opencl_kernels_imgproc.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include "opencv2/core/softfloat.hpp" + #include "color.hpp" using cv::softfloat; diff --git a/modules/imgproc/src/color_rgb.dispatch.cpp b/modules/imgproc/src/color_rgb.dispatch.cpp new file mode 100644 index 0000000000..ed2961f0fb --- /dev/null +++ b/modules/imgproc/src/color_rgb.dispatch.cpp @@ -0,0 +1,619 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "color.hpp" + +#include "color_rgb.simd.hpp" +#include "color_rgb.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +#define IPP_DISABLE_CVTCOLOR_GRAY2BGR_8UC3 1 + +namespace cv { + +// +// IPP functions +// + +#if NEED_IPP + +static const ippiColor2GrayFunc ippiColor2GrayC3Tab[] = +{ + (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0, + 0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0 +}; + +static const ippiColor2GrayFunc ippiColor2GrayC4Tab[] = +{ + (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0, + 0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0 +}; + +static const ippiGeneralFunc ippiRGB2GrayC3Tab[] = +{ + (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0, + 0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0 +}; + +static const ippiGeneralFunc ippiRGB2GrayC4Tab[] = +{ + (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0, + 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0 +}; + + +#if !IPP_DISABLE_CVTCOLOR_GRAY2BGR_8UC3 +static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); +} +#endif +static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); +} +static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); +} + +static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); +} +static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); +} +static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); +} + +struct IPPColor2GrayFunctor +{ + IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : + ippiColorToGray(_func) + { + coeffs[0] = B2YF; + coeffs[1] = G2YF; + coeffs[2] = R2YF; + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + return ippiColorToGray ? CV_INSTRUMENT_FUN_IPP(ippiColorToGray, src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false; + } +private: + ippiColor2GrayFunc ippiColorToGray; + Ipp32f coeffs[3]; +}; + +template +struct IPPGray2BGRFunctor +{ + IPPGray2BGRFunctor(){} + + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows)) >= 0; + } +}; + +template +struct IPPGray2BGRAFunctor +{ + IPPGray2BGRAFunctor() + { + alpha = ColorChannel::max(); + } + + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0; + } + + T alpha; +}; + +static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, + IppiSize roiSize, const int *dstOrder) +{ + return CV_INSTRUMENT_FUN_IPP(ippiSwapChannels_8u_C3C4R, pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u); +} + +static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, + IppiSize roiSize, const int *dstOrder) +{ + return CV_INSTRUMENT_FUN_IPP(ippiSwapChannels_16u_C3C4R, pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u); +} + +static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, + IppiSize roiSize, const int *dstOrder) +{ + return CV_INSTRUMENT_FUN_IPP(ippiSwapChannels_32f_C3C4R, pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f); +} + +// shared +ippiReorderFunc ippiSwapChannelsC3C4RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0 +}; + +static ippiGeneralFunc ippiCopyAC4C3RTab[] = +{ + (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0, + 0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0 +}; + +// shared +ippiReorderFunc ippiSwapChannelsC4C3RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0 +}; + +// shared +ippiReorderFunc ippiSwapChannelsC3RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0 +}; + +#if IPP_VERSION_X100 >= 810 +static ippiReorderFunc ippiSwapChannelsC4RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0 +}; +#endif + +#endif + +// +// HAL functions +// + +namespace hal { + +// 8u, 16u, 32f +void cvtBGRtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, int dcn, bool swapBlue) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGRtoBGR, cv_hal_cvtBGRtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, scn, dcn, swapBlue); + +#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 + CV_IPP_CHECK() + { + if(scn == 3 && dcn == 4 && !swapBlue) + { + if ( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) ) + return; + } + else if(scn == 4 && dcn == 3 && !swapBlue) + { + if ( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) ) + return; + } + else if(scn == 3 && dcn == 4 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) ) + return; + } + else if(scn == 4 && dcn == 3 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) ) + return; + } + else if(scn == 3 && dcn == 3 && swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, scn), dst_data, dst_step, width, height, + IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) + return; + } +#if IPP_VERSION_X100 >= 810 + else if(scn == 4 && dcn == 4 && swapBlue) + { + if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, scn), dst_data, dst_step, width, height, + IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) + return; + } + } +#endif +#endif + + CV_CPU_DISPATCH(cvtBGRtoBGR, (src_data, src_step, dst_data, dst_step, width, height, depth, scn, dcn, swapBlue), + CV_CPU_DISPATCH_MODES_ALL); +} + +// only 8u +void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int greenBits) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGRtoBGR5x5, cv_hal_cvtBGRtoBGR5x5, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, greenBits); + + CV_CPU_DISPATCH(cvtBGRtoBGR5x5, (src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, greenBits), + CV_CPU_DISPATCH_MODES_ALL); +} + +// only 8u +void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int greenBits) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGR5x5toBGR, cv_hal_cvtBGR5x5toBGR, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, greenBits); + + CV_CPU_DISPATCH(cvtBGR5x5toBGR, (src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, greenBits), + CV_CPU_DISPATCH_MODES_ALL); +} + +// 8u, 16u, 32f +void cvtBGRtoGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGRtoGray, cv_hal_cvtBGRtoGray, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue); + +#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 + CV_IPP_CHECK() + { + if(depth == CV_32F && scn == 3 && !swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) + return; + } + else if(depth == CV_32F && scn == 3 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) + return; + } + else if(depth == CV_32F && scn == 4 && !swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) ) + return; + } + else if(depth == CV_32F && scn == 4 && swapBlue) + { + if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) ) + return; + } + } +#endif + + CV_CPU_DISPATCH(cvtBGRtoGray, (src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue), + CV_CPU_DISPATCH_MODES_ALL); +} + +// 8u, 16u, 32f +void cvtGraytoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtGraytoBGR, cv_hal_cvtGraytoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn); + +#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 + CV_IPP_CHECK() + { + bool ippres = false; + if(dcn == 3) + { + if( depth == CV_8U ) + { +#if !IPP_DISABLE_CVTCOLOR_GRAY2BGR_8UC3 + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); +#endif + } + else if( depth == CV_16U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); + else + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); + } + else if(dcn == 4) + { + if( depth == CV_8U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); + else if( depth == CV_16U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); + else + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); + } + if(ippres) + return; + } +#endif + + CV_CPU_DISPATCH(cvtGraytoBGR, (src_data, src_step, dst_data, dst_step, width, height, depth, dcn), + CV_CPU_DISPATCH_MODES_ALL); +} + +// only 8u +void cvtBGR5x5toGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGR5x5toGray, cv_hal_cvtBGR5x5toGray, src_data, src_step, dst_data, dst_step, width, height, greenBits); + + CV_CPU_DISPATCH(cvtBGR5x5toGray, (src_data, src_step, dst_data, dst_step, width, height, greenBits), + CV_CPU_DISPATCH_MODES_ALL); +} + +// only 8u +void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtGraytoBGR5x5, cv_hal_cvtGraytoBGR5x5, src_data, src_step, dst_data, dst_step, width, height, greenBits); + + CV_CPU_DISPATCH(cvtGraytoBGR5x5, (src_data, src_step, dst_data, dst_step, width, height, greenBits), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtRGBAtoMultipliedRGBA, cv_hal_cvtRGBAtoMultipliedRGBA, src_data, src_step, dst_data, dst_step, width, height); + +#ifdef HAVE_IPP + CV_IPP_CHECK() + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R))) + return; + } +#endif + + CV_CPU_DISPATCH(cvtRGBAtoMultipliedRGBA, (src_data, src_step, dst_data, dst_step, width, height), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtMultipliedRGBAtoRGBA, cv_hal_cvtMultipliedRGBAtoRGBA, src_data, src_step, dst_data, dst_step, width, height); + + CV_CPU_DISPATCH(cvtMultipliedRGBAtoRGBA, (src_data, src_step, dst_data, dst_step, width, height), + CV_CPU_DISPATCH_MODES_ALL); +} + +} // namespace hal + +// +// OCL calls +// + +#ifdef HAVE_OPENCL + +bool oclCvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool reverse ) +{ + OclHelper< Set<3, 4>, Set<3, 4>, Set > h(_src, _dst, dcn); + + if(!h.createKernel("RGB", ocl::imgproc::color_rgb_oclsrc, + format("-D dcn=%d -D bidx=0 -D %s", dcn, reverse ? "REVERSE" : "ORDER"))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR25x5( InputArray _src, OutputArray _dst, int bidx, int gbits ) +{ + OclHelper< Set<3, 4>, Set<2>, Set > h(_src, _dst, 2); + + if(!h.createKernel("RGB2RGB5x5", ocl::imgproc::color_rgb_oclsrc, + format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, gbits))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int gbits) +{ + OclHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); + + if(!h.createKernel("RGB5x52RGB", ocl::imgproc::color_rgb_oclsrc, + format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, gbits))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits) +{ + OclHelper< Set<2>, Set<1>, Set > h(_src, _dst, 1); + + if(!h.createKernel("BGR5x52Gray", ocl::imgproc::color_rgb_oclsrc, + format("-D dcn=1 -D bidx=0 -D greenbits=%d", gbits))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits) +{ + OclHelper< Set<1>, Set<2>, Set > h(_src, _dst, 2); + + if(!h.createKernel("Gray2BGR5x5", ocl::imgproc::color_rgb_oclsrc, + format("-D dcn=2 -D bidx=0 -D greenbits=%d", gbits))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR2Gray( InputArray _src, OutputArray _dst, int bidx) +{ + OclHelper< Set<3, 4>, Set<1>, Set > h(_src, _dst, 1); + + int stripeSize = 1; + if(!h.createKernel("RGB2Gray", ocl::imgproc::color_rgb_oclsrc, + format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d", bidx, stripeSize))) + { + return false; + } + + h.globalSize[0] = (h.src.cols + stripeSize - 1)/stripeSize; + return h.run(); +} + +bool oclCvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn) +{ + OclHelper< Set<1>, Set<3, 4>, Set > h(_src, _dst, dcn); + if(!h.createKernel("Gray2RGB", ocl::imgproc::color_rgb_oclsrc, + format("-D bidx=0 -D dcn=%d", dcn))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst) +{ + OclHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); + + if(!h.createKernel("RGBA2mRGBA", ocl::imgproc::color_rgb_oclsrc, + "-D dcn=4 -D bidx=3")) + { + return false; + } + + return h.run(); +} + +bool oclCvtColormRGBA2RGBA( InputArray _src, OutputArray _dst) +{ + OclHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); + + if(!h.createKernel("mRGBA2RGBA", ocl::imgproc::color_rgb_oclsrc, + "-D dcn=4 -D bidx=3")) + { + return false; + } + + return h.run(); +} + +#endif + +// +// HAL calls +// + +void cvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb) +{ + CvtHelper< Set<3, 4>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtBGRtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, h.scn, dcn, swapb); +} + +void cvtColorBGR25x5( InputArray _src, OutputArray _dst, bool swapb, int gbits) +{ + CvtHelper< Set<3, 4>, Set<2>, Set > h(_src, _dst, 2); + + hal::cvtBGRtoBGR5x5(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.scn, swapb, gbits); +} + +void cvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int gbits) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtBGR5x5toBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + dcn, swapb, gbits); +} + +void cvtColorBGR2Gray( InputArray _src, OutputArray _dst, bool swapb) +{ + CvtHelper< Set<3, 4>, Set<1>, Set > h(_src, _dst, 1); + + hal::cvtBGRtoGray(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, h.scn, swapb); +} + +void cvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<1>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtGraytoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, h.depth, dcn); +} + +void cvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits) +{ + CvtHelper< Set<2>, Set<1>, Set > h(_src, _dst, 1); + + hal::cvtBGR5x5toGray(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, gbits); +} + +void cvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits) +{ + CvtHelper< Set<1>, Set<2>, Set > h(_src, _dst, 2); + + hal::cvtGraytoBGR5x5(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, gbits); +} + +void cvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst) +{ + CvtHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); + + hal::cvtRGBAtoMultipliedRGBA(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows); +} + +void cvtColormRGBA2RGBA( InputArray _src, OutputArray _dst) +{ + CvtHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); + + hal::cvtMultipliedRGBAtoRGBA(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows); +} + +} // namespace cv diff --git a/modules/imgproc/src/color_rgb.cpp b/modules/imgproc/src/color_rgb.simd.hpp similarity index 65% rename from modules/imgproc/src/color_rgb.cpp rename to modules/imgproc/src/color_rgb.simd.hpp index 41528d347b..6e11020197 100644 --- a/modules/imgproc/src/color_rgb.cpp +++ b/modules/imgproc/src/color_rgb.simd.hpp @@ -3,13 +3,58 @@ // of this distribution and at http://opencv.org/license.html #include "precomp.hpp" -#include "color.hpp" +#include "opencv2/core/hal/intrin.hpp" -#define IPP_DISABLE_CVTCOLOR_GRAY2BGR_8UC3 1 +namespace cv { +namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations -namespace cv -{ +void cvtBGRtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, int dcn, bool swapBlue); +void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int greenBits); +void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int greenBits); +void cvtBGRtoGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); +void cvtGraytoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn); +void cvtBGR5x5toGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); +void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); +void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); +void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#if defined(CV_CPU_BASELINE_MODE) +// included in color.hpp +#else +#include "color.simd_helpers.hpp" +#endif + +namespace { ////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// template struct v_type; @@ -1041,172 +1086,7 @@ struct mRGBA2RGBA } } }; - -// -// IPP functions -// - -#if NEED_IPP - -static ippiColor2GrayFunc ippiColor2GrayC3Tab[] = -{ - (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0, - 0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0 -}; - -static ippiColor2GrayFunc ippiColor2GrayC4Tab[] = -{ - (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0, - 0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0 -}; - -static ippiGeneralFunc ippiRGB2GrayC3Tab[] = -{ - (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0, - 0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0 -}; - -static ippiGeneralFunc ippiRGB2GrayC4Tab[] = -{ - (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0, - 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0 -}; - - -#if !IPP_DISABLE_CVTCOLOR_GRAY2BGR_8UC3 -static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize) -{ - return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); -} -#endif -static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize) -{ - return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); -} -static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize) -{ - return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); -} - -static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval) -{ - return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); -} -static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval) -{ - return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); -} -static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval) -{ - return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); -} - -struct IPPColor2GrayFunctor -{ - IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : - ippiColorToGray(_func) - { - coeffs[0] = B2YF; - coeffs[1] = G2YF; - coeffs[2] = R2YF; - } - bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const - { - return ippiColorToGray ? CV_INSTRUMENT_FUN_IPP(ippiColorToGray, src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false; - } -private: - ippiColor2GrayFunc ippiColorToGray; - Ipp32f coeffs[3]; -}; - -template -struct IPPGray2BGRFunctor -{ - IPPGray2BGRFunctor(){} - - bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const - { - return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows)) >= 0; - } -}; - -template -struct IPPGray2BGRAFunctor -{ - IPPGray2BGRAFunctor() - { - alpha = ColorChannel::max(); - } - - bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const - { - return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0; - } - - T alpha; -}; - -static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, - IppiSize roiSize, const int *dstOrder) -{ - return CV_INSTRUMENT_FUN_IPP(ippiSwapChannels_8u_C3C4R, pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u); -} - -static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, - IppiSize roiSize, const int *dstOrder) -{ - return CV_INSTRUMENT_FUN_IPP(ippiSwapChannels_16u_C3C4R, pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u); -} - -static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, - IppiSize roiSize, const int *dstOrder) -{ - return CV_INSTRUMENT_FUN_IPP(ippiSwapChannels_32f_C3C4R, pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f); -} - -// shared -ippiReorderFunc ippiSwapChannelsC3C4RTab[] = -{ - (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0, - 0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0 -}; - -static ippiGeneralFunc ippiCopyAC4C3RTab[] = -{ - (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0, - 0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0 -}; - -// shared -ippiReorderFunc ippiSwapChannelsC4C3RTab[] = -{ - (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0, - 0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0 -}; - -// shared -ippiReorderFunc ippiSwapChannelsC3RTab[] = -{ - (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0, - 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0 -}; - -#if IPP_VERSION_X100 >= 810 -static ippiReorderFunc ippiSwapChannelsC4RTab[] = -{ - (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0, - 0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0 -}; -#endif - -#endif - -// -// HAL functions -// - -namespace hal -{ +} // namespace anon // 8u, 16u, 32f void cvtBGRtoBGR(const uchar * src_data, size_t src_step, @@ -1216,52 +1096,6 @@ void cvtBGRtoBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGRtoBGR, cv_hal_cvtBGRtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, scn, dcn, swapBlue); - -#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 - CV_IPP_CHECK() - { - if(scn == 3 && dcn == 4 && !swapBlue) - { - if ( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) ) - return; - } - else if(scn == 4 && dcn == 3 && !swapBlue) - { - if ( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) ) - return; - } - else if(scn == 3 && dcn == 4 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) ) - return; - } - else if(scn == 4 && dcn == 3 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) ) - return; - } - else if(scn == 3 && dcn == 3 && swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, scn), dst_data, dst_step, width, height, - IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) - return; - } -#if IPP_VERSION_X100 >= 810 - else if(scn == 4 && dcn == 4 && swapBlue) - { - if( CvtColorIPPLoopCopy(src_data, src_step, CV_MAKETYPE(depth, scn), dst_data, dst_step, width, height, - IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) - return; - } - } -#endif -#endif - int blueIdx = swapBlue ? 2 : 0; if( depth == CV_8U ) CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2RGB(scn, dcn, blueIdx)); @@ -1279,8 +1113,6 @@ void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGRtoBGR5x5, cv_hal_cvtBGRtoBGR5x5, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, greenBits); - CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2RGB5x5(scn, swapBlue ? 2 : 0, greenBits)); } @@ -1292,8 +1124,6 @@ void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGR5x5toBGR, cv_hal_cvtBGR5x5toBGR, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, greenBits); - CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB5x52RGB(dcn, swapBlue ? 2 : 0, greenBits)); } @@ -1305,38 +1135,6 @@ void cvtBGRtoGray(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGRtoGray, cv_hal_cvtBGRtoGray, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue); - -#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 - CV_IPP_CHECK() - { - if(depth == CV_32F && scn == 3 && !swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) - return; - } - else if(depth == CV_32F && scn == 3 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) - return; - } - else if(depth == CV_32F && scn == 4 && !swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) ) - return; - } - else if(depth == CV_32F && scn == 4 && swapBlue) - { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) ) - return; - } - } -#endif - int blueIdx = swapBlue ? 2 : 0; if( depth == CV_8U ) CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2Gray(scn, blueIdx, 0)); @@ -1354,39 +1152,6 @@ void cvtGraytoBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtGraytoBGR, cv_hal_cvtGraytoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn); - -#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 - CV_IPP_CHECK() - { - bool ippres = false; - if(dcn == 3) - { - if( depth == CV_8U ) - { -#if !IPP_DISABLE_CVTCOLOR_GRAY2BGR_8UC3 - ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); -#endif - } - else if( depth == CV_16U ) - ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); - else - ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); - } - else if(dcn == 4) - { - if( depth == CV_8U ) - ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); - else if( depth == CV_16U ) - ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); - else - ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); - } - if(ippres) - return; - } -#endif - if( depth == CV_8U ) CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, Gray2RGB(dcn)); else if( depth == CV_16U ) @@ -1403,7 +1168,6 @@ void cvtBGR5x5toGray(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGR5x5toGray, cv_hal_cvtBGR5x5toGray, src_data, src_step, dst_data, dst_step, width, height, greenBits); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB5x52Gray(greenBits)); } @@ -1415,7 +1179,6 @@ void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtGraytoBGR5x5, cv_hal_cvtGraytoBGR5x5, src_data, src_step, dst_data, dst_step, width, height, greenBits); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, Gray2RGB5x5(greenBits)); } @@ -1425,17 +1188,6 @@ void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtRGBAtoMultipliedRGBA, cv_hal_cvtRGBAtoMultipliedRGBA, src_data, src_step, dst_data, dst_step, width, height); - -#ifdef HAVE_IPP - CV_IPP_CHECK() - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R))) - return; - } -#endif - CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGBA2mRGBA()); } @@ -1445,209 +1197,9 @@ void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtMultipliedRGBAtoRGBA, cv_hal_cvtMultipliedRGBAtoRGBA, src_data, src_step, dst_data, dst_step, width, height); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, mRGBA2RGBA()); } -} // namespace hal - -// -// OCL calls -// - -#ifdef HAVE_OPENCL - -bool oclCvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool reverse ) -{ - OclHelper< Set<3, 4>, Set<3, 4>, Set > h(_src, _dst, dcn); - - if(!h.createKernel("RGB", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=%d -D bidx=0 -D %s", dcn, reverse ? "REVERSE" : "ORDER"))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR25x5( InputArray _src, OutputArray _dst, int bidx, int gbits ) -{ - OclHelper< Set<3, 4>, Set<2>, Set > h(_src, _dst, 2); - - if(!h.createKernel("RGB2RGB5x5", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, gbits))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int gbits) -{ - OclHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); - - if(!h.createKernel("RGB5x52RGB", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, gbits))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits) -{ - OclHelper< Set<2>, Set<1>, Set > h(_src, _dst, 1); - - if(!h.createKernel("BGR5x52Gray", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=1 -D bidx=0 -D greenbits=%d", gbits))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits) -{ - OclHelper< Set<1>, Set<2>, Set > h(_src, _dst, 2); - - if(!h.createKernel("Gray2BGR5x5", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=2 -D bidx=0 -D greenbits=%d", gbits))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR2Gray( InputArray _src, OutputArray _dst, int bidx) -{ - OclHelper< Set<3, 4>, Set<1>, Set > h(_src, _dst, 1); - - int stripeSize = 1; - if(!h.createKernel("RGB2Gray", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d", bidx, stripeSize))) - { - return false; - } - - h.globalSize[0] = (h.src.cols + stripeSize - 1)/stripeSize; - return h.run(); -} - -bool oclCvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn) -{ - OclHelper< Set<1>, Set<3, 4>, Set > h(_src, _dst, dcn); - if(!h.createKernel("Gray2RGB", ocl::imgproc::color_rgb_oclsrc, - format("-D bidx=0 -D dcn=%d", dcn))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst) -{ - OclHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); - - if(!h.createKernel("RGBA2mRGBA", ocl::imgproc::color_rgb_oclsrc, - "-D dcn=4 -D bidx=3")) - { - return false; - } - - return h.run(); -} - -bool oclCvtColormRGBA2RGBA( InputArray _src, OutputArray _dst) -{ - OclHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); - - if(!h.createKernel("mRGBA2RGBA", ocl::imgproc::color_rgb_oclsrc, - "-D dcn=4 -D bidx=3")) - { - return false; - } - - return h.run(); -} - #endif - -// -// HAL calls -// - -void cvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb) -{ - CvtHelper< Set<3, 4>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtBGRtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, h.scn, dcn, swapb); -} - -void cvtColorBGR25x5( InputArray _src, OutputArray _dst, bool swapb, int gbits) -{ - CvtHelper< Set<3, 4>, Set<2>, Set > h(_src, _dst, 2); - - hal::cvtBGRtoBGR5x5(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.scn, swapb, gbits); -} - -void cvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int gbits) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtBGR5x5toBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - dcn, swapb, gbits); -} - -void cvtColorBGR2Gray( InputArray _src, OutputArray _dst, bool swapb) -{ - CvtHelper< Set<3, 4>, Set<1>, Set > h(_src, _dst, 1); - - hal::cvtBGRtoGray(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, h.scn, swapb); -} - -void cvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<1>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtGraytoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, h.depth, dcn); -} - -void cvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits) -{ - CvtHelper< Set<2>, Set<1>, Set > h(_src, _dst, 1); - - hal::cvtBGR5x5toGray(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, gbits); -} - -void cvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits) -{ - CvtHelper< Set<1>, Set<2>, Set > h(_src, _dst, 2); - - hal::cvtGraytoBGR5x5(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, gbits); -} - -void cvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst) -{ - CvtHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); - - hal::cvtRGBAtoMultipliedRGBA(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows); -} - -void cvtColormRGBA2RGBA( InputArray _src, OutputArray _dst) -{ - CvtHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); - - hal::cvtMultipliedRGBAtoRGBA(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows); -} - -} // namespace cv +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // namespace diff --git a/modules/imgproc/src/color_yuv.dispatch.cpp b/modules/imgproc/src/color_yuv.dispatch.cpp new file mode 100644 index 0000000000..6cb508f980 --- /dev/null +++ b/modules/imgproc/src/color_yuv.dispatch.cpp @@ -0,0 +1,417 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "color.hpp" + +#include "color_yuv.simd.hpp" +#include "color_yuv.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv { + +// +// HAL functions +// +namespace hal { + +// 8u, 16u, 32f +void cvtBGRtoYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isCbCr) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGRtoYUV, cv_hal_cvtBGRtoYUV, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr); + +#if defined(HAVE_IPP) +#if !IPP_DISABLE_RGB_YUV + CV_IPP_CHECK() + { + if (scn == 3 && depth == CV_8U && swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R))) + return; + } + else if (scn == 3 && depth == CV_8U && !swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], + (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) + return; + } + else if (scn == 4 && depth == CV_8U && swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], + (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth))) + return; + } + else if (scn == 4 && depth == CV_8U && !swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], + (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) + return; + } + } +#endif +#endif + + CV_CPU_DISPATCH(cvtBGRtoYUV, (src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isCbCr) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtYUVtoBGR, cv_hal_cvtYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr); + + +#if defined(HAVE_IPP) +#if !IPP_DISABLE_YUV_RGB + CV_IPP_CHECK() + { + if (dcn == 3 && depth == CV_8U && swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R))) + return; + } + else if (dcn == 3 && depth == CV_8U && !swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, + ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth))) + return; + } + else if (dcn == 4 && depth == CV_8U && swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, + ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth))) + return; + } + else if (dcn == 4 && depth == CV_8U && !swapBlue && !isCbCr) + { + if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, + IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, + ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth))) + return; + } + } +#endif +#endif + + CV_CPU_DISPATCH(cvtYUVtoBGR, (src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + + CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx) +{ + CV_INSTRUMENT_REGION(); + + CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtThreePlaneYUVtoBGR, cv_hal_cvtThreePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + + CV_CPU_DISPATCH(cvtThreePlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtBGRtoThreePlaneYUV, cv_hal_cvtBGRtoThreePlaneYUV, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx); + + CV_CPU_DISPATCH(cvtBGRtoThreePlaneYUV, (src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, + uchar * y_data, uchar * uv_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx) +{ + CV_INSTRUMENT_REGION(); + + // TODO: add hal replacement method + + CV_CPU_DISPATCH(cvtBGRtoTwoPlaneYUV, (src_data, src_step, y_data, uv_data, dst_step, width, height, scn, swapBlue, uIdx), + CV_CPU_DISPATCH_MODES_ALL); +} + +void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int uIdx, int ycn) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtOnePlaneYUVtoBGR, cv_hal_cvtOnePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn); + + CV_CPU_DISPATCH(cvtOnePlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn), + CV_CPU_DISPATCH_MODES_ALL); +} + +} // namespace hal + +// +// OCL calls +// + +#ifdef HAVE_OPENCL + +bool oclCvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx ) +{ + OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + if(!h.createKernel("YUV2RGB", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=%d -D bidx=%d", dcn, bidx))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR2YUV( InputArray _src, OutputArray _dst, int bidx ) +{ + OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + if(!h.createKernel("RGB2YUV", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=3 -D bidx=%d", bidx))) + { + return false; + } + + return h.run(); +} + +bool oclCvtcolorYCrCb2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx) +{ + OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + if(!h.createKernel("YCrCb2RGB", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=%d -D bidx=%d", dcn, bidx))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR2YCrCb( InputArray _src, OutputArray _dst, int bidx) +{ + OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + if(!h.createKernel("RGB2YCrCb", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=3 -D bidx=%d", bidx))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx, int yidx ) +{ + OclHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); + + bool optimized = _src.offset() % 4 == 0 && _src.step() % 4 == 0; + if(!h.createKernel("YUV2RGB_422", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, + optimized ? " -D USE_OPTIMIZED_LOAD" : ""))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ) +{ + OclHelper< Set<1>, Set<1>, Set, FROM_YUV> h(_src, _dst, 1); + + h.src.rowRange(0, _dst.rows()).copyTo(_dst); + return true; +} + +bool oclCvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx ) +{ + OclHelper< Set<1>, Set<3, 4>, Set, FROM_YUV > h(_src, _dst, dcn); + + if(!h.createKernel("YUV2RGB_NVx", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx ) +{ + OclHelper< Set<1>, Set<3, 4>, Set, FROM_YUV > h(_src, _dst, dcn); + + if(!h.createKernel("YUV2RGB_YV12_IYUV", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx, + _src.isContinuous() ? " -D SRC_CONT" : ""))) + { + return false; + } + + return h.run(); +} + +bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx, int uidx ) +{ + OclHelper< Set<3, 4>, Set<1>, Set, TO_YUV > h(_src, _dst, 1); + + if(!h.createKernel("RGB2YUV_YV12_IYUV", ocl::imgproc::color_yuv_oclsrc, + format("-D dcn=1 -D bidx=%d -D uidx=%d", bidx, uidx))) + { + return false; + } + + return h.run(); +} + +#endif + +// +// HAL calls +// + +void cvtColorBGR2YUV(InputArray _src, OutputArray _dst, bool swapb, bool crcb) +{ + CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); + + hal::cvtBGRtoYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, h.scn, swapb, crcb); +} + +void cvtColorYUV2BGR(InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.depth, dcn, swapb, crcb); +} + +void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn) +{ + CvtHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); + + hal::cvtOnePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + dcn, swapb, uidx, ycn); +} + +void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi ) +{ + CV_Assert( _src.channels() == 2 && _src.depth() == CV_8U ); + + extractChannel(_src, _dst, coi); +} + +void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx) +{ + CvtHelper< Set<3, 4>, Set<1>, Set, TO_YUV > h(_src, _dst, 1); + + hal::cvtBGRtoThreePlaneYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, + h.scn, swapb, uidx); +} + +void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ) +{ + CvtHelper< Set<1>, Set<1>, Set, FROM_YUV > h(_src, _dst, 1); + +#ifdef HAVE_IPP +#if IPP_VERSION_X100 >= 201700 + if (CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C1R_L, h.src.data, (IppSizeL)h.src.step, h.dst.data, (IppSizeL)h.dst.step, + ippiSizeL(h.dstSz.width, h.dstSz.height)) >= 0) + return; +#endif +#endif + h.src(Range(0, h.dstSz.height), Range::all()).copyTo(h.dst); +} + +void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<1>, Set<3, 4>, Set, FROM_YUV> h(_src, _dst, dcn); + + hal::cvtThreePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows, + dcn, swapb, uidx); +} + +// http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples +// http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples + +void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx ) +{ + if(dcn <= 0) dcn = 3; + CvtHelper< Set<1>, Set<3, 4>, Set, FROM_YUV> h(_src, _dst, dcn); + + hal::cvtTwoPlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows, + dcn, swapb, uidx); +} + +void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx ) +{ + int stype = _ysrc.type(); + int depth = CV_MAT_DEPTH(stype); + Size ysz = _ysrc.size(), uvs = _uvsrc.size(); + CV_Assert( dcn == 3 || dcn == 4 ); + CV_Assert( depth == CV_8U ); + CV_Assert( ysz.width == uvs.width * 2 && ysz.height == uvs.height * 2 ); + + Mat ysrc = _ysrc.getMat(), uvsrc = _uvsrc.getMat(); + + _dst.create( ysz, CV_MAKETYPE(depth, dcn)); + Mat dst = _dst.getMat(); + + hal::cvtTwoPlaneYUVtoBGR(ysrc.data, uvsrc.data, ysrc.step, + dst.data, dst.step, dst.cols, dst.rows, + dcn, swapb, uidx); +} + +} // namespace cv diff --git a/modules/imgproc/src/color_yuv.cpp b/modules/imgproc/src/color_yuv.simd.hpp similarity index 85% rename from modules/imgproc/src/color_yuv.cpp rename to modules/imgproc/src/color_yuv.simd.hpp index 7d731378e2..8bbd78b244 100644 --- a/modules/imgproc/src/color_yuv.cpp +++ b/modules/imgproc/src/color_yuv.simd.hpp @@ -3,11 +3,54 @@ // of this distribution and at http://opencv.org/license.html #include "precomp.hpp" -#include "color.hpp" +#include "opencv2/core/hal/intrin.hpp" -namespace cv -{ +namespace cv { +namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +void cvtBGRtoYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isCbCr); +void cvtYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isCbCr); +void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); +void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); +void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); +void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); +void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, + uchar * y_data, uchar * uv_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); +void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int uIdx, int ycn); +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#if defined(CV_CPU_BASELINE_MODE) +// included in color.hpp +#else +#include "color.simd_helpers.hpp" +#endif + +namespace { //constants for conversion from/to RGB and YUV, YCrCb according to BT.601 //to YCbCr @@ -1738,12 +1781,8 @@ inline void cvtYUV422toRGB(uchar * dst_data, size_t dst_step, const uchar * src_ converter(Range(0, height)); } -// -// HAL functions -// +} // namespace anon -namespace hal -{ // 8u, 16u, 32f void cvtBGRtoYUV(const uchar * src_data, size_t src_step, @@ -1753,43 +1792,6 @@ void cvtBGRtoYUV(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGRtoYUV, cv_hal_cvtBGRtoYUV, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr); - -#if defined(HAVE_IPP) -#if !IPP_DISABLE_RGB_YUV - CV_IPP_CHECK() - { - if (scn == 3 && depth == CV_8U && swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R))) - return; - } - else if (scn == 3 && depth == CV_8U && !swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], - (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) - return; - } - else if (scn == 4 && depth == CV_8U && swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], - (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth))) - return; - } - else if (scn == 4 && depth == CV_8U && !swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], - (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) - return; - } - } -#endif -#endif - int blueIdx = swapBlue ? 2 : 0; if( depth == CV_8U ) CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i(scn, blueIdx, isCbCr)); @@ -1806,44 +1808,6 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtYUVtoBGR, cv_hal_cvtYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr); - - -#if defined(HAVE_IPP) -#if !IPP_DISABLE_YUV_RGB - CV_IPP_CHECK() - { - if (dcn == 3 && depth == CV_8U && swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R))) - return; - } - else if (dcn == 3 && depth == CV_8U && !swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, - ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth))) - return; - } - else if (dcn == 4 && depth == CV_8U && swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, - ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth))) - return; - } - else if (dcn == 4 && depth == CV_8U && !swapBlue && !isCbCr) - { - if (CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, - ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth))) - return; - } - } -#endif -#endif - int blueIdx = swapBlue ? 2 : 0; if( depth == CV_8U ) CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i(dcn, blueIdx, isCbCr)); @@ -1860,7 +1824,6 @@ void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); const uchar* uv = src_data + src_step * static_cast(dst_height); cvtTwoPlaneYUVtoBGR(src_data, uv, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); } @@ -1880,8 +1843,6 @@ void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src { CV_INSTRUMENT_REGION(); - // TODO: add hal replacement method - int blueIdx = swapBlue ? 2 : 0; cvt_2plane_yuv_ptr_t cvtPtr; @@ -1919,7 +1880,6 @@ void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtThreePlaneYUVtoBGR, cv_hal_cvtThreePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); const uchar* u = src_data + src_step * static_cast(dst_height); const uchar* v = src_data + src_step * static_cast(dst_height + dst_height/4) + (dst_width/2) * ((dst_height % 4)/2); @@ -1949,7 +1909,6 @@ void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtBGRtoThreePlaneYUV, cv_hal_cvtBGRtoThreePlaneYUV, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx); uchar * uv_data = dst_data + dst_step * height; RGB8toYUV420pInvoker cvt(src_data, src_step, dst_data, uv_data, dst_step, width, height, @@ -1968,8 +1927,6 @@ void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - // TODO: add hal replacement method - RGB8toYUV420pInvoker cvt(src_data, src_step, y_data, uv_data, dst_step, width, height, scn, swapBlue, uIdx == 2, true); @@ -1993,8 +1950,6 @@ void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtOnePlaneYUVtoBGR, cv_hal_cvtOnePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn); - cvt_1plane_yuv_ptr_t cvtPtr; int blueIdx = swapBlue ? 2 : 0; switch(dcn*1000 + blueIdx*100 + uIdx*10 + ycn) @@ -2017,227 +1972,6 @@ void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, cvtPtr(dst_data, dst_step, src_data, src_step, width, height); } -} // namespace hal - -// -// OCL calls -// - -#ifdef HAVE_OPENCL - -bool oclCvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx ) -{ - OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - if(!h.createKernel("YUV2RGB", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d", dcn, bidx))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR2YUV( InputArray _src, OutputArray _dst, int bidx ) -{ - OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - if(!h.createKernel("RGB2YUV", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=3 -D bidx=%d", bidx))) - { - return false; - } - - return h.run(); -} - -bool oclCvtcolorYCrCb2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx) -{ - OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - if(!h.createKernel("YCrCb2RGB", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d", dcn, bidx))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR2YCrCb( InputArray _src, OutputArray _dst, int bidx) -{ - OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - if(!h.createKernel("RGB2YCrCb", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=3 -D bidx=%d", bidx))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx, int yidx ) -{ - OclHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); - - bool optimized = _src.offset() % 4 == 0 && _src.step() % 4 == 0; - if(!h.createKernel("YUV2RGB_422", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, - optimized ? " -D USE_OPTIMIZED_LOAD" : ""))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ) -{ - OclHelper< Set<1>, Set<1>, Set, FROM_YUV> h(_src, _dst, 1); - - h.src.rowRange(0, _dst.rows()).copyTo(_dst); - return true; -} - -bool oclCvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx ) -{ - OclHelper< Set<1>, Set<3, 4>, Set, FROM_YUV > h(_src, _dst, dcn); - - if(!h.createKernel("YUV2RGB_NVx", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx ) -{ - OclHelper< Set<1>, Set<3, 4>, Set, FROM_YUV > h(_src, _dst, dcn); - - if(!h.createKernel("YUV2RGB_YV12_IYUV", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx, - _src.isContinuous() ? " -D SRC_CONT" : ""))) - { - return false; - } - - return h.run(); -} - -bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx, int uidx ) -{ - OclHelper< Set<3, 4>, Set<1>, Set, TO_YUV > h(_src, _dst, 1); - - if(!h.createKernel("RGB2YUV_YV12_IYUV", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=1 -D bidx=%d -D uidx=%d", bidx, uidx))) - { - return false; - } - - return h.run(); -} - #endif - -// -// HAL calls -// - -void cvtColorBGR2YUV(InputArray _src, OutputArray _dst, bool swapb, bool crcb) -{ - CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); - - hal::cvtBGRtoYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, h.scn, swapb, crcb); -} - -void cvtColorYUV2BGR(InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, dcn, swapb, crcb); -} - -void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn) -{ - CvtHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); - - hal::cvtOnePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - dcn, swapb, uidx, ycn); -} - -void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi ) -{ - CV_Assert( _src.channels() == 2 && _src.depth() == CV_8U ); - - extractChannel(_src, _dst, coi); -} - -void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx) -{ - CvtHelper< Set<3, 4>, Set<1>, Set, TO_YUV > h(_src, _dst, 1); - - hal::cvtBGRtoThreePlaneYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.scn, swapb, uidx); -} - -void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ) -{ - CvtHelper< Set<1>, Set<1>, Set, FROM_YUV > h(_src, _dst, 1); - -#ifdef HAVE_IPP -#if IPP_VERSION_X100 >= 201700 - if (CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C1R_L, h.src.data, (IppSizeL)h.src.step, h.dst.data, (IppSizeL)h.dst.step, - ippiSizeL(h.dstSz.width, h.dstSz.height)) >= 0) - return; -#endif -#endif - h.src(Range(0, h.dstSz.height), Range::all()).copyTo(h.dst); -} - -void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<1>, Set<3, 4>, Set, FROM_YUV> h(_src, _dst, dcn); - - hal::cvtThreePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows, - dcn, swapb, uidx); -} - -// http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples -// http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples - -void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx ) -{ - if(dcn <= 0) dcn = 3; - CvtHelper< Set<1>, Set<3, 4>, Set, FROM_YUV> h(_src, _dst, dcn); - - hal::cvtTwoPlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows, - dcn, swapb, uidx); -} - -void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx ) -{ - int stype = _ysrc.type(); - int depth = CV_MAT_DEPTH(stype); - Size ysz = _ysrc.size(), uvs = _uvsrc.size(); - CV_Assert( dcn == 3 || dcn == 4 ); - CV_Assert( depth == CV_8U ); - CV_Assert( ysz.width == uvs.width * 2 && ysz.height == uvs.height * 2 ); - - Mat ysrc = _ysrc.getMat(), uvsrc = _uvsrc.getMat(); - - _dst.create( ysz, CV_MAKETYPE(depth, dcn)); - Mat dst = _dst.getMat(); - - hal::cvtTwoPlaneYUVtoBGR(ysrc.data, uvsrc.data, ysrc.step, - dst.data, dst.step, dst.cols, dst.rows, - dcn, swapb, uidx); -} - -} // namespace cv +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // namespace diff --git a/modules/imgproc/src/filter.avx2.cpp b/modules/imgproc/src/filter.avx2.cpp deleted file mode 100644 index e9ced20e36..0000000000 --- a/modules/imgproc/src/filter.avx2.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "filter.hpp" - -namespace cv -{ - -int RowVec_32f_AVX(const float* src0, const float* _kx, float* dst, int width, int cn, int _ksize) -{ - int i = 0, k; - for (; i <= width - 8; i += 8) - { - const float* src = src0 + i; - __m256 f, x0; - __m256 s0 = _mm256_set1_ps(0.0f); - for (k = 0; k < _ksize; k++, src += cn) - { - f = _mm256_set1_ps(_kx[k]); - x0 = _mm256_loadu_ps(src); -#if CV_FMA3 - s0 = _mm256_fmadd_ps(x0, f, s0); -#else - s0 = _mm256_add_ps(s0, _mm256_mul_ps(x0, f)); -#endif - } - _mm256_storeu_ps(dst + i, s0); - } - _mm256_zeroupper(); - return i; -} - -int SymmColumnVec_32f_Symm_AVX(const float** src, const float* ky, float* dst, float delta, int width, int ksize2) -{ - int i = 0, k; - const float *S, *S2; - const __m128 d4 = _mm_set1_ps(delta); - const __m256 d8 = _mm256_set1_ps(delta); - - for( ; i <= width - 16; i += 16 ) - { - __m256 f = _mm256_set1_ps(ky[0]); - __m256 s0, s1; - __m256 x0; - S = src[0] + i; - s0 = _mm256_loadu_ps(S); -#if CV_FMA3 - s0 = _mm256_fmadd_ps(s0, f, d8); -#else - s0 = _mm256_add_ps(_mm256_mul_ps(s0, f), d8); -#endif - s1 = _mm256_loadu_ps(S+8); -#if CV_FMA3 - s1 = _mm256_fmadd_ps(s1, f, d8); -#else - s1 = _mm256_add_ps(_mm256_mul_ps(s1, f), d8); -#endif - - for( k = 1; k <= ksize2; k++ ) - { - S = src[k] + i; - S2 = src[-k] + i; - f = _mm256_set1_ps(ky[k]); - x0 = _mm256_add_ps(_mm256_loadu_ps(S), _mm256_loadu_ps(S2)); -#if CV_FMA3 - s0 = _mm256_fmadd_ps(x0, f, s0); -#else - s0 = _mm256_add_ps(s0, _mm256_mul_ps(x0, f)); -#endif - x0 = _mm256_add_ps(_mm256_loadu_ps(S+8), _mm256_loadu_ps(S2+8)); -#if CV_FMA3 - s1 = _mm256_fmadd_ps(x0, f, s1); -#else - s1 = _mm256_add_ps(s1, _mm256_mul_ps(x0, f)); -#endif - } - - _mm256_storeu_ps(dst + i, s0); - _mm256_storeu_ps(dst + i + 8, s1); - } - - for( ; i <= width - 4; i += 4 ) - { - __m128 f = _mm_set1_ps(ky[0]); - __m128 x0, s0 = _mm_load_ps(src[0] + i); - s0 = _mm_add_ps(_mm_mul_ps(s0, f), d4); - - for( k = 1; k <= ksize2; k++ ) - { - f = _mm_set1_ps(ky[k]); - x0 = _mm_add_ps(_mm_load_ps(src[k]+i), _mm_load_ps(src[-k] + i)); - s0 = _mm_add_ps(s0, _mm_mul_ps(x0, f)); - } - - _mm_storeu_ps(dst + i, s0); - } - - _mm256_zeroupper(); - return i; -} - -int SymmColumnVec_32f_Unsymm_AVX(const float** src, const float* ky, float* dst, float delta, int width, int ksize2) -{ - int i = 0, k; - const float *S2; - const __m128 d4 = _mm_set1_ps(delta); - const __m256 d8 = _mm256_set1_ps(delta); - - for (; i <= width - 16; i += 16) - { - __m256 f, s0 = d8, s1 = d8; - __m256 x0; - - for (k = 1; k <= ksize2; k++) - { - const float *S = src[k] + i; - S2 = src[-k] + i; - f = _mm256_set1_ps(ky[k]); - x0 = _mm256_sub_ps(_mm256_loadu_ps(S), _mm256_loadu_ps(S2)); -#if CV_FMA3 - s0 = _mm256_fmadd_ps(x0, f, s0); -#else - s0 = _mm256_add_ps(s0, _mm256_mul_ps(x0, f)); -#endif - x0 = _mm256_sub_ps(_mm256_loadu_ps(S + 8), _mm256_loadu_ps(S2 + 8)); -#if CV_FMA3 - s1 = _mm256_fmadd_ps(x0, f, s1); -#else - s1 = _mm256_add_ps(s1, _mm256_mul_ps(x0, f)); -#endif - } - - _mm256_storeu_ps(dst + i, s0); - _mm256_storeu_ps(dst + i + 8, s1); - } - - for (; i <= width - 4; i += 4) - { - __m128 f, x0, s0 = d4; - - for (k = 1; k <= ksize2; k++) - { - f = _mm_set1_ps(ky[k]); - x0 = _mm_sub_ps(_mm_load_ps(src[k] + i), _mm_load_ps(src[-k] + i)); - s0 = _mm_add_ps(s0, _mm_mul_ps(x0, f)); - } - - _mm_storeu_ps(dst + i, s0); - } - - _mm256_zeroupper(); - return i; -} - -} - -/* End of file. */ diff --git a/modules/imgproc/src/filter.dispatch.cpp b/modules/imgproc/src/filter.dispatch.cpp new file mode 100644 index 0000000000..b6f5331028 --- /dev/null +++ b/modules/imgproc/src/filter.dispatch.cpp @@ -0,0 +1,1432 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/opencl/ocl_defs.hpp" +#include "opencl_kernels_imgproc.hpp" +#include "hal_replacement.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include "filter.hpp" + +#include "filter.simd.hpp" +#include "filter.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + + +/****************************************************************************************\ + Base Image Filter +\****************************************************************************************/ + +namespace cv { + +BaseRowFilter::BaseRowFilter() { ksize = anchor = -1; } +BaseRowFilter::~BaseRowFilter() {} + +BaseColumnFilter::BaseColumnFilter() { ksize = anchor = -1; } +BaseColumnFilter::~BaseColumnFilter() {} +void BaseColumnFilter::reset() {} + +BaseFilter::BaseFilter() { ksize = Size(-1,-1); anchor = Point(-1,-1); } +BaseFilter::~BaseFilter() {} +void BaseFilter::reset() {} + +FilterEngine::FilterEngine() + : srcType(-1), dstType(-1), bufType(-1), maxWidth(0), wholeSize(-1, -1), dx1(0), dx2(0), + rowBorderType(BORDER_REPLICATE), columnBorderType(BORDER_REPLICATE), + borderElemSize(0), bufStep(0), startY(0), startY0(0), endY(0), rowCount(0), dstY(0) +{ +} + + +FilterEngine::FilterEngine( const Ptr& _filter2D, + const Ptr& _rowFilter, + const Ptr& _columnFilter, + int _srcType, int _dstType, int _bufType, + int _rowBorderType, int _columnBorderType, + const Scalar& _borderValue ) + : srcType(-1), dstType(-1), bufType(-1), maxWidth(0), wholeSize(-1, -1), dx1(0), dx2(0), + rowBorderType(BORDER_REPLICATE), columnBorderType(BORDER_REPLICATE), + borderElemSize(0), bufStep(0), startY(0), startY0(0), endY(0), rowCount(0), dstY(0) +{ + init(_filter2D, _rowFilter, _columnFilter, _srcType, _dstType, _bufType, + _rowBorderType, _columnBorderType, _borderValue); +} + +FilterEngine::~FilterEngine() +{ +} + + +void FilterEngine::init( const Ptr& _filter2D, + const Ptr& _rowFilter, + const Ptr& _columnFilter, + int _srcType, int _dstType, int _bufType, + int _rowBorderType, int _columnBorderType, + const Scalar& _borderValue ) +{ + _srcType = CV_MAT_TYPE(_srcType); + _bufType = CV_MAT_TYPE(_bufType); + _dstType = CV_MAT_TYPE(_dstType); + + srcType = _srcType; + int srcElemSize = (int)getElemSize(srcType); + dstType = _dstType; + bufType = _bufType; + + filter2D = _filter2D; + rowFilter = _rowFilter; + columnFilter = _columnFilter; + + if( _columnBorderType < 0 ) + _columnBorderType = _rowBorderType; + + rowBorderType = _rowBorderType; + columnBorderType = _columnBorderType; + + CV_Assert( columnBorderType != BORDER_WRAP ); + + if( isSeparable() ) + { + CV_Assert( rowFilter && columnFilter ); + ksize = Size(rowFilter->ksize, columnFilter->ksize); + anchor = Point(rowFilter->anchor, columnFilter->anchor); + } + else + { + CV_Assert( bufType == srcType ); + ksize = filter2D->ksize; + anchor = filter2D->anchor; + } + + CV_Assert( 0 <= anchor.x && anchor.x < ksize.width && + 0 <= anchor.y && anchor.y < ksize.height ); + + borderElemSize = srcElemSize/(CV_MAT_DEPTH(srcType) >= CV_32S ? sizeof(int) : 1); + int borderLength = std::max(ksize.width - 1, 1); + borderTab.resize(borderLength*borderElemSize); + + maxWidth = bufStep = 0; + constBorderRow.clear(); + + if( rowBorderType == BORDER_CONSTANT || columnBorderType == BORDER_CONSTANT ) + { + constBorderValue.resize(srcElemSize*borderLength); + int srcType1 = CV_MAKETYPE(CV_MAT_DEPTH(srcType), MIN(CV_MAT_CN(srcType), 4)); + scalarToRawData(_borderValue, &constBorderValue[0], srcType1, + borderLength*CV_MAT_CN(srcType)); + } + + wholeSize = Size(-1,-1); +} + +#define VEC_ALIGN CV_MALLOC_ALIGN + +int FilterEngine::start(const Size& _wholeSize, const Size& sz, const Point& ofs) +{ + CV_INSTRUMENT_REGION(); + + CV_CPU_DISPATCH(FilterEngine__start, (*this, _wholeSize, sz, ofs), + CV_CPU_DISPATCH_MODES_ALL); +} + + +int FilterEngine::start(const Mat& src, const Size &wsz, const Point &ofs) +{ + start( wsz, src.size(), ofs); + return startY - ofs.y; +} + +int FilterEngine::remainingInputRows() const +{ + return endY - startY - rowCount; +} + +int FilterEngine::remainingOutputRows() const +{ + return roi.height - dstY; +} + +int FilterEngine::proceed(const uchar* src, int srcstep, int count, + uchar* dst, int dststep) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert( wholeSize.width > 0 && wholeSize.height > 0 ); + + CV_CPU_DISPATCH(FilterEngine__proceed, (*this, src, srcstep, count, dst, dststep), + CV_CPU_DISPATCH_MODES_ALL); +} + +void FilterEngine::apply(const Mat& src, Mat& dst, const Size& wsz, const Point& ofs) +{ + CV_INSTRUMENT_REGION(); + + CV_CheckTypeEQ(src.type(), srcType, ""); + CV_CheckTypeEQ(dst.type(), dstType, ""); + + CV_CPU_DISPATCH(FilterEngine__apply, (*this, src, dst, wsz, ofs), + CV_CPU_DISPATCH_MODES_ALL); +} + +/****************************************************************************************\ +* Separable linear filter * +\****************************************************************************************/ + +int getKernelType(InputArray filter_kernel, Point anchor) +{ + Mat _kernel = filter_kernel.getMat(); + CV_Assert( _kernel.channels() == 1 ); + int i, sz = _kernel.rows*_kernel.cols; + + Mat kernel; + _kernel.convertTo(kernel, CV_64F); + + const double* coeffs = kernel.ptr(); + double sum = 0; + int type = KERNEL_SMOOTH + KERNEL_INTEGER; + if( (_kernel.rows == 1 || _kernel.cols == 1) && + anchor.x*2 + 1 == _kernel.cols && + anchor.y*2 + 1 == _kernel.rows ) + type |= (KERNEL_SYMMETRICAL + KERNEL_ASYMMETRICAL); + + for( i = 0; i < sz; i++ ) + { + double a = coeffs[i], b = coeffs[sz - i - 1]; + if( a != b ) + type &= ~KERNEL_SYMMETRICAL; + if( a != -b ) + type &= ~KERNEL_ASYMMETRICAL; + if( a < 0 ) + type &= ~KERNEL_SMOOTH; + if( a != saturate_cast(a) ) + type &= ~KERNEL_INTEGER; + sum += a; + } + + if( fabs(sum - 1) > FLT_EPSILON*(fabs(sum) + 1) ) + type &= ~KERNEL_SMOOTH; + return type; +} + + +Ptr getLinearRowFilter( + int srcType, int bufType, + InputArray _kernel, int anchor, + int symmetryType) +{ + CV_INSTRUMENT_REGION(); + + Mat kernelMat = _kernel.getMat(); + CV_CPU_DISPATCH(getLinearRowFilter, (srcType, bufType, kernelMat, anchor, symmetryType), + CV_CPU_DISPATCH_MODES_ALL); +} + + +Ptr getLinearColumnFilter( + int bufType, int dstType, + InputArray kernel, int anchor, + int symmetryType, double delta, + int bits) +{ + CV_INSTRUMENT_REGION(); + + Mat kernelMat = kernel.getMat(); + CV_CPU_DISPATCH(getLinearColumnFilter, (bufType, dstType, kernelMat, anchor, symmetryType, delta, bits), + CV_CPU_DISPATCH_MODES_ALL); +} + + +Ptr createSeparableLinearFilter( + int _srcType, int _dstType, + InputArray __rowKernel, InputArray __columnKernel, + Point _anchor, double _delta, + int _rowBorderType, int _columnBorderType, + const Scalar& _borderValue) +{ + Mat _rowKernel = __rowKernel.getMat(), _columnKernel = __columnKernel.getMat(); + _srcType = CV_MAT_TYPE(_srcType); + _dstType = CV_MAT_TYPE(_dstType); + int sdepth = CV_MAT_DEPTH(_srcType), ddepth = CV_MAT_DEPTH(_dstType); + int cn = CV_MAT_CN(_srcType); + CV_Assert( cn == CV_MAT_CN(_dstType) ); + int rsize = _rowKernel.rows + _rowKernel.cols - 1; + int csize = _columnKernel.rows + _columnKernel.cols - 1; + if( _anchor.x < 0 ) + _anchor.x = rsize/2; + if( _anchor.y < 0 ) + _anchor.y = csize/2; + int rtype = getKernelType(_rowKernel, + _rowKernel.rows == 1 ? Point(_anchor.x, 0) : Point(0, _anchor.x)); + int ctype = getKernelType(_columnKernel, + _columnKernel.rows == 1 ? Point(_anchor.y, 0) : Point(0, _anchor.y)); + Mat rowKernel, columnKernel; + + int bdepth = std::max(CV_32F,std::max(sdepth, ddepth)); + int bits = 0; + + if( sdepth == CV_8U && + ((rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && + ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && + ddepth == CV_8U) || + ((rtype & (KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL)) && + (ctype & (KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL)) && + (rtype & ctype & KERNEL_INTEGER) && + ddepth == CV_16S)) ) + { + bdepth = CV_32S; + bits = ddepth == CV_8U ? 8 : 0; + _rowKernel.convertTo( rowKernel, CV_32S, 1 << bits ); + _columnKernel.convertTo( columnKernel, CV_32S, 1 << bits ); + bits *= 2; + _delta *= (1 << bits); + } + else + { + if( _rowKernel.type() != bdepth ) + _rowKernel.convertTo( rowKernel, bdepth ); + else + rowKernel = _rowKernel; + if( _columnKernel.type() != bdepth ) + _columnKernel.convertTo( columnKernel, bdepth ); + else + columnKernel = _columnKernel; + } + + int _bufType = CV_MAKETYPE(bdepth, cn); + Ptr _rowFilter = getLinearRowFilter( + _srcType, _bufType, rowKernel, _anchor.x, rtype); + Ptr _columnFilter = getLinearColumnFilter( + _bufType, _dstType, columnKernel, _anchor.y, ctype, _delta, bits ); + + return Ptr( new FilterEngine(Ptr(), _rowFilter, _columnFilter, + _srcType, _dstType, _bufType, _rowBorderType, _columnBorderType, _borderValue )); +} + + +/****************************************************************************************\ +* Non-separable linear filter * +\****************************************************************************************/ + +void preprocess2DKernel( const Mat& kernel, std::vector& coords, std::vector& coeffs ) +{ + int i, j, k, nz = countNonZero(kernel), ktype = kernel.type(); + if(nz == 0) + nz = 1; + CV_Assert( ktype == CV_8U || ktype == CV_32S || ktype == CV_32F || ktype == CV_64F ); + coords.resize(nz); + coeffs.resize(nz*getElemSize(ktype)); + uchar* _coeffs = &coeffs[0]; + + for( i = k = 0; i < kernel.rows; i++ ) + { + const uchar* krow = kernel.ptr(i); + for( j = 0; j < kernel.cols; j++ ) + { + if( ktype == CV_8U ) + { + uchar val = krow[j]; + if( val == 0 ) + continue; + coords[k] = Point(j,i); + _coeffs[k++] = val; + } + else if( ktype == CV_32S ) + { + int val = ((const int*)krow)[j]; + if( val == 0 ) + continue; + coords[k] = Point(j,i); + ((int*)_coeffs)[k++] = val; + } + else if( ktype == CV_32F ) + { + float val = ((const float*)krow)[j]; + if( val == 0 ) + continue; + coords[k] = Point(j,i); + ((float*)_coeffs)[k++] = val; + } + else + { + double val = ((const double*)krow)[j]; + if( val == 0 ) + continue; + coords[k] = Point(j,i); + ((double*)_coeffs)[k++] = val; + } + } + } +} + + +template struct Filter2D : public BaseFilter +{ + typedef typename CastOp::type1 KT; + typedef typename CastOp::rtype DT; + + Filter2D( const Mat& _kernel, Point _anchor, + double _delta, const CastOp& _castOp=CastOp(), + const VecOp& _vecOp=VecOp() ) + { + anchor = _anchor; + ksize = _kernel.size(); + delta = saturate_cast(_delta); + castOp0 = _castOp; + vecOp = _vecOp; + CV_Assert( _kernel.type() == DataType::type ); + preprocess2DKernel( _kernel, coords, coeffs ); + ptrs.resize( coords.size() ); + } + + void operator()(const uchar** src, uchar* dst, int dststep, int count, int width, int cn) CV_OVERRIDE + { + KT _delta = delta; + const Point* pt = &coords[0]; + const KT* kf = (const KT*)&coeffs[0]; + const ST** kp = (const ST**)&ptrs[0]; + int i, k, nz = (int)coords.size(); + CastOp castOp = castOp0; + + width *= cn; + for( ; count > 0; count--, dst += dststep, src++ ) + { + DT* D = (DT*)dst; + + for( k = 0; k < nz; k++ ) + kp[k] = (const ST*)src[pt[k].y] + pt[k].x*cn; + + i = vecOp((const uchar**)kp, dst, width); + #if CV_ENABLE_UNROLLED + for( ; i <= width - 4; i += 4 ) + { + KT s0 = _delta, s1 = _delta, s2 = _delta, s3 = _delta; + + for( k = 0; k < nz; k++ ) + { + const ST* sptr = kp[k] + i; + KT f = kf[k]; + s0 += f*sptr[0]; + s1 += f*sptr[1]; + s2 += f*sptr[2]; + s3 += f*sptr[3]; + } + + D[i] = castOp(s0); D[i+1] = castOp(s1); + D[i+2] = castOp(s2); D[i+3] = castOp(s3); + } + #endif + for( ; i < width; i++ ) + { + KT s0 = _delta; + for( k = 0; k < nz; k++ ) + s0 += kf[k]*kp[k][i]; + D[i] = castOp(s0); + } + } + } + + std::vector coords; + std::vector coeffs; + std::vector ptrs; + KT delta; + CastOp castOp0; + VecOp vecOp; +}; + +#ifdef HAVE_OPENCL + +#define DIVUP(total, grain) (((total) + (grain) - 1) / (grain)) +#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n))) + +// prepare kernel: transpose and make double rows (+align). Returns size of aligned row +// Samples: +// a b c +// Input: d e f +// g h i +// Output, last two zeros is the alignment: +// a d g a d g 0 0 +// b e h b e h 0 0 +// c f i c f i 0 0 +template +static int _prepareKernelFilter2D(std::vector & data, const Mat & kernel) +{ + Mat _kernel; kernel.convertTo(_kernel, DataDepth::value); + int size_y_aligned = ROUNDUP(kernel.rows * 2, 4); + data.clear(); data.resize(size_y_aligned * kernel.cols, 0); + for (int x = 0; x < kernel.cols; x++) + { + for (int y = 0; y < kernel.rows; y++) + { + data[x * size_y_aligned + y] = _kernel.at(y, x); + data[x * size_y_aligned + y + kernel.rows] = _kernel.at(y, x); + } + } + return size_y_aligned; +} + +static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, + InputArray _kernel, Point anchor, + double delta, int borderType ) +{ + int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + ddepth = ddepth < 0 ? sdepth : ddepth; + int dtype = CV_MAKE_TYPE(ddepth, cn), wdepth = std::max(std::max(sdepth, ddepth), CV_32F), + wtype = CV_MAKE_TYPE(wdepth, cn); + if (cn > 4) + return false; + + Size ksize = _kernel.size(); + if (anchor.x < 0) + anchor.x = ksize.width / 2; + if (anchor.y < 0) + anchor.y = ksize.height / 2; + + bool isolated = (borderType & BORDER_ISOLATED) != 0; + borderType &= ~BORDER_ISOLATED; + const cv::ocl::Device &device = cv::ocl::Device::getDefault(); + bool doubleSupport = device.doubleFPConfig() > 0; + if (wdepth == CV_64F && !doubleSupport) + return false; + + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", + "BORDER_WRAP", "BORDER_REFLECT_101" }; + + cv::Mat kernelMat = _kernel.getMat(); + cv::Size sz = _src.size(), wholeSize; + size_t globalsize[2] = { (size_t)sz.width, (size_t)sz.height }; + size_t localsize_general[2] = {0, 1}; + size_t* localsize = NULL; + + ocl::Kernel k; + UMat src = _src.getUMat(); + if (!isolated) + { + Point ofs; + src.locateROI(wholeSize, ofs); + } + + size_t tryWorkItems = device.maxWorkGroupSize(); + if (device.isIntel() && 128 < tryWorkItems) + tryWorkItems = 128; + char cvt[2][40]; + + // For smaller filter kernels, there is a special kernel that is more + // efficient than the general one. + UMat kernalDataUMat; + if (device.isIntel() && (device.type() & ocl::Device::TYPE_GPU) && + ((ksize.width < 5 && ksize.height < 5) || + (ksize.width == 5 && ksize.height == 5 && cn == 1))) + { + kernelMat = kernelMat.reshape(0, 1); + String kerStr = ocl::kernelToStr(kernelMat, CV_32F); + int h = isolated ? sz.height : wholeSize.height; + int w = isolated ? sz.width : wholeSize.width; + + if (w < ksize.width || h < ksize.height) + return false; + + // Figure out what vector size to use for loading the pixels. + int pxLoadNumPixels = cn != 1 || sz.width % 4 ? 1 : 4; + int pxLoadVecSize = cn * pxLoadNumPixels; + + // Figure out how many pixels per work item to compute in X and Y + // directions. Too many and we run out of registers. + int pxPerWorkItemX = 1; + int pxPerWorkItemY = 1; + if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4) + { + pxPerWorkItemX = sz.width % 8 ? sz.width % 4 ? sz.width % 2 ? 1 : 2 : 4 : 8; + pxPerWorkItemY = sz.height % 2 ? 1 : 2; + } + else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4)) + { + pxPerWorkItemX = sz.width % 2 ? 1 : 2; + pxPerWorkItemY = sz.height % 2 ? 1 : 2; + } + globalsize[0] = sz.width / pxPerWorkItemX; + globalsize[1] = sz.height / pxPerWorkItemY; + + // Need some padding in the private array for pixels + int privDataWidth = ROUNDUP(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels); + + // Make the global size a nice round number so the runtime can pick + // from reasonable choices for the workgroup size + const int wgRound = 256; + globalsize[0] = ROUNDUP(globalsize[0], wgRound); + + char build_options[1024]; + sprintf(build_options, "-D cn=%d " + "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " + "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d " + "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s " + "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d " + "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " + "-D convertToWT=%s -D convertToDstT=%s %s", + cn, anchor.x, anchor.y, ksize.width, ksize.height, + pxLoadVecSize, pxLoadNumPixels, + pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType], + isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", + privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1, + ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), + ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), + ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), kerStr.c_str()); + + if (!k.create("filter2DSmall", cv::ocl::imgproc::filter2DSmall_oclsrc, build_options)) + return false; + } + else + { + localsize = localsize_general; + std::vector kernelMatDataFloat; + int kernel_size_y2_aligned = _prepareKernelFilter2D(kernelMatDataFloat, kernelMat); + String kerStr = ocl::kernelToStr(kernelMatDataFloat, CV_32F); + + for ( ; ; ) + { + size_t BLOCK_SIZE = tryWorkItems; + while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)sz.width * 2) + BLOCK_SIZE /= 2; + + if ((size_t)ksize.width > BLOCK_SIZE) + return false; + + int requiredTop = anchor.y; + int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x; + int requiredBottom = ksize.height - 1 - anchor.y; + int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x; + int h = isolated ? sz.height : wholeSize.height; + int w = isolated ? sz.width : wholeSize.width; + bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight; + + if ((w < ksize.width) || (h < ksize.height)) + return false; + + String opts = format("-D LOCAL_SIZE=%d -D cn=%d " + "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " + "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s " + "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " + "-D convertToWT=%s -D convertToDstT=%s", + (int)BLOCK_SIZE, cn, anchor.x, anchor.y, + ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType], + extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", + isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", + doubleSupport ? " -D DOUBLE_SUPPORT" : "", kerStr.c_str(), + ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), + ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), + ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1])); + + localsize[0] = BLOCK_SIZE; + globalsize[0] = DIVUP(sz.width, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE; + globalsize[1] = sz.height; + + if (!k.create("filter2D", cv::ocl::imgproc::filter2D_oclsrc, opts)) + return false; + + size_t kernelWorkGroupSize = k.workGroupSize(); + if (localsize[0] <= kernelWorkGroupSize) + break; + if (BLOCK_SIZE < kernelWorkGroupSize) + return false; + tryWorkItems = kernelWorkGroupSize; + } + } + + _dst.create(sz, dtype); + UMat dst = _dst.getUMat(); + + int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); + int srcOffsetY = (int)(src.offset / src.step); + int srcEndX = (isolated ? (srcOffsetX + sz.width) : wholeSize.width); + int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height); + + k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY, + srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), (float)delta); + + return k.run(2, globalsize, localsize, false); +} + +const int shift_bits = 8; + +static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor, + int borderType, int ddepth, bool fast8uc1, bool int_arithm) +{ + int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + Size bufSize = buf.size(); + int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type); + + if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) + return false; + +#ifdef __ANDROID__ + size_t localsize[2] = {16, 10}; +#else + size_t localsize[2] = {16, 16}; +#endif + + size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]}; + if (fast8uc1) + globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0]; + + int radiusX = anchor, radiusY = (buf.rows - src.rows) >> 1; + + bool isolated = (borderType & BORDER_ISOLATED) != 0; + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }, + * const btype = borderMap[borderType & ~BORDER_ISOLATED]; + + bool extra_extrapolation = src.rows < (int)((-radiusY + globalsize[1]) >> 1) + 1; + extra_extrapolation |= src.rows < radiusY; + extra_extrapolation |= src.cols < (int)((-radiusX + globalsize[0] + 8 * localsize[0] + 3) >> 1) + 1; + extra_extrapolation |= src.cols < radiusX; + + char cvt[40]; + cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s" + " -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s%s%s", + radiusX, (int)localsize[0], (int)localsize[1], cn, btype, + extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", + isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", + ocl::typeToStr(type), ocl::typeToStr(buf_type), + ocl::convertTypeStr(sdepth, bdepth, cn, cvt), + ocl::typeToStr(sdepth), ocl::typeToStr(bdepth), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + int_arithm ? " -D INTEGER_ARITHMETIC" : ""); + build_options += ocl::kernelToStr(kernelX, bdepth); + + Size srcWholeSize; Point srcOffset; + src.locateROI(srcWholeSize, srcOffset); + + String kernelName("row_filter"); + if (fast8uc1) + kernelName += "_C1_D0"; + + ocl::Kernel k(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, + build_options); + if (k.empty()) + return false; + + if (fast8uc1) + k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x, + srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height, + ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()), + buf.cols, buf.rows, radiusY); + else + k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffset.x, + srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height, + ocl::KernelArg::PtrWriteOnly(buf), (int)buf.step, buf.cols, buf.rows, radiusY); + + return k.run(2, globalsize, localsize, false); +} + +static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor, bool int_arithm) +{ + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if (dst.depth() == CV_64F && !doubleSupport) + return false; + +#ifdef __ANDROID__ + size_t localsize[2] = { 16, 10 }; +#else + size_t localsize[2] = { 16, 16 }; +#endif + size_t globalsize[2] = { 0, 0 }; + + int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype); + Size sz = dst.size(); + int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type); + + globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1]; + globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0]; + + char cvt[40]; + cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d" + " -D srcT=%s -D dstT=%s -D convertToDstT=%s" + " -D srcT1=%s -D dstT1=%s -D SHIFT_BITS=%d%s%s", + anchor, (int)localsize[0], (int)localsize[1], cn, + ocl::typeToStr(buf_type), ocl::typeToStr(dtype), + ocl::convertTypeStr(bdepth, ddepth, cn, cvt), + ocl::typeToStr(bdepth), ocl::typeToStr(ddepth), + 2*shift_bits, doubleSupport ? " -D DOUBLE_SUPPORT" : "", + int_arithm ? " -D INTEGER_ARITHMETIC" : ""); + build_options += ocl::kernelToStr(kernelY, bdepth); + + ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, + build_options); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst), + static_cast(delta)); + + return k.run(2, globalsize, localsize, false); +} + +const int optimizedSepFilterLocalWidth = 16; +const int optimizedSepFilterLocalHeight = 8; + +static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, + Mat row_kernel, Mat col_kernel, + double delta, int borderType, int ddepth, int bdepth, bool int_arithm) +{ + Size size = _src.size(), wholeSize; + Point origin; + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), + esz = CV_ELEM_SIZE(stype), wdepth = std::max(std::max(sdepth, ddepth), bdepth), + dtype = CV_MAKE_TYPE(ddepth, cn); + size_t src_step = _src.step(), src_offset = _src.offset(); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if (esz == 0 || src_step == 0 + || (src_offset % src_step) % esz != 0 + || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) + || !(borderType == BORDER_CONSTANT + || borderType == BORDER_REPLICATE + || borderType == BORDER_REFLECT + || borderType == BORDER_WRAP + || borderType == BORDER_REFLECT_101)) + return false; + + size_t lt2[2] = { optimizedSepFilterLocalWidth, optimizedSepFilterLocalHeight }; + size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1]}; + + char cvt[2][40]; + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", + "BORDER_REFLECT_101" }; + + String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s" + " -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s" + " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d -D SHIFT_BITS=%d%s", + (int)lt2[0], (int)lt2[1], row_kernel.cols / 2, col_kernel.cols / 2, + ocl::kernelToStr(row_kernel, wdepth, "KERNEL_MATRIX_X").c_str(), + ocl::kernelToStr(col_kernel, wdepth, "KERNEL_MATRIX_Y").c_str(), + ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), + ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype), + ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType], + ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth), + cn, 2*shift_bits, int_arithm ? " -D INTEGER_ARITHMETIC" : ""); + + ocl::Kernel k("sep_filter", ocl::imgproc::filterSep_singlePass_oclsrc, opts); + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(size, dtype); + UMat dst = _dst.getUMat(); + + int src_offset_x = static_cast((src_offset % src_step) / esz); + int src_offset_y = static_cast(src_offset / src_step); + + src.locateROI(wholeSize, origin); + + k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y, + wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst), + static_cast(delta)); + + return k.run(2, gt2, lt2, false); +} + +bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, + InputArray _kernelX, InputArray _kernelY, Point anchor, + double delta, int borderType ) +{ + const ocl::Device & d = ocl::Device::getDefault(); + Size imgSize = _src.size(); + + int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + if (cn > 4) + return false; + + Mat kernelX = _kernelX.getMat().reshape(1, 1); + if (kernelX.cols % 2 != 1) + return false; + Mat kernelY = _kernelY.getMat().reshape(1, 1); + if (kernelY.cols % 2 != 1) + return false; + + if (ddepth < 0) + ddepth = sdepth; + + if (anchor.x < 0) + anchor.x = kernelX.cols >> 1; + if (anchor.y < 0) + anchor.y = kernelY.cols >> 1; + + int rtype = getKernelType(kernelX, + kernelX.rows == 1 ? Point(anchor.x, 0) : Point(0, anchor.x)); + int ctype = getKernelType(kernelY, + kernelY.rows == 1 ? Point(anchor.y, 0) : Point(0, anchor.y)); + + int bdepth = CV_32F; + bool int_arithm = false; + if( sdepth == CV_8U && ddepth == CV_8U && + rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && + ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL) + { + if (ocl::Device::getDefault().isIntel()) + { + for (int i=0; i(0, i) = (float) cvRound(kernelX.at(0, i) * (1 << shift_bits)); + if (kernelX.data != kernelY.data) + for (int i=0; i(0, i) = (float) cvRound(kernelY.at(0, i) * (1 << shift_bits)); + } else + { + bdepth = CV_32S; + kernelX.convertTo( kernelX, bdepth, 1 << shift_bits ); + kernelY.convertTo( kernelY, bdepth, 1 << shift_bits ); + } + int_arithm = true; + } + + CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 && + imgSize.width > optimizedSepFilterLocalWidth + anchor.x && + imgSize.height > optimizedSepFilterLocalHeight + anchor.y && + (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && + anchor == Point(kernelX.cols >> 1, kernelY.cols >> 1) && + OCL_PERFORMANCE_CHECK(d.isIntel()), // TODO FIXIT + ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta, + borderType & ~BORDER_ISOLATED, ddepth, bdepth, int_arithm), true) + + UMat src = _src.getUMat(); + Size srcWholeSize; Point srcOffset; + src.locateROI(srcWholeSize, srcOffset); + + bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 && + src.cols % 4 == 0 && src.step % 4 == 0; + + Size srcSize = src.size(); + Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); + UMat buf(bufSize, CV_MAKETYPE(bdepth, cn)); + if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1, int_arithm)) + return false; + + _dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); + UMat dst = _dst.getUMat(); + + return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y, int_arithm); +} + +#endif + +Ptr getLinearFilter( + int srcType, int dstType, + InputArray filter_kernel, Point anchor, + double delta, int bits) +{ + CV_INSTRUMENT_REGION(); + + Mat kernelMat = filter_kernel.getMat(); + CV_CPU_DISPATCH(getLinearFilter, (srcType, dstType, kernelMat, anchor, delta, bits), + CV_CPU_DISPATCH_MODES_ALL); +} + + +Ptr createLinearFilter( + int _srcType, int _dstType, + InputArray filter_kernel, + Point _anchor, double _delta, + int _rowBorderType, int _columnBorderType, + const Scalar& _borderValue) +{ + Mat _kernel = filter_kernel.getMat(); + _srcType = CV_MAT_TYPE(_srcType); + _dstType = CV_MAT_TYPE(_dstType); + int cn = CV_MAT_CN(_srcType); + CV_Assert( cn == CV_MAT_CN(_dstType) ); + + Mat kernel = _kernel; + int bits = 0; + + /*int sdepth = CV_MAT_DEPTH(_srcType), ddepth = CV_MAT_DEPTH(_dstType); + int ktype = _kernel.depth() == CV_32S ? KERNEL_INTEGER : getKernelType(_kernel, _anchor); + if( sdepth == CV_8U && (ddepth == CV_8U || ddepth == CV_16S) && + _kernel.rows*_kernel.cols <= (1 << 10) ) + { + bits = (ktype & KERNEL_INTEGER) ? 0 : 11; + _kernel.convertTo(kernel, CV_32S, 1 << bits); + }*/ + + Ptr _filter2D = getLinearFilter(_srcType, _dstType, + kernel, _anchor, _delta, bits); + + return makePtr(_filter2D, Ptr(), + Ptr(), _srcType, _dstType, _srcType, + _rowBorderType, _columnBorderType, _borderValue ); +} + + +//================================================================ +// HAL interface +//================================================================ + +static bool replacementFilter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, bool isSubmatrix) +{ + cvhalFilter2D* ctx; + int res = cv_hal_filterInit(&ctx, kernel_data, kernel_step, kernel_type, kernel_width, kernel_height, width, height, + stype, dtype, borderType, delta, anchor_x, anchor_y, isSubmatrix, src_data == dst_data); + if (res != CV_HAL_ERROR_OK) + return false; + res = cv_hal_filter(ctx, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y); + bool success = (res == CV_HAL_ERROR_OK); + res = cv_hal_filterFree(ctx); + if (res != CV_HAL_ERROR_OK) + return false; + return success; +} + +#ifdef HAVE_IPP +static bool ippFilter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, + bool isSubmatrix) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + + ::ipp::IwiSize iwSize(width, height); + ::ipp::IwiSize kernelSize(kernel_width, kernel_height); + IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype)); + int channels = CV_MAT_CN(stype); + + CV_UNUSED(isSubmatrix); + +#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel + if(kernel_width == 1 && kernel_height == 1) + return false; +#endif + +#if IPP_DISABLE_FILTER2D_BIG_MASK + // Too big difference compared to OpenCV FFT-based convolution + if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7)) + return false; + + // Poor optimization for big kernels + if(kernel_width > 7 || kernel_height > 7) + return false; +#endif + + if(src_data == dst_data) + return false; + + if(stype != dtype) + return false; + + if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1) + return false; + + // TODO: Implement offset for 8u, 16u + if(std::fabs(delta) >= DBL_EPSILON) + return false; + + if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height)) + return false; + + try + { + ::ipp::IwiBorderSize iwBorderSize; + ::ipp::IwiBorderType iwBorderType; + ::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step); + ::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step); + ::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step); + + iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize); + iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize); + if(!iwBorderType) + return false; + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType); + } + catch(const ::ipp::IwException& ex) + { + CV_UNUSED(ex); + return false; + } + + return true; +#else + CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step); + CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width); + CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); CV_UNUSED(kernel_data); CV_UNUSED(kernel_step); + CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta); + CV_UNUSED(borderType); CV_UNUSED(isSubmatrix); + return false; +#endif +} +#endif + +static bool dftFilter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType) +{ + { + int sdepth = CV_MAT_DEPTH(stype); + int ddepth = CV_MAT_DEPTH(dtype); + int dft_filter_size = checkHardwareSupport(CV_CPU_SSE3) && ((sdepth == CV_8U && (ddepth == CV_8U || ddepth == CV_16S)) || (sdepth == CV_32F && ddepth == CV_32F)) ? 130 : 50; + if (kernel_width * kernel_height < dft_filter_size) + return false; + } + + Point anchor = Point(anchor_x, anchor_y); + Mat kernel = Mat(Size(kernel_width, kernel_height), kernel_type, kernel_data, kernel_step); + + Mat src(Size(full_width-offset_x, full_height-offset_y), stype, src_data, src_step); + Mat dst(Size(full_width, full_height), dtype, dst_data, dst_step); + Mat temp; + int src_channels = CV_MAT_CN(stype); + int dst_channels = CV_MAT_CN(dtype); + int ddepth = CV_MAT_DEPTH(dtype); + // crossCorr doesn't accept non-zero delta with multiple channels + if (src_channels != 1 && delta != 0) { + // The semantics of filter2D require that the delta be applied + // as floating-point math. So wee need an intermediate Mat + // with a float datatype. If the dest is already floats, + // we just use that. + int corrDepth = ddepth; + if ((ddepth == CV_32F || ddepth == CV_64F) && src_data != dst_data) { + temp = Mat(Size(full_width, full_height), dtype, dst_data, dst_step); + } else { + corrDepth = ddepth == CV_64F ? CV_64F : CV_32F; + temp.create(Size(full_width, full_height), CV_MAKETYPE(corrDepth, dst_channels)); + } + crossCorr(src, kernel, temp, src.size(), + CV_MAKETYPE(corrDepth, src_channels), + anchor, 0, borderType); + add(temp, delta, temp); + if (temp.data != dst_data) { + temp.convertTo(dst, dst.type()); + } + } else { + if (src_data != dst_data) + temp = Mat(Size(full_width, full_height), dtype, dst_data, dst_step); + else + temp.create(Size(full_width, full_height), dtype); + crossCorr(src, kernel, temp, src.size(), + CV_MAKETYPE(ddepth, src_channels), + anchor, delta, borderType); + if (temp.data != dst_data) + temp.copyTo(dst); + } + return true; +} + +static void ocvFilter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType) +{ + int borderTypeValue = borderType & ~BORDER_ISOLATED; + Mat kernel = Mat(Size(kernel_width, kernel_height), kernel_type, kernel_data, kernel_step); + Ptr f = createLinearFilter(stype, dtype, kernel, Point(anchor_x, anchor_y), delta, + borderTypeValue); + Mat src(Size(width, height), stype, src_data, src_step); + Mat dst(Size(width, height), dtype, dst_data, dst_step); + f->apply(src, dst, Size(full_width, full_height), Point(offset_x, offset_y)); +} + +static bool replacementSepFilter(int stype, int dtype, int ktype, + uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, + int width, int height, int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernelx_data, int kernelx_len, + uchar * kernely_data, int kernely_len, + int anchor_x, int anchor_y, double delta, int borderType) +{ + cvhalFilter2D *ctx; + int res = cv_hal_sepFilterInit(&ctx, stype, dtype, ktype, + kernelx_data, kernelx_len, + kernely_data, kernely_len, + anchor_x, anchor_y, delta, borderType); + if (res != CV_HAL_ERROR_OK) + return false; + res = cv_hal_sepFilter(ctx, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y); + bool success = (res == CV_HAL_ERROR_OK); + res = cv_hal_sepFilterFree(ctx); + if (res != CV_HAL_ERROR_OK) + return false; + return success; +} + +static void ocvSepFilter(int stype, int dtype, int ktype, + uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, + int width, int height, int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernelx_data, int kernelx_len, + uchar * kernely_data, int kernely_len, + int anchor_x, int anchor_y, double delta, int borderType) +{ + Mat kernelX(Size(kernelx_len, 1), ktype, kernelx_data); + Mat kernelY(Size(kernely_len, 1), ktype, kernely_data); + Ptr f = createSeparableLinearFilter(stype, dtype, kernelX, kernelY, + Point(anchor_x, anchor_y), + delta, borderType & ~BORDER_ISOLATED); + Mat src(Size(width, height), stype, src_data, src_step); + Mat dst(Size(width, height), dtype, dst_data, dst_step); + f->apply(src, dst, Size(full_width, full_height), Point(offset_x, offset_y)); +}; + +//=================================================================== +// HAL functions +//=================================================================== + +namespace hal { + + +CV_DEPRECATED Ptr Filter2D::create(uchar * , size_t , int , + int , int , + int , int , + int , int , + int , double , + int , int , + bool , bool ) { return Ptr(); } + +CV_DEPRECATED Ptr SepFilter2D::create(int , int , int , + uchar * , int , + uchar * , int , + int , int , + double , int ) { return Ptr(); } + + +void filter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, + bool isSubmatrix) +{ + bool res; + res = replacementFilter2D(stype, dtype, kernel_type, + src_data, src_step, + dst_data, dst_step, + width, height, + full_width, full_height, + offset_x, offset_y, + kernel_data, kernel_step, + kernel_width, kernel_height, + anchor_x, anchor_y, + delta, borderType, isSubmatrix); + if (res) + return; + + CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type, + src_data, src_step, + dst_data, dst_step, + width, height, + full_width, full_height, + offset_x, offset_y, + kernel_data, kernel_step, + kernel_width, kernel_height, + anchor_x, anchor_y, + delta, borderType, isSubmatrix)) + + res = dftFilter2D(stype, dtype, kernel_type, + src_data, src_step, + dst_data, dst_step, + full_width, full_height, + offset_x, offset_y, + kernel_data, kernel_step, + kernel_width, kernel_height, + anchor_x, anchor_y, + delta, borderType); + if (res) + return; + ocvFilter2D(stype, dtype, kernel_type, + src_data, src_step, + dst_data, dst_step, + width, height, + full_width, full_height, + offset_x, offset_y, + kernel_data, kernel_step, + kernel_width, kernel_height, + anchor_x, anchor_y, + delta, borderType); +} + +//--------------------------------------------------------------- + +void sepFilter2D(int stype, int dtype, int ktype, + uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, + int width, int height, int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernelx_data, int kernelx_len, + uchar * kernely_data, int kernely_len, + int anchor_x, int anchor_y, double delta, int borderType) +{ + + bool res = replacementSepFilter(stype, dtype, ktype, + src_data, src_step, dst_data, dst_step, + width, height, full_width, full_height, + offset_x, offset_y, + kernelx_data, kernelx_len, + kernely_data, kernely_len, + anchor_x, anchor_y, delta, borderType); + if (res) + return; + ocvSepFilter(stype, dtype, ktype, + src_data, src_step, dst_data, dst_step, + width, height, full_width, full_height, + offset_x, offset_y, + kernelx_data, kernelx_len, + kernely_data, kernely_len, + anchor_x, anchor_y, delta, borderType); +} + +} // namespace cv::hal:: + +//================================================================ +// Main interface +//================================================================ + +void filter2D(InputArray _src, OutputArray _dst, int ddepth, + InputArray _kernel, Point anchor0, + double delta, int borderType) +{ + CV_INSTRUMENT_REGION(); + + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, + ocl_filter2D(_src, _dst, ddepth, _kernel, anchor0, delta, borderType)) + + Mat src = _src.getMat(), kernel = _kernel.getMat(); + + if( ddepth < 0 ) + ddepth = src.depth(); + + _dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); + Mat dst = _dst.getMat(); + Point anchor = normalizeAnchor(anchor0, kernel.size()); + + Point ofs; + Size wsz(src.cols, src.rows); + if( (borderType & BORDER_ISOLATED) == 0 ) + src.locateROI( wsz, ofs ); + + hal::filter2D(src.type(), dst.type(), kernel.type(), + src.data, src.step, dst.data, dst.step, + dst.cols, dst.rows, wsz.width, wsz.height, ofs.x, ofs.y, + kernel.data, kernel.step, kernel.cols, kernel.rows, + anchor.x, anchor.y, + delta, borderType, src.isSubmatrix()); +} + +void sepFilter2D(InputArray _src, OutputArray _dst, int ddepth, + InputArray _kernelX, InputArray _kernelY, Point anchor, + double delta, int borderType) +{ + CV_INSTRUMENT_REGION(); + + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (size_t)_src.rows() > _kernelY.total() && (size_t)_src.cols() > _kernelX.total(), + ocl_sepFilter2D(_src, _dst, ddepth, _kernelX, _kernelY, anchor, delta, borderType)) + + Mat src = _src.getMat(), kernelX = _kernelX.getMat(), kernelY = _kernelY.getMat(); + + if( ddepth < 0 ) + ddepth = src.depth(); + + _dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); + Mat dst = _dst.getMat(); + + Point ofs; + Size wsz(src.cols, src.rows); + if( (borderType & BORDER_ISOLATED) == 0 ) + src.locateROI( wsz, ofs ); + + CV_Assert( kernelX.type() == kernelY.type() && + (kernelX.cols == 1 || kernelX.rows == 1) && + (kernelY.cols == 1 || kernelY.rows == 1) ); + + Mat contKernelX = kernelX.isContinuous() ? kernelX : kernelX.clone(); + Mat contKernelY = kernelY.isContinuous() ? kernelY : kernelY.clone(); + + hal::sepFilter2D(src.type(), dst.type(), kernelX.type(), + src.data, src.step, dst.data, dst.step, + dst.cols, dst.rows, wsz.width, wsz.height, ofs.x, ofs.y, + contKernelX.data, kernelX.cols + kernelX.rows - 1, + contKernelY.data, kernelY.cols + kernelY.rows - 1, + anchor.x, anchor.y, delta, borderType & ~BORDER_ISOLATED); +} + +} // namespace + +CV_IMPL void +cvFilter2D( const CvArr* srcarr, CvArr* dstarr, const CvMat* _kernel, CvPoint anchor ) +{ + cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); + cv::Mat kernel = cv::cvarrToMat(_kernel); + + CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() ); + + cv::filter2D( src, dst, dst.depth(), kernel, anchor, 0, cv::BORDER_REPLICATE ); +} + +/* End of file. */ diff --git a/modules/imgproc/src/filter.hpp b/modules/imgproc/src/filter.hpp index 93f3f177e6..7b792d1935 100644 --- a/modules/imgproc/src/filter.hpp +++ b/modules/imgproc/src/filter.hpp @@ -45,17 +45,13 @@ namespace cv { -#if CV_TRY_AVX2 - int RowVec_32f_AVX(const float* src0, const float* _kx, float* dst, int width, int cn, int _ksize); - int SymmColumnVec_32f_Symm_AVX(const float** src, const float* ky, float* dst, float delta, int width, int ksize2); - int SymmColumnVec_32f_Unsymm_AVX(const float** src, const float* ky, float* dst, float delta, int width, int ksize2); -#endif - #ifdef HAVE_OPENCL bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, InputArray _kernelX, InputArray _kernelY, Point anchor, double delta, int borderType ); #endif + + void preprocess2DKernel(const Mat& kernel, std::vector& coords, std::vector& coeffs); } #endif diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.simd.hpp similarity index 68% rename from modules/imgproc/src/filter.cpp rename to modules/imgproc/src/filter.simd.hpp index 538f158b6e..f09cd1ec1d 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.simd.hpp @@ -41,160 +41,85 @@ //M*/ #include "precomp.hpp" -#include "opencv2/core/opencl/ocl_defs.hpp" -#include "opencl_kernels_imgproc.hpp" -#include "hal_replacement.hpp" #include "opencv2/core/hal/intrin.hpp" #include "filter.hpp" +#if defined(CV_CPU_BASELINE_MODE) +#if IPP_VERSION_X100 >= 710 +#define USE_IPP_SEP_FILTERS 1 +#else +#undef USE_IPP_SEP_FILTERS +#endif +#endif + /****************************************************************************************\ Base Image Filter \****************************************************************************************/ -#if IPP_VERSION_X100 >= 710 -#define USE_IPP_SEP_FILTERS 1 -#else -#undef USE_IPP_SEP_FILTERS -#endif +namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +int FilterEngine__start(FilterEngine& this_, const Size &_wholeSize, const Size &sz, const Point &ofs); +int FilterEngine__proceed(FilterEngine& this_, const uchar* src, int srcstep, int count, + uchar* dst, int dststep); +void FilterEngine__apply(FilterEngine& this_, const Mat& src, Mat& dst, const Size& wsz, const Point& ofs); -namespace cv -{ +Ptr getLinearRowFilter( + int srcType, int bufType, + const Mat& kernel, int anchor, + int symmetryType); -BaseRowFilter::BaseRowFilter() { ksize = anchor = -1; } -BaseRowFilter::~BaseRowFilter() {} +Ptr getLinearColumnFilter( + int bufType, int dstType, + const Mat& kernel, int anchor, + int symmetryType, double delta, + int bits); -BaseColumnFilter::BaseColumnFilter() { ksize = anchor = -1; } -BaseColumnFilter::~BaseColumnFilter() {} -void BaseColumnFilter::reset() {} - -BaseFilter::BaseFilter() { ksize = Size(-1,-1); anchor = Point(-1,-1); } -BaseFilter::~BaseFilter() {} -void BaseFilter::reset() {} - -FilterEngine::FilterEngine() - : srcType(-1), dstType(-1), bufType(-1), maxWidth(0), wholeSize(-1, -1), dx1(0), dx2(0), - rowBorderType(BORDER_REPLICATE), columnBorderType(BORDER_REPLICATE), - borderElemSize(0), bufStep(0), startY(0), startY0(0), endY(0), rowCount(0), dstY(0) -{ -} +Ptr getLinearFilter( + int srcType, int dstType, + const Mat& filter_kernel, Point anchor, + double delta, int bits); -FilterEngine::FilterEngine( const Ptr& _filter2D, - const Ptr& _rowFilter, - const Ptr& _columnFilter, - int _srcType, int _dstType, int _bufType, - int _rowBorderType, int _columnBorderType, - const Scalar& _borderValue ) - : srcType(-1), dstType(-1), bufType(-1), maxWidth(0), wholeSize(-1, -1), dx1(0), dx2(0), - rowBorderType(BORDER_REPLICATE), columnBorderType(BORDER_REPLICATE), - borderElemSize(0), bufStep(0), startY(0), startY0(0), endY(0), rowCount(0), dstY(0) -{ - init(_filter2D, _rowFilter, _columnFilter, _srcType, _dstType, _bufType, - _rowBorderType, _columnBorderType, _borderValue); -} - -FilterEngine::~FilterEngine() -{ -} - - -void FilterEngine::init( const Ptr& _filter2D, - const Ptr& _rowFilter, - const Ptr& _columnFilter, - int _srcType, int _dstType, int _bufType, - int _rowBorderType, int _columnBorderType, - const Scalar& _borderValue ) -{ - _srcType = CV_MAT_TYPE(_srcType); - _bufType = CV_MAT_TYPE(_bufType); - _dstType = CV_MAT_TYPE(_dstType); - - srcType = _srcType; - int srcElemSize = (int)getElemSize(srcType); - dstType = _dstType; - bufType = _bufType; - - filter2D = _filter2D; - rowFilter = _rowFilter; - columnFilter = _columnFilter; - - if( _columnBorderType < 0 ) - _columnBorderType = _rowBorderType; - - rowBorderType = _rowBorderType; - columnBorderType = _columnBorderType; - - CV_Assert( columnBorderType != BORDER_WRAP ); - - if( isSeparable() ) - { - CV_Assert( rowFilter && columnFilter ); - ksize = Size(rowFilter->ksize, columnFilter->ksize); - anchor = Point(rowFilter->anchor, columnFilter->anchor); - } - else - { - CV_Assert( bufType == srcType ); - ksize = filter2D->ksize; - anchor = filter2D->anchor; - } - - CV_Assert( 0 <= anchor.x && anchor.x < ksize.width && - 0 <= anchor.y && anchor.y < ksize.height ); - - borderElemSize = srcElemSize/(CV_MAT_DEPTH(srcType) >= CV_32S ? sizeof(int) : 1); - int borderLength = std::max(ksize.width - 1, 1); - borderTab.resize(borderLength*borderElemSize); - - maxWidth = bufStep = 0; - constBorderRow.clear(); - - if( rowBorderType == BORDER_CONSTANT || columnBorderType == BORDER_CONSTANT ) - { - constBorderValue.resize(srcElemSize*borderLength); - int srcType1 = CV_MAKETYPE(CV_MAT_DEPTH(srcType), MIN(CV_MAT_CN(srcType), 4)); - scalarToRawData(_borderValue, &constBorderValue[0], srcType1, - borderLength*CV_MAT_CN(srcType)); - } - - wholeSize = Size(-1,-1); -} +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #define VEC_ALIGN CV_MALLOC_ALIGN -int FilterEngine::start(const Size &_wholeSize, const Size &sz, const Point &ofs) +int FilterEngine__start(FilterEngine& this_, const Size &_wholeSize, const Size &sz, const Point &ofs) { + CV_INSTRUMENT_REGION(); + int i, j; - wholeSize = _wholeSize; - roi = Rect(ofs, sz); - CV_Assert( roi.x >= 0 && roi.y >= 0 && roi.width >= 0 && roi.height >= 0 && - roi.x + roi.width <= wholeSize.width && - roi.y + roi.height <= wholeSize.height ); + this_.wholeSize = _wholeSize; + this_.roi = Rect(ofs, sz); + CV_Assert( this_.roi.x >= 0 && this_.roi.y >= 0 && this_.roi.width >= 0 && this_.roi.height >= 0 && + this_.roi.x + this_.roi.width <= this_.wholeSize.width && + this_.roi.y + this_.roi.height <= this_.wholeSize.height ); - int esz = (int)getElemSize(srcType); - int bufElemSize = (int)getElemSize(bufType); - const uchar* constVal = !constBorderValue.empty() ? &constBorderValue[0] : 0; + int esz = (int)getElemSize(this_.srcType); + int bufElemSize = (int)getElemSize(this_.bufType); + const uchar* constVal = !this_.constBorderValue.empty() ? &this_.constBorderValue[0] : 0; - int _maxBufRows = std::max(ksize.height + 3, - std::max(anchor.y, - ksize.height-anchor.y-1)*2+1); + int _maxBufRows = std::max(this_.ksize.height + 3, + std::max(this_.anchor.y, + this_.ksize.height-this_.anchor.y-1)*2+1); - if( maxWidth < roi.width || _maxBufRows != (int)rows.size() ) + if (this_.maxWidth < this_.roi.width || _maxBufRows != (int)this_.rows.size() ) { - rows.resize(_maxBufRows); - maxWidth = std::max(maxWidth, roi.width); - int cn = CV_MAT_CN(srcType); - srcRow.resize(esz*(maxWidth + ksize.width - 1)); - if( columnBorderType == BORDER_CONSTANT ) + this_.rows.resize(_maxBufRows); + this_.maxWidth = std::max(this_.maxWidth, this_.roi.width); + int cn = CV_MAT_CN(this_.srcType); + this_.srcRow.resize(esz*(this_.maxWidth + this_.ksize.width - 1)); + if (this_.columnBorderType == BORDER_CONSTANT) { CV_Assert(constVal != NULL); - constBorderRow.resize(getElemSize(bufType)*(maxWidth + ksize.width - 1 + VEC_ALIGN)); - uchar *dst = alignPtr(&constBorderRow[0], VEC_ALIGN), *tdst; - int n = (int)constBorderValue.size(), N; - N = (maxWidth + ksize.width - 1)*esz; - tdst = isSeparable() ? &srcRow[0] : dst; + this_.constBorderRow.resize(getElemSize(this_.bufType)*(this_.maxWidth + this_.ksize.width - 1 + VEC_ALIGN)); + uchar *dst = alignPtr(&this_.constBorderRow[0], VEC_ALIGN); + int n = (int)this_.constBorderValue.size(); + int N = (this_.maxWidth + this_.ksize.width - 1)*esz; + uchar *tdst = this_.isSeparable() ? &this_.srcRow[0] : dst; for( i = 0; i < N; i += n ) { @@ -203,126 +128,113 @@ int FilterEngine::start(const Size &_wholeSize, const Size &sz, const Point &ofs tdst[i+j] = constVal[j]; } - if( isSeparable() ) - (*rowFilter)(&srcRow[0], dst, maxWidth, cn); + if (this_.isSeparable()) + (*this_.rowFilter)(&this_.srcRow[0], dst, this_.maxWidth, cn); } - int maxBufStep = bufElemSize*(int)alignSize(maxWidth + - (!isSeparable() ? ksize.width - 1 : 0),VEC_ALIGN); - ringBuf.resize(maxBufStep*rows.size()+VEC_ALIGN); + int maxBufStep = bufElemSize*(int)alignSize(this_.maxWidth + + (!this_.isSeparable() ? this_.ksize.width - 1 : 0), VEC_ALIGN); + this_.ringBuf.resize(maxBufStep*this_.rows.size()+VEC_ALIGN); } // adjust bufstep so that the used part of the ring buffer stays compact in memory - bufStep = bufElemSize*(int)alignSize(roi.width + (!isSeparable() ? ksize.width - 1 : 0),VEC_ALIGN); + this_.bufStep = bufElemSize*(int)alignSize(this_.roi.width + (!this_.isSeparable() ? this_.ksize.width - 1 : 0), VEC_ALIGN); - dx1 = std::max(anchor.x - roi.x, 0); - dx2 = std::max(ksize.width - anchor.x - 1 + roi.x + roi.width - wholeSize.width, 0); + this_.dx1 = std::max(this_.anchor.x - this_.roi.x, 0); + this_.dx2 = std::max(this_.ksize.width - this_.anchor.x - 1 + this_.roi.x + this_.roi.width - this_.wholeSize.width, 0); // recompute border tables - if( dx1 > 0 || dx2 > 0 ) + if (this_.dx1 > 0 || this_.dx2 > 0) { - if( rowBorderType == BORDER_CONSTANT ) + if (this_.rowBorderType == BORDER_CONSTANT ) { CV_Assert(constVal != NULL); - int nr = isSeparable() ? 1 : (int)rows.size(); + int nr = this_.isSeparable() ? 1 : (int)this_.rows.size(); for( i = 0; i < nr; i++ ) { - uchar* dst = isSeparable() ? &srcRow[0] : alignPtr(&ringBuf[0],VEC_ALIGN) + bufStep*i; - memcpy( dst, constVal, dx1*esz ); - memcpy( dst + (roi.width + ksize.width - 1 - dx2)*esz, constVal, dx2*esz ); + uchar* dst = this_.isSeparable() ? &this_.srcRow[0] : alignPtr(&this_.ringBuf[0], VEC_ALIGN) + this_.bufStep*i; + memcpy(dst, constVal, this_.dx1*esz); + memcpy(dst + (this_.roi.width + this_.ksize.width - 1 - this_.dx2)*esz, constVal, this_.dx2*esz); } } else { - int xofs1 = std::min(roi.x, anchor.x) - roi.x; + int xofs1 = std::min(this_.roi.x, this_.anchor.x) - this_.roi.x; - int btab_esz = borderElemSize, wholeWidth = wholeSize.width; - int* btab = (int*)&borderTab[0]; + int btab_esz = this_.borderElemSize, wholeWidth = this_.wholeSize.width; + int* btab = (int*)&this_.borderTab[0]; - for( i = 0; i < dx1; i++ ) + for( i = 0; i < this_.dx1; i++ ) { - int p0 = (borderInterpolate(i-dx1, wholeWidth, rowBorderType) + xofs1)*btab_esz; + int p0 = (borderInterpolate(i-this_.dx1, wholeWidth, this_.rowBorderType) + xofs1)*btab_esz; for( j = 0; j < btab_esz; j++ ) btab[i*btab_esz + j] = p0 + j; } - for( i = 0; i < dx2; i++ ) + for( i = 0; i < this_.dx2; i++ ) { - int p0 = (borderInterpolate(wholeWidth + i, wholeWidth, rowBorderType) + xofs1)*btab_esz; + int p0 = (borderInterpolate(wholeWidth + i, wholeWidth, this_.rowBorderType) + xofs1)*btab_esz; for( j = 0; j < btab_esz; j++ ) - btab[(i + dx1)*btab_esz + j] = p0 + j; + btab[(i + this_.dx1)*btab_esz + j] = p0 + j; } } } - rowCount = dstY = 0; - startY = startY0 = std::max(roi.y - anchor.y, 0); - endY = std::min(roi.y + roi.height + ksize.height - anchor.y - 1, wholeSize.height); - if( columnFilter ) - columnFilter->reset(); - if( filter2D ) - filter2D->reset(); + this_.rowCount = this_.dstY = 0; + this_.startY = this_.startY0 = std::max(this_.roi.y - this_.anchor.y, 0); + this_.endY = std::min(this_.roi.y + this_.roi.height + this_.ksize.height - this_.anchor.y - 1, this_.wholeSize.height); - return startY; + if (this_.columnFilter) + this_.columnFilter->reset(); + if (this_.filter2D) + this_.filter2D->reset(); + + return this_.startY; } -int FilterEngine::start(const Mat& src, const Size &wsz, const Point &ofs) +int FilterEngine__proceed(FilterEngine& this_, const uchar* src, int srcstep, int count, + uchar* dst, int dststep) { - start( wsz, src.size(), ofs); - return startY - ofs.y; -} + CV_INSTRUMENT_REGION(); -int FilterEngine::remainingInputRows() const -{ - return endY - startY - rowCount; -} + CV_DbgAssert(this_.wholeSize.width > 0 && this_.wholeSize.height > 0 ); -int FilterEngine::remainingOutputRows() const -{ - return roi.height - dstY; -} - -int FilterEngine::proceed( const uchar* src, int srcstep, int count, - uchar* dst, int dststep ) -{ - CV_Assert( wholeSize.width > 0 && wholeSize.height > 0 ); - - const int *btab = &borderTab[0]; - int esz = (int)getElemSize(srcType), btab_esz = borderElemSize; - uchar** brows = &rows[0]; - int bufRows = (int)rows.size(); - int cn = CV_MAT_CN(bufType); - int width = roi.width, kwidth = ksize.width; - int kheight = ksize.height, ay = anchor.y; - int _dx1 = dx1, _dx2 = dx2; - int width1 = roi.width + kwidth - 1; - int xofs1 = std::min(roi.x, anchor.x); - bool isSep = isSeparable(); - bool makeBorder = (_dx1 > 0 || _dx2 > 0) && rowBorderType != BORDER_CONSTANT; + const int *btab = &this_.borderTab[0]; + int esz = (int)getElemSize(this_.srcType), btab_esz = this_.borderElemSize; + uchar** brows = &this_.rows[0]; + int bufRows = (int)this_.rows.size(); + int cn = CV_MAT_CN(this_.bufType); + int width = this_.roi.width, kwidth = this_.ksize.width; + int kheight = this_.ksize.height, ay = this_.anchor.y; + int _dx1 = this_.dx1, _dx2 = this_.dx2; + int width1 = this_.roi.width + kwidth - 1; + int xofs1 = std::min(this_.roi.x, this_.anchor.x); + bool isSep = this_.isSeparable(); + bool makeBorder = (_dx1 > 0 || _dx2 > 0) && this_.rowBorderType != BORDER_CONSTANT; int dy = 0, i = 0; src -= xofs1*esz; - count = std::min(count, remainingInputRows()); + count = std::min(count, this_.remainingInputRows()); - CV_Assert( src && dst && count > 0 ); + CV_Assert(src && dst && count > 0); for(;; dst += dststep*i, dy += i) { - int dcount = bufRows - ay - startY - rowCount + roi.y; + int dcount = bufRows - ay - this_.startY - this_.rowCount + this_.roi.y; dcount = dcount > 0 ? dcount : bufRows - kheight + 1; dcount = std::min(dcount, count); count -= dcount; for( ; dcount-- > 0; src += srcstep ) { - int bi = (startY - startY0 + rowCount) % bufRows; - uchar* brow = alignPtr(&ringBuf[0], VEC_ALIGN) + bi*bufStep; - uchar* row = isSep ? &srcRow[0] : brow; + int bi = (this_.startY - this_.startY0 + this_.rowCount) % bufRows; + uchar* brow = alignPtr(&this_.ringBuf[0], VEC_ALIGN) + bi*this_.bufStep; + uchar* row = isSep ? &this_.srcRow[0] : brow; - if( ++rowCount > bufRows ) + if (++this_.rowCount > bufRows) { - --rowCount; - ++startY; + --this_.rowCount; + ++this_.startY; } memcpy( row + _dx1*esz, src, (width1 - _dx2 - _dx1)*esz ); @@ -349,99 +261,55 @@ int FilterEngine::proceed( const uchar* src, int srcstep, int count, } if( isSep ) - (*rowFilter)(row, brow, width, CV_MAT_CN(srcType)); + (*this_.rowFilter)(row, brow, width, CV_MAT_CN(this_.srcType)); } - int max_i = std::min(bufRows, roi.height - (dstY + dy) + (kheight - 1)); + int max_i = std::min(bufRows, this_.roi.height - (this_.dstY + dy) + (kheight - 1)); for( i = 0; i < max_i; i++ ) { - int srcY = borderInterpolate(dstY + dy + i + roi.y - ay, - wholeSize.height, columnBorderType); + int srcY = borderInterpolate(this_.dstY + dy + i + this_.roi.y - ay, + this_.wholeSize.height, this_.columnBorderType); if( srcY < 0 ) // can happen only with constant border type - brows[i] = alignPtr(&constBorderRow[0], VEC_ALIGN); + brows[i] = alignPtr(&this_.constBorderRow[0], VEC_ALIGN); else { - CV_Assert( srcY >= startY ); - if( srcY >= startY + rowCount ) + CV_Assert(srcY >= this_.startY); + if( srcY >= this_.startY + this_.rowCount) break; - int bi = (srcY - startY0) % bufRows; - brows[i] = alignPtr(&ringBuf[0], VEC_ALIGN) + bi*bufStep; + int bi = (srcY - this_.startY0) % bufRows; + brows[i] = alignPtr(&this_.ringBuf[0], VEC_ALIGN) + bi*this_.bufStep; } } if( i < kheight ) break; i -= kheight - 1; - if( isSeparable() ) - (*columnFilter)((const uchar**)brows, dst, dststep, i, roi.width*cn); + if (isSep) + (*this_.columnFilter)((const uchar**)brows, dst, dststep, i, this_.roi.width*cn); else - (*filter2D)((const uchar**)brows, dst, dststep, i, roi.width, cn); + (*this_.filter2D)((const uchar**)brows, dst, dststep, i, this_.roi.width, cn); } - dstY += dy; - CV_Assert( dstY <= roi.height ); + this_.dstY += dy; + CV_Assert(this_.dstY <= this_.roi.height); return dy; } -void FilterEngine::apply(const Mat& src, Mat& dst, const Size & wsz, const Point & ofs) +void FilterEngine__apply(FilterEngine& this_, const Mat& src, Mat& dst, const Size& wsz, const Point& ofs) { CV_INSTRUMENT_REGION(); - CV_Assert( src.type() == srcType && dst.type() == dstType ); + CV_DbgAssert(src.type() == this_.srcType && dst.type() == this_.dstType); - int y = start(src, wsz, ofs); - proceed(src.ptr() + y*src.step, + FilterEngine__start(this_, wsz, src.size(), ofs); + int y = this_.startY - ofs.y; + FilterEngine__proceed(this_, + src.ptr() + y*src.step, (int)src.step, - endY - startY, + this_.endY - this_.startY, dst.ptr(), (int)dst.step ); } -} - -/****************************************************************************************\ -* Separable linear filter * -\****************************************************************************************/ - -int cv::getKernelType(InputArray filter_kernel, Point anchor) -{ - Mat _kernel = filter_kernel.getMat(); - CV_Assert( _kernel.channels() == 1 ); - int i, sz = _kernel.rows*_kernel.cols; - - Mat kernel; - _kernel.convertTo(kernel, CV_64F); - - const double* coeffs = kernel.ptr(); - double sum = 0; - int type = KERNEL_SMOOTH + KERNEL_INTEGER; - if( (_kernel.rows == 1 || _kernel.cols == 1) && - anchor.x*2 + 1 == _kernel.cols && - anchor.y*2 + 1 == _kernel.rows ) - type |= (KERNEL_SYMMETRICAL + KERNEL_ASYMMETRICAL); - - for( i = 0; i < sz; i++ ) - { - double a = coeffs[i], b = coeffs[sz - i - 1]; - if( a != b ) - type &= ~KERNEL_SYMMETRICAL; - if( a != -b ) - type &= ~KERNEL_ASYMMETRICAL; - if( a < 0 ) - type &= ~KERNEL_SMOOTH; - if( a != saturate_cast(a) ) - type &= ~KERNEL_INTEGER; - sum += a; - } - - if( fabs(sum - 1) > FLT_EPSILON*(fabs(sum) + 1) ) - type &= ~KERNEL_SMOOTH; - return type; -} - - -namespace cv -{ - struct RowNoVec { RowNoVec() {} @@ -503,6 +371,8 @@ struct RowVec_8u32s int operator()(const uchar* _src, uchar* _dst, int width, int cn) const { + CV_INSTRUMENT_REGION(); + int i = 0, k, _ksize = kernel.rows + kernel.cols - 1; int* dst = (int*)_dst; const int* _kx = kernel.ptr(); @@ -587,7 +457,6 @@ struct RowVec_8u32s i += v_uint32::nlanes; } } - vx_cleanup(); return i; } @@ -618,6 +487,8 @@ struct SymmRowSmallVec_8u32s int operator()(const uchar* src, uchar* _dst, int width, int cn) const { + CV_INSTRUMENT_REGION(); + int i = 0, j, k, _ksize = kernel.rows + kernel.cols - 1; int* dst = (int*)_dst; bool symmetrical = (symmetryType & KERNEL_SYMMETRICAL) != 0; @@ -1083,8 +954,6 @@ struct SymmRowSmallVec_8u32s } } } - - vx_cleanup(); return i; } @@ -1107,6 +976,8 @@ struct SymmColumnVec_32s8u int operator()(const uchar** _src, uchar* dst, int width) const { + CV_INSTRUMENT_REGION(); + int _ksize = kernel.rows + kernel.cols - 1; if( _ksize == 1 ) return 0; @@ -1237,8 +1108,6 @@ struct SymmColumnVec_32s8u i += v_int32x4::nlanes; } } - - vx_cleanup(); return i; } @@ -1261,6 +1130,8 @@ struct SymmColumnSmallVec_32s16s int operator()(const uchar** _src, uchar* _dst, int width) const { + CV_INSTRUMENT_REGION(); + int ksize2 = (kernel.rows + kernel.cols - 1)/2; const float* ky = kernel.ptr() + ksize2; int i = 0; @@ -1420,8 +1291,6 @@ struct SymmColumnSmallVec_32s16s } } } - - vx_cleanup(); return i; } @@ -1443,6 +1312,8 @@ struct RowVec_16s32f int operator()(const uchar* _src, uchar* _dst, int width, int cn) const { + CV_INSTRUMENT_REGION(); + int i = 0, k, _ksize = kernel.rows + kernel.cols - 1; float* dst = (float*)_dst; const float* _kx = kernel.ptr(); @@ -1495,7 +1366,6 @@ struct RowVec_16s32f v_store(dst + i, s0); i += v_float32::nlanes; } - vx_cleanup(); return i; } @@ -1516,6 +1386,8 @@ struct SymmColumnVec_32f16s int operator()(const uchar** _src, uchar* _dst, int width) const { + CV_INSTRUMENT_REGION(); + int _ksize = kernel.rows + kernel.cols - 1; if( _ksize == 1 ) return 0; @@ -1620,7 +1492,6 @@ struct SymmColumnVec_32f16s } } - vx_cleanup(); return i; } @@ -1636,7 +1507,6 @@ struct RowVec_32f { RowVec_32f() { - haveAVX2 = CV_CPU_HAS_SUPPORT_AVX2; #if defined USE_IPP_SEP_FILTERS bufsz = -1; #endif @@ -1645,7 +1515,6 @@ struct RowVec_32f RowVec_32f( const Mat& _kernel ) { kernel = _kernel; - haveAVX2 = CV_CPU_HAS_SUPPORT_AVX2; #if defined USE_IPP_SEP_FILTERS bufsz = -1; #endif @@ -1653,6 +1522,8 @@ struct RowVec_32f int operator()(const uchar* _src, uchar* _dst, int width, int cn) const { + CV_INSTRUMENT_REGION(); + #if defined USE_IPP_SEP_FILTERS CV_IPP_CHECK() { @@ -1670,9 +1541,24 @@ struct RowVec_32f int i = 0, k; width *= cn; -#if CV_TRY_AVX2 - if (haveAVX2) - return RowVec_32f_AVX(src0, _kx, dst, width, cn, _ksize); +#if CV_AVX + for (; i <= width - 8; i += 8) + { + const float* src = src0 + i; + __m256 f, x0; + __m256 s0 = _mm256_set1_ps(0.0f); + for (k = 0; k < _ksize; k++, src += cn) + { + f = _mm256_set1_ps(_kx[k]); + x0 = _mm256_loadu_ps(src); +#if CV_FMA3 + s0 = _mm256_fmadd_ps(x0, f, s0); +#else + s0 = _mm256_add_ps(s0, _mm256_mul_ps(x0, f)); +#endif + } + _mm256_storeu_ps(dst + i, s0); + } #endif v_float32 k0 = vx_setall_f32(_kx[0]); for( ; i <= width - 4*v_float32::nlanes; i += 4*v_float32::nlanes ) @@ -1722,12 +1608,10 @@ struct RowVec_32f v_store(dst + i, s0); i += v_float32::nlanes; } - vx_cleanup(); return i; } Mat kernel; - bool haveAVX2; #if defined USE_IPP_SEP_FILTERS private: mutable int bufsz; @@ -1782,6 +1666,8 @@ struct SymmRowSmallVec_32f int operator()(const uchar* _src, uchar* _dst, int width, int cn) const { + CV_INSTRUMENT_REGION(); + int i = 0, _ksize = kernel.rows + kernel.cols - 1; if( _ksize == 1 ) return 0; @@ -1868,8 +1754,6 @@ struct SymmRowSmallVec_32f v_store(dst + i, v_muladd(vx_load(src + 2*cn) - vx_load(src - 2*cn), k2, (vx_load(src + cn) - vx_load(src - cn)) * k1)); } } - - vx_cleanup(); return i; } @@ -1882,7 +1766,6 @@ struct SymmColumnVec_32f { SymmColumnVec_32f() { symmetryType=0; - haveAVX2 = CV_CPU_HAS_SUPPORT_AVX2; delta = 0; } SymmColumnVec_32f(const Mat& _kernel, int _symmetryType, int, double _delta) @@ -1890,12 +1773,13 @@ struct SymmColumnVec_32f symmetryType = _symmetryType; kernel = _kernel; delta = (float)_delta; - haveAVX2 = CV_CPU_HAS_SUPPORT_AVX2; CV_Assert( (symmetryType & (KERNEL_SYMMETRICAL | KERNEL_ASYMMETRICAL)) != 0 ); } int operator()(const uchar** _src, uchar* _dst, int width) const { + CV_INSTRUMENT_REGION(); + int ksize2 = (kernel.rows + kernel.cols - 1)/2; const float* ky = kernel.ptr() + ksize2; int i = 0, k; @@ -1906,9 +1790,53 @@ struct SymmColumnVec_32f if( symmetrical ) { -#if CV_TRY_AVX2 - if (haveAVX2) - return SymmColumnVec_32f_Symm_AVX(src, ky, dst, delta, width, ksize2); +#if CV_AVX + { + const float *S, *S2; + const __m256 d8 = _mm256_set1_ps(delta); + + for( ; i <= width - 16; i += 16 ) + { + __m256 f = _mm256_set1_ps(ky[0]); + __m256 s0, s1; + __m256 x0; + S = src[0] + i; + s0 = _mm256_loadu_ps(S); +#if CV_FMA3 + s0 = _mm256_fmadd_ps(s0, f, d8); +#else + s0 = _mm256_add_ps(_mm256_mul_ps(s0, f), d8); +#endif + s1 = _mm256_loadu_ps(S+8); +#if CV_FMA3 + s1 = _mm256_fmadd_ps(s1, f, d8); +#else + s1 = _mm256_add_ps(_mm256_mul_ps(s1, f), d8); +#endif + + for( k = 1; k <= ksize2; k++ ) + { + S = src[k] + i; + S2 = src[-k] + i; + f = _mm256_set1_ps(ky[k]); + x0 = _mm256_add_ps(_mm256_loadu_ps(S), _mm256_loadu_ps(S2)); +#if CV_FMA3 + s0 = _mm256_fmadd_ps(x0, f, s0); +#else + s0 = _mm256_add_ps(s0, _mm256_mul_ps(x0, f)); +#endif + x0 = _mm256_add_ps(_mm256_loadu_ps(S+8), _mm256_loadu_ps(S2+8)); +#if CV_FMA3 + s1 = _mm256_fmadd_ps(x0, f, s1); +#else + s1 = _mm256_add_ps(s1, _mm256_mul_ps(x0, f)); +#endif + } + + _mm256_storeu_ps(dst + i, s0); + _mm256_storeu_ps(dst + i + 8, s1); + } + } #endif const v_float32 d4 = vx_setall_f32(delta); const v_float32 k0 = vx_setall_f32(ky[0]); @@ -1956,11 +1884,41 @@ struct SymmColumnVec_32f } else { -#if CV_TRY_AVX2 - if (haveAVX2) - return SymmColumnVec_32f_Unsymm_AVX(src, ky, dst, delta, width, ksize2); -#endif CV_DbgAssert(ksize2 > 0); +#if CV_AVX + { + const float *S2; + const __m256 d8 = _mm256_set1_ps(delta); + + for (; i <= width - 16; i += 16) + { + __m256 f, s0 = d8, s1 = d8; + __m256 x0; + + for (k = 1; k <= ksize2; k++) + { + const float *S = src[k] + i; + S2 = src[-k] + i; + f = _mm256_set1_ps(ky[k]); + x0 = _mm256_sub_ps(_mm256_loadu_ps(S), _mm256_loadu_ps(S2)); +#if CV_FMA3 + s0 = _mm256_fmadd_ps(x0, f, s0); +#else + s0 = _mm256_add_ps(s0, _mm256_mul_ps(x0, f)); +#endif + x0 = _mm256_sub_ps(_mm256_loadu_ps(S + 8), _mm256_loadu_ps(S2 + 8)); +#if CV_FMA3 + s1 = _mm256_fmadd_ps(x0, f, s1); +#else + s1 = _mm256_add_ps(s1, _mm256_mul_ps(x0, f)); +#endif + } + + _mm256_storeu_ps(dst + i, s0); + _mm256_storeu_ps(dst + i + 8, s1); + } + } +#endif const v_float32 d4 = vx_setall_f32(delta); const v_float32 k1 = vx_setall_f32(ky[1]); for( ; i <= width - 4*v_float32::nlanes; i += 4*v_float32::nlanes ) @@ -2005,15 +1963,12 @@ struct SymmColumnVec_32f i += v_float32::nlanes; } } - - vx_cleanup(); return i; } int symmetryType; float delta; Mat kernel; - bool haveAVX2; }; @@ -2030,6 +1985,8 @@ struct SymmColumnSmallVec_32f int operator()(const uchar** _src, uchar* _dst, int width) const { + CV_INSTRUMENT_REGION(); + int ksize2 = (kernel.rows + kernel.cols - 1)/2; const float* ky = kernel.ptr() + ksize2; int i = 0; @@ -2085,8 +2042,6 @@ struct SymmColumnSmallVec_32f v_store(dst + i, v_muladd(vx_load(S2 + i) - vx_load(S0 + i), k1, d4)); } } - - vx_cleanup(); return i; } @@ -2115,6 +2070,8 @@ struct FilterVec_8u int operator()(const uchar** src, uchar* dst, int width) const { + CV_INSTRUMENT_REGION(); + CV_DbgAssert(_nz > 0); const float* kf = (const float*)&coeffs[0]; int i = 0, k, nz = _nz; @@ -2175,8 +2132,6 @@ struct FilterVec_8u *(int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0(); i += v_int32x4::nlanes; } - - vx_cleanup(); return i; } @@ -2201,6 +2156,8 @@ struct FilterVec_8u16s int operator()(const uchar** src, uchar* _dst, int width) const { + CV_INSTRUMENT_REGION(); + CV_DbgAssert(_nz > 0); const float* kf = (const float*)&coeffs[0]; short* dst = (short*)_dst; @@ -2251,8 +2208,6 @@ struct FilterVec_8u16s v_pack_store(dst + i, v_round(s0)); i += v_int32::nlanes; } - - vx_cleanup(); return i; } @@ -2275,6 +2230,8 @@ struct FilterVec_32f int operator()(const uchar** _src, uchar* _dst, int width) const { + CV_INSTRUMENT_REGION(); + const float* kf = (const float*)&coeffs[0]; const float** src = (const float**)_src; float* dst = (float*)_dst; @@ -2323,8 +2280,6 @@ struct FilterVec_32f v_store(dst + i, s0); i += v_float32::nlanes; } - - vx_cleanup(); return i; } @@ -2369,6 +2324,8 @@ template struct RowFilter : public BaseRo void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int _ksize = ksize; const DT* kx = kernel.ptr
(); const ST* S; @@ -2427,6 +2384,8 @@ template struct SymmRowSmallFilter : void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int ksize2 = this->ksize/2, ksize2n = ksize2*cn; const DT* kx = this->kernel.template ptr
() + ksize2; bool symmetrical = (this->symmetryType & KERNEL_SYMMETRICAL) != 0; @@ -2566,6 +2525,8 @@ template struct ColumnFilter : public BaseColumnFilte void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + const ST* ky = kernel.template ptr(); ST _delta = delta; int _ksize = ksize; @@ -2629,6 +2590,8 @@ template struct SymmColumnFilter : public ColumnFilte void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int ksize2 = this->ksize/2; const ST* ky = this->kernel.template ptr() + ksize2; int i, k; @@ -2735,6 +2698,8 @@ struct SymmColumnSmallFilter : public SymmColumnFilter void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE { + CV_INSTRUMENT_REGION(); + int ksize2 = this->ksize/2; const ST* ky = this->kernel.template ptr() + ksize2; int i; @@ -2904,13 +2869,14 @@ template struct FixedPtCastEx int SHIFT, DELTA; }; -} -cv::Ptr cv::getLinearRowFilter( int srcType, int bufType, - InputArray _kernel, int anchor, - int symmetryType ) +Ptr getLinearRowFilter( + int srcType, int bufType, + const Mat& kernel, int anchor, + int symmetryType) { - Mat kernel = _kernel.getMat(); + CV_INSTRUMENT_REGION(); + int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(bufType); int cn = CV_MAT_CN(srcType); CV_Assert( cn == CV_MAT_CN(bufType) && @@ -2958,12 +2924,14 @@ cv::Ptr cv::getLinearRowFilter( int srcType, int bufType, } -cv::Ptr cv::getLinearColumnFilter( int bufType, int dstType, - InputArray _kernel, int anchor, - int symmetryType, double delta, - int bits ) +Ptr getLinearColumnFilter( + int bufType, int dstType, + const Mat& kernel, int anchor, + int symmetryType, double delta, + int bits) { - Mat kernel = _kernel.getMat(); + CV_INSTRUMENT_REGION(); + int sdepth = CV_MAT_DEPTH(bufType), ddepth = CV_MAT_DEPTH(dstType); int cn = CV_MAT_CN(dstType); CV_Assert( cn == CV_MAT_CN(bufType) && @@ -3053,131 +3021,6 @@ cv::Ptr cv::getLinearColumnFilter( int bufType, int dstTyp } -cv::Ptr cv::createSeparableLinearFilter( - int _srcType, int _dstType, - InputArray __rowKernel, InputArray __columnKernel, - Point _anchor, double _delta, - int _rowBorderType, int _columnBorderType, - const Scalar& _borderValue ) -{ - Mat _rowKernel = __rowKernel.getMat(), _columnKernel = __columnKernel.getMat(); - _srcType = CV_MAT_TYPE(_srcType); - _dstType = CV_MAT_TYPE(_dstType); - int sdepth = CV_MAT_DEPTH(_srcType), ddepth = CV_MAT_DEPTH(_dstType); - int cn = CV_MAT_CN(_srcType); - CV_Assert( cn == CV_MAT_CN(_dstType) ); - int rsize = _rowKernel.rows + _rowKernel.cols - 1; - int csize = _columnKernel.rows + _columnKernel.cols - 1; - if( _anchor.x < 0 ) - _anchor.x = rsize/2; - if( _anchor.y < 0 ) - _anchor.y = csize/2; - int rtype = getKernelType(_rowKernel, - _rowKernel.rows == 1 ? Point(_anchor.x, 0) : Point(0, _anchor.x)); - int ctype = getKernelType(_columnKernel, - _columnKernel.rows == 1 ? Point(_anchor.y, 0) : Point(0, _anchor.y)); - Mat rowKernel, columnKernel; - - int bdepth = std::max(CV_32F,std::max(sdepth, ddepth)); - int bits = 0; - - if( sdepth == CV_8U && - ((rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && - ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && - ddepth == CV_8U) || - ((rtype & (KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL)) && - (ctype & (KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL)) && - (rtype & ctype & KERNEL_INTEGER) && - ddepth == CV_16S)) ) - { - bdepth = CV_32S; - bits = ddepth == CV_8U ? 8 : 0; - _rowKernel.convertTo( rowKernel, CV_32S, 1 << bits ); - _columnKernel.convertTo( columnKernel, CV_32S, 1 << bits ); - bits *= 2; - _delta *= (1 << bits); - } - else - { - if( _rowKernel.type() != bdepth ) - _rowKernel.convertTo( rowKernel, bdepth ); - else - rowKernel = _rowKernel; - if( _columnKernel.type() != bdepth ) - _columnKernel.convertTo( columnKernel, bdepth ); - else - columnKernel = _columnKernel; - } - - int _bufType = CV_MAKETYPE(bdepth, cn); - Ptr _rowFilter = getLinearRowFilter( - _srcType, _bufType, rowKernel, _anchor.x, rtype); - Ptr _columnFilter = getLinearColumnFilter( - _bufType, _dstType, columnKernel, _anchor.y, ctype, _delta, bits ); - - return Ptr( new FilterEngine(Ptr(), _rowFilter, _columnFilter, - _srcType, _dstType, _bufType, _rowBorderType, _columnBorderType, _borderValue )); -} - - -/****************************************************************************************\ -* Non-separable linear filter * -\****************************************************************************************/ - -namespace cv -{ - -void preprocess2DKernel( const Mat& kernel, std::vector& coords, std::vector& coeffs ) -{ - int i, j, k, nz = countNonZero(kernel), ktype = kernel.type(); - if(nz == 0) - nz = 1; - CV_Assert( ktype == CV_8U || ktype == CV_32S || ktype == CV_32F || ktype == CV_64F ); - coords.resize(nz); - coeffs.resize(nz*getElemSize(ktype)); - uchar* _coeffs = &coeffs[0]; - - for( i = k = 0; i < kernel.rows; i++ ) - { - const uchar* krow = kernel.ptr(i); - for( j = 0; j < kernel.cols; j++ ) - { - if( ktype == CV_8U ) - { - uchar val = krow[j]; - if( val == 0 ) - continue; - coords[k] = Point(j,i); - _coeffs[k++] = val; - } - else if( ktype == CV_32S ) - { - int val = ((const int*)krow)[j]; - if( val == 0 ) - continue; - coords[k] = Point(j,i); - ((int*)_coeffs)[k++] = val; - } - else if( ktype == CV_32F ) - { - float val = ((const float*)krow)[j]; - if( val == 0 ) - continue; - coords[k] = Point(j,i); - ((float*)_coeffs)[k++] = val; - } - else - { - double val = ((const double*)krow)[j]; - if( val == 0 ) - continue; - coords[k] = Point(j,i); - ((double*)_coeffs)[k++] = val; - } - } - } -} - template struct Filter2D : public BaseFilter { @@ -3253,489 +3096,14 @@ template struct Filter2D : public BaseFi VecOp vecOp; }; -#ifdef HAVE_OPENCL -#define DIVUP(total, grain) (((total) + (grain) - 1) / (grain)) -#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n))) - -// prepare kernel: transpose and make double rows (+align). Returns size of aligned row -// Samples: -// a b c -// Input: d e f -// g h i -// Output, last two zeros is the alignment: -// a d g a d g 0 0 -// b e h b e h 0 0 -// c f i c f i 0 0 -template -static int _prepareKernelFilter2D(std::vector & data, const Mat & kernel) +Ptr getLinearFilter( + int srcType, int dstType, + const Mat& _kernel, Point anchor, + double delta, int bits) { - Mat _kernel; kernel.convertTo(_kernel, DataDepth::value); - int size_y_aligned = ROUNDUP(kernel.rows * 2, 4); - data.clear(); data.resize(size_y_aligned * kernel.cols, 0); - for (int x = 0; x < kernel.cols; x++) - { - for (int y = 0; y < kernel.rows; y++) - { - data[x * size_y_aligned + y] = _kernel.at(y, x); - data[x * size_y_aligned + y + kernel.rows] = _kernel.at(y, x); - } - } - return size_y_aligned; -} + CV_INSTRUMENT_REGION(); -static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, - InputArray _kernel, Point anchor, - double delta, int borderType ) -{ - int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - ddepth = ddepth < 0 ? sdepth : ddepth; - int dtype = CV_MAKE_TYPE(ddepth, cn), wdepth = std::max(std::max(sdepth, ddepth), CV_32F), - wtype = CV_MAKE_TYPE(wdepth, cn); - if (cn > 4) - return false; - - Size ksize = _kernel.size(); - if (anchor.x < 0) - anchor.x = ksize.width / 2; - if (anchor.y < 0) - anchor.y = ksize.height / 2; - - bool isolated = (borderType & BORDER_ISOLATED) != 0; - borderType &= ~BORDER_ISOLATED; - const cv::ocl::Device &device = cv::ocl::Device::getDefault(); - bool doubleSupport = device.doubleFPConfig() > 0; - if (wdepth == CV_64F && !doubleSupport) - return false; - - const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", - "BORDER_WRAP", "BORDER_REFLECT_101" }; - - cv::Mat kernelMat = _kernel.getMat(); - cv::Size sz = _src.size(), wholeSize; - size_t globalsize[2] = { (size_t)sz.width, (size_t)sz.height }; - size_t localsize_general[2] = {0, 1}; - size_t* localsize = NULL; - - ocl::Kernel k; - UMat src = _src.getUMat(); - if (!isolated) - { - Point ofs; - src.locateROI(wholeSize, ofs); - } - - size_t tryWorkItems = device.maxWorkGroupSize(); - if (device.isIntel() && 128 < tryWorkItems) - tryWorkItems = 128; - char cvt[2][40]; - - // For smaller filter kernels, there is a special kernel that is more - // efficient than the general one. - UMat kernalDataUMat; - if (device.isIntel() && (device.type() & ocl::Device::TYPE_GPU) && - ((ksize.width < 5 && ksize.height < 5) || - (ksize.width == 5 && ksize.height == 5 && cn == 1))) - { - kernelMat = kernelMat.reshape(0, 1); - String kerStr = ocl::kernelToStr(kernelMat, CV_32F); - int h = isolated ? sz.height : wholeSize.height; - int w = isolated ? sz.width : wholeSize.width; - - if (w < ksize.width || h < ksize.height) - return false; - - // Figure out what vector size to use for loading the pixels. - int pxLoadNumPixels = cn != 1 || sz.width % 4 ? 1 : 4; - int pxLoadVecSize = cn * pxLoadNumPixels; - - // Figure out how many pixels per work item to compute in X and Y - // directions. Too many and we run out of registers. - int pxPerWorkItemX = 1; - int pxPerWorkItemY = 1; - if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4) - { - pxPerWorkItemX = sz.width % 8 ? sz.width % 4 ? sz.width % 2 ? 1 : 2 : 4 : 8; - pxPerWorkItemY = sz.height % 2 ? 1 : 2; - } - else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4)) - { - pxPerWorkItemX = sz.width % 2 ? 1 : 2; - pxPerWorkItemY = sz.height % 2 ? 1 : 2; - } - globalsize[0] = sz.width / pxPerWorkItemX; - globalsize[1] = sz.height / pxPerWorkItemY; - - // Need some padding in the private array for pixels - int privDataWidth = ROUNDUP(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels); - - // Make the global size a nice round number so the runtime can pick - // from reasonable choices for the workgroup size - const int wgRound = 256; - globalsize[0] = ROUNDUP(globalsize[0], wgRound); - - char build_options[1024]; - sprintf(build_options, "-D cn=%d " - "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " - "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d " - "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s " - "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d " - "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " - "-D convertToWT=%s -D convertToDstT=%s %s", - cn, anchor.x, anchor.y, ksize.width, ksize.height, - pxLoadVecSize, pxLoadNumPixels, - pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType], - isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", - privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1, - ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), - ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), - ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), kerStr.c_str()); - - if (!k.create("filter2DSmall", cv::ocl::imgproc::filter2DSmall_oclsrc, build_options)) - return false; - } - else - { - localsize = localsize_general; - std::vector kernelMatDataFloat; - int kernel_size_y2_aligned = _prepareKernelFilter2D(kernelMatDataFloat, kernelMat); - String kerStr = ocl::kernelToStr(kernelMatDataFloat, CV_32F); - - for ( ; ; ) - { - size_t BLOCK_SIZE = tryWorkItems; - while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)sz.width * 2) - BLOCK_SIZE /= 2; - - if ((size_t)ksize.width > BLOCK_SIZE) - return false; - - int requiredTop = anchor.y; - int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x; - int requiredBottom = ksize.height - 1 - anchor.y; - int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x; - int h = isolated ? sz.height : wholeSize.height; - int w = isolated ? sz.width : wholeSize.width; - bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight; - - if ((w < ksize.width) || (h < ksize.height)) - return false; - - String opts = format("-D LOCAL_SIZE=%d -D cn=%d " - "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " - "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s " - "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " - "-D convertToWT=%s -D convertToDstT=%s", - (int)BLOCK_SIZE, cn, anchor.x, anchor.y, - ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType], - extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", - isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", - doubleSupport ? " -D DOUBLE_SUPPORT" : "", kerStr.c_str(), - ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), - ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), - ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1])); - - localsize[0] = BLOCK_SIZE; - globalsize[0] = DIVUP(sz.width, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE; - globalsize[1] = sz.height; - - if (!k.create("filter2D", cv::ocl::imgproc::filter2D_oclsrc, opts)) - return false; - - size_t kernelWorkGroupSize = k.workGroupSize(); - if (localsize[0] <= kernelWorkGroupSize) - break; - if (BLOCK_SIZE < kernelWorkGroupSize) - return false; - tryWorkItems = kernelWorkGroupSize; - } - } - - _dst.create(sz, dtype); - UMat dst = _dst.getUMat(); - - int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); - int srcOffsetY = (int)(src.offset / src.step); - int srcEndX = (isolated ? (srcOffsetX + sz.width) : wholeSize.width); - int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height); - - k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY, - srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), (float)delta); - - return k.run(2, globalsize, localsize, false); -} - -const int shift_bits = 8; - -static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor, - int borderType, int ddepth, bool fast8uc1, bool int_arithm) -{ - int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - Size bufSize = buf.size(); - int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type); - - if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) - return false; - -#ifdef __ANDROID__ - size_t localsize[2] = {16, 10}; -#else - size_t localsize[2] = {16, 16}; -#endif - - size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]}; - if (fast8uc1) - globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0]; - - int radiusX = anchor, radiusY = (buf.rows - src.rows) >> 1; - - bool isolated = (borderType & BORDER_ISOLATED) != 0; - const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }, - * const btype = borderMap[borderType & ~BORDER_ISOLATED]; - - bool extra_extrapolation = src.rows < (int)((-radiusY + globalsize[1]) >> 1) + 1; - extra_extrapolation |= src.rows < radiusY; - extra_extrapolation |= src.cols < (int)((-radiusX + globalsize[0] + 8 * localsize[0] + 3) >> 1) + 1; - extra_extrapolation |= src.cols < radiusX; - - char cvt[40]; - cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s" - " -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s%s%s", - radiusX, (int)localsize[0], (int)localsize[1], cn, btype, - extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", - isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", - ocl::typeToStr(type), ocl::typeToStr(buf_type), - ocl::convertTypeStr(sdepth, bdepth, cn, cvt), - ocl::typeToStr(sdepth), ocl::typeToStr(bdepth), - doubleSupport ? " -D DOUBLE_SUPPORT" : "", - int_arithm ? " -D INTEGER_ARITHMETIC" : ""); - build_options += ocl::kernelToStr(kernelX, bdepth); - - Size srcWholeSize; Point srcOffset; - src.locateROI(srcWholeSize, srcOffset); - - String kernelName("row_filter"); - if (fast8uc1) - kernelName += "_C1_D0"; - - ocl::Kernel k(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, - build_options); - if (k.empty()) - return false; - - if (fast8uc1) - k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x, - srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height, - ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()), - buf.cols, buf.rows, radiusY); - else - k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffset.x, - srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height, - ocl::KernelArg::PtrWriteOnly(buf), (int)buf.step, buf.cols, buf.rows, radiusY); - - return k.run(2, globalsize, localsize, false); -} - -static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor, bool int_arithm) -{ - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - if (dst.depth() == CV_64F && !doubleSupport) - return false; - -#ifdef __ANDROID__ - size_t localsize[2] = { 16, 10 }; -#else - size_t localsize[2] = { 16, 16 }; -#endif - size_t globalsize[2] = { 0, 0 }; - - int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype); - Size sz = dst.size(); - int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type); - - globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1]; - globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0]; - - char cvt[40]; - cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d" - " -D srcT=%s -D dstT=%s -D convertToDstT=%s" - " -D srcT1=%s -D dstT1=%s -D SHIFT_BITS=%d%s%s", - anchor, (int)localsize[0], (int)localsize[1], cn, - ocl::typeToStr(buf_type), ocl::typeToStr(dtype), - ocl::convertTypeStr(bdepth, ddepth, cn, cvt), - ocl::typeToStr(bdepth), ocl::typeToStr(ddepth), - 2*shift_bits, doubleSupport ? " -D DOUBLE_SUPPORT" : "", - int_arithm ? " -D INTEGER_ARITHMETIC" : ""); - build_options += ocl::kernelToStr(kernelY, bdepth); - - ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, - build_options); - if (k.empty()) - return false; - - k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst), - static_cast(delta)); - - return k.run(2, globalsize, localsize, false); -} - -const int optimizedSepFilterLocalWidth = 16; -const int optimizedSepFilterLocalHeight = 8; - -static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, - Mat row_kernel, Mat col_kernel, - double delta, int borderType, int ddepth, int bdepth, bool int_arithm) -{ - Size size = _src.size(), wholeSize; - Point origin; - int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), - esz = CV_ELEM_SIZE(stype), wdepth = std::max(std::max(sdepth, ddepth), bdepth), - dtype = CV_MAKE_TYPE(ddepth, cn); - size_t src_step = _src.step(), src_offset = _src.offset(); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if (esz == 0 || src_step == 0 - || (src_offset % src_step) % esz != 0 - || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) - || !(borderType == BORDER_CONSTANT - || borderType == BORDER_REPLICATE - || borderType == BORDER_REFLECT - || borderType == BORDER_WRAP - || borderType == BORDER_REFLECT_101)) - return false; - - size_t lt2[2] = { optimizedSepFilterLocalWidth, optimizedSepFilterLocalHeight }; - size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1]}; - - char cvt[2][40]; - const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", - "BORDER_REFLECT_101" }; - - String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s" - " -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s" - " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d -D SHIFT_BITS=%d%s", - (int)lt2[0], (int)lt2[1], row_kernel.cols / 2, col_kernel.cols / 2, - ocl::kernelToStr(row_kernel, wdepth, "KERNEL_MATRIX_X").c_str(), - ocl::kernelToStr(col_kernel, wdepth, "KERNEL_MATRIX_Y").c_str(), - ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), - ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype), - ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType], - ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth), - cn, 2*shift_bits, int_arithm ? " -D INTEGER_ARITHMETIC" : ""); - - ocl::Kernel k("sep_filter", ocl::imgproc::filterSep_singlePass_oclsrc, opts); - if (k.empty()) - return false; - - UMat src = _src.getUMat(); - _dst.create(size, dtype); - UMat dst = _dst.getUMat(); - - int src_offset_x = static_cast((src_offset % src_step) / esz); - int src_offset_y = static_cast(src_offset / src_step); - - src.locateROI(wholeSize, origin); - - k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y, - wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst), - static_cast(delta)); - - return k.run(2, gt2, lt2, false); -} - -bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, - InputArray _kernelX, InputArray _kernelY, Point anchor, - double delta, int borderType ) -{ - const ocl::Device & d = ocl::Device::getDefault(); - Size imgSize = _src.size(); - - int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - if (cn > 4) - return false; - - Mat kernelX = _kernelX.getMat().reshape(1, 1); - if (kernelX.cols % 2 != 1) - return false; - Mat kernelY = _kernelY.getMat().reshape(1, 1); - if (kernelY.cols % 2 != 1) - return false; - - if (ddepth < 0) - ddepth = sdepth; - - if (anchor.x < 0) - anchor.x = kernelX.cols >> 1; - if (anchor.y < 0) - anchor.y = kernelY.cols >> 1; - - int rtype = getKernelType(kernelX, - kernelX.rows == 1 ? Point(anchor.x, 0) : Point(0, anchor.x)); - int ctype = getKernelType(kernelY, - kernelY.rows == 1 ? Point(anchor.y, 0) : Point(0, anchor.y)); - - int bdepth = CV_32F; - bool int_arithm = false; - if( sdepth == CV_8U && ddepth == CV_8U && - rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && - ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL) - { - if (ocl::Device::getDefault().isIntel()) - { - for (int i=0; i(0, i) = (float) cvRound(kernelX.at(0, i) * (1 << shift_bits)); - if (kernelX.data != kernelY.data) - for (int i=0; i(0, i) = (float) cvRound(kernelY.at(0, i) * (1 << shift_bits)); - } else - { - bdepth = CV_32S; - kernelX.convertTo( kernelX, bdepth, 1 << shift_bits ); - kernelY.convertTo( kernelY, bdepth, 1 << shift_bits ); - } - int_arithm = true; - } - - CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 && - imgSize.width > optimizedSepFilterLocalWidth + anchor.x && - imgSize.height > optimizedSepFilterLocalHeight + anchor.y && - (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && - anchor == Point(kernelX.cols >> 1, kernelY.cols >> 1) && - OCL_PERFORMANCE_CHECK(d.isIntel()), // TODO FIXIT - ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta, - borderType & ~BORDER_ISOLATED, ddepth, bdepth, int_arithm), true) - - UMat src = _src.getUMat(); - Size srcWholeSize; Point srcOffset; - src.locateROI(srcWholeSize, srcOffset); - - bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 && - src.cols % 4 == 0 && src.step % 4 == 0; - - Size srcSize = src.size(); - Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); - UMat buf(bufSize, CV_MAKETYPE(bdepth, cn)); - if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1, int_arithm)) - return false; - - _dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); - UMat dst = _dst.getUMat(); - - return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y, int_arithm); -} - -#endif - -} - -cv::Ptr cv::getLinearFilter(int srcType, int dstType, - InputArray filter_kernel, Point anchor, - double delta, int bits) -{ - Mat _kernel = filter_kernel.getMat(); int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType); int cn = CV_MAT_CN(srcType), kdepth = _kernel.depth(); CV_Assert( cn == CV_MAT_CN(dstType) && ddepth >= sdepth ); @@ -3806,476 +3174,6 @@ cv::Ptr cv::getLinearFilter(int srcType, int dstType, srcType, dstType)); } - -cv::Ptr cv::createLinearFilter( int _srcType, int _dstType, - InputArray filter_kernel, - Point _anchor, double _delta, - int _rowBorderType, int _columnBorderType, - const Scalar& _borderValue ) -{ - Mat _kernel = filter_kernel.getMat(); - _srcType = CV_MAT_TYPE(_srcType); - _dstType = CV_MAT_TYPE(_dstType); - int cn = CV_MAT_CN(_srcType); - CV_Assert( cn == CV_MAT_CN(_dstType) ); - - Mat kernel = _kernel; - int bits = 0; - - /*int sdepth = CV_MAT_DEPTH(_srcType), ddepth = CV_MAT_DEPTH(_dstType); - int ktype = _kernel.depth() == CV_32S ? KERNEL_INTEGER : getKernelType(_kernel, _anchor); - if( sdepth == CV_8U && (ddepth == CV_8U || ddepth == CV_16S) && - _kernel.rows*_kernel.cols <= (1 << 10) ) - { - bits = (ktype & KERNEL_INTEGER) ? 0 : 11; - _kernel.convertTo(kernel, CV_32S, 1 << bits); - }*/ - - Ptr _filter2D = getLinearFilter(_srcType, _dstType, - kernel, _anchor, _delta, bits); - - return makePtr(_filter2D, Ptr(), - Ptr(), _srcType, _dstType, _srcType, - _rowBorderType, _columnBorderType, _borderValue ); -} - - -//================================================================ -// HAL interface -//================================================================ - -using namespace cv; - -static bool replacementFilter2D(int stype, int dtype, int kernel_type, - uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int width, int height, - int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernel_data, size_t kernel_step, - int kernel_width, int kernel_height, - int anchor_x, int anchor_y, - double delta, int borderType, bool isSubmatrix) -{ - cvhalFilter2D* ctx; - int res = cv_hal_filterInit(&ctx, kernel_data, kernel_step, kernel_type, kernel_width, kernel_height, width, height, - stype, dtype, borderType, delta, anchor_x, anchor_y, isSubmatrix, src_data == dst_data); - if (res != CV_HAL_ERROR_OK) - return false; - res = cv_hal_filter(ctx, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y); - bool success = (res == CV_HAL_ERROR_OK); - res = cv_hal_filterFree(ctx); - if (res != CV_HAL_ERROR_OK) - return false; - return success; -} - -#if 0 //defined HAVE_IPP -static bool ippFilter2D(int stype, int dtype, int kernel_type, - uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int width, int height, - int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernel_data, size_t kernel_step, - int kernel_width, int kernel_height, - int anchor_x, int anchor_y, - double delta, int borderType, - bool isSubmatrix) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - - ::ipp::IwiSize iwSize(width, height); - ::ipp::IwiSize kernelSize(kernel_width, kernel_height); - IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype)); - int channels = CV_MAT_CN(stype); - - CV_UNUSED(isSubmatrix); - -#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel - if(kernel_width == 1 && kernel_height == 1) - return false; #endif - -#if IPP_DISABLE_FILTER2D_BIG_MASK - // Too big difference compared to OpenCV FFT-based convolution - if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7)) - return false; - - // Poor optimization for big kernels - if(kernel_width > 7 || kernel_height > 7) - return false; -#endif - - if(src_data == dst_data) - return false; - - if(stype != dtype) - return false; - - if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1) - return false; - - // TODO: Implement offset for 8u, 16u - if(std::fabs(delta) >= DBL_EPSILON) - return false; - - if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height)) - return false; - - try - { - ::ipp::IwiBorderSize iwBorderSize; - ::ipp::IwiBorderType iwBorderType; - ::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step); - ::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step); - ::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step); - - iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize); - iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize); - if(!iwBorderType) - return false; - - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType); - } - catch(const ::ipp::IwException& ex) - { - CV_UNUSED(ex); - return false; - } - - return true; -#else - CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step); - CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width); - CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); CV_UNUSED(kernel_data); CV_UNUSED(kernel_step); - CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta); - CV_UNUSED(borderType); CV_UNUSED(isSubmatrix); - return false; -#endif -} -#endif - -static bool dftFilter2D(int stype, int dtype, int kernel_type, - uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernel_data, size_t kernel_step, - int kernel_width, int kernel_height, - int anchor_x, int anchor_y, - double delta, int borderType) -{ - { - int sdepth = CV_MAT_DEPTH(stype); - int ddepth = CV_MAT_DEPTH(dtype); - int dft_filter_size = checkHardwareSupport(CV_CPU_SSE3) && ((sdepth == CV_8U && (ddepth == CV_8U || ddepth == CV_16S)) || (sdepth == CV_32F && ddepth == CV_32F)) ? 130 : 50; - if (kernel_width * kernel_height < dft_filter_size) - return false; - } - - Point anchor = Point(anchor_x, anchor_y); - Mat kernel = Mat(Size(kernel_width, kernel_height), kernel_type, kernel_data, kernel_step); - - Mat src(Size(full_width-offset_x, full_height-offset_y), stype, src_data, src_step); - Mat dst(Size(full_width, full_height), dtype, dst_data, dst_step); - Mat temp; - int src_channels = CV_MAT_CN(stype); - int dst_channels = CV_MAT_CN(dtype); - int ddepth = CV_MAT_DEPTH(dtype); - // crossCorr doesn't accept non-zero delta with multiple channels - if (src_channels != 1 && delta != 0) { - // The semantics of filter2D require that the delta be applied - // as floating-point math. So wee need an intermediate Mat - // with a float datatype. If the dest is already floats, - // we just use that. - int corrDepth = ddepth; - if ((ddepth == CV_32F || ddepth == CV_64F) && src_data != dst_data) { - temp = Mat(Size(full_width, full_height), dtype, dst_data, dst_step); - } else { - corrDepth = ddepth == CV_64F ? CV_64F : CV_32F; - temp.create(Size(full_width, full_height), CV_MAKETYPE(corrDepth, dst_channels)); - } - crossCorr(src, kernel, temp, src.size(), - CV_MAKETYPE(corrDepth, src_channels), - anchor, 0, borderType); - add(temp, delta, temp); - if (temp.data != dst_data) { - temp.convertTo(dst, dst.type()); - } - } else { - if (src_data != dst_data) - temp = Mat(Size(full_width, full_height), dtype, dst_data, dst_step); - else - temp.create(Size(full_width, full_height), dtype); - crossCorr(src, kernel, temp, src.size(), - CV_MAKETYPE(ddepth, src_channels), - anchor, delta, borderType); - if (temp.data != dst_data) - temp.copyTo(dst); - } - return true; -} - -static void ocvFilter2D(int stype, int dtype, int kernel_type, - uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int width, int height, - int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernel_data, size_t kernel_step, - int kernel_width, int kernel_height, - int anchor_x, int anchor_y, - double delta, int borderType) -{ - int borderTypeValue = borderType & ~BORDER_ISOLATED; - Mat kernel = Mat(Size(kernel_width, kernel_height), kernel_type, kernel_data, kernel_step); - Ptr f = createLinearFilter(stype, dtype, kernel, Point(anchor_x, anchor_y), delta, - borderTypeValue); - Mat src(Size(width, height), stype, src_data, src_step); - Mat dst(Size(width, height), dtype, dst_data, dst_step); - f->apply(src, dst, Size(full_width, full_height), Point(offset_x, offset_y)); -} - -static bool replacementSepFilter(int stype, int dtype, int ktype, - uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, - int width, int height, int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernelx_data, int kernelx_len, - uchar * kernely_data, int kernely_len, - int anchor_x, int anchor_y, double delta, int borderType) -{ - cvhalFilter2D *ctx; - int res = cv_hal_sepFilterInit(&ctx, stype, dtype, ktype, - kernelx_data, kernelx_len, - kernely_data, kernely_len, - anchor_x, anchor_y, delta, borderType); - if (res != CV_HAL_ERROR_OK) - return false; - res = cv_hal_sepFilter(ctx, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y); - bool success = (res == CV_HAL_ERROR_OK); - res = cv_hal_sepFilterFree(ctx); - if (res != CV_HAL_ERROR_OK) - return false; - return success; -} - -static void ocvSepFilter(int stype, int dtype, int ktype, - uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, - int width, int height, int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernelx_data, int kernelx_len, - uchar * kernely_data, int kernely_len, - int anchor_x, int anchor_y, double delta, int borderType) -{ - Mat kernelX(Size(kernelx_len, 1), ktype, kernelx_data); - Mat kernelY(Size(kernely_len, 1), ktype, kernely_data); - Ptr f = createSeparableLinearFilter(stype, dtype, kernelX, kernelY, - Point(anchor_x, anchor_y), - delta, borderType & ~BORDER_ISOLATED); - Mat src(Size(width, height), stype, src_data, src_step); - Mat dst(Size(width, height), dtype, dst_data, dst_step); - f->apply(src, dst, Size(full_width, full_height), Point(offset_x, offset_y)); -}; - -//=================================================================== -// HAL functions -//=================================================================== - -namespace cv { -namespace hal { - - -CV_DEPRECATED Ptr Filter2D::create(uchar * , size_t , int , - int , int , - int , int , - int , int , - int , double , - int , int , - bool , bool ) { return Ptr(); } - -CV_DEPRECATED Ptr SepFilter2D::create(int , int , int , - uchar * , int , - uchar * , int , - int , int , - double , int ) { return Ptr(); } - - -void filter2D(int stype, int dtype, int kernel_type, - uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int width, int height, - int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernel_data, size_t kernel_step, - int kernel_width, int kernel_height, - int anchor_x, int anchor_y, - double delta, int borderType, - bool isSubmatrix) -{ - bool res; - res = replacementFilter2D(stype, dtype, kernel_type, - src_data, src_step, - dst_data, dst_step, - width, height, - full_width, full_height, - offset_x, offset_y, - kernel_data, kernel_step, - kernel_width, kernel_height, - anchor_x, anchor_y, - delta, borderType, isSubmatrix); - if (res) - return; - - /*CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type, - src_data, src_step, - dst_data, dst_step, - width, height, - full_width, full_height, - offset_x, offset_y, - kernel_data, kernel_step, - kernel_width, kernel_height, - anchor_x, anchor_y, - delta, borderType, isSubmatrix))*/ - - res = dftFilter2D(stype, dtype, kernel_type, - src_data, src_step, - dst_data, dst_step, - full_width, full_height, - offset_x, offset_y, - kernel_data, kernel_step, - kernel_width, kernel_height, - anchor_x, anchor_y, - delta, borderType); - if (res) - return; - ocvFilter2D(stype, dtype, kernel_type, - src_data, src_step, - dst_data, dst_step, - width, height, - full_width, full_height, - offset_x, offset_y, - kernel_data, kernel_step, - kernel_width, kernel_height, - anchor_x, anchor_y, - delta, borderType); -} - -//--------------------------------------------------------------- - -void sepFilter2D(int stype, int dtype, int ktype, - uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, - int width, int height, int full_width, int full_height, - int offset_x, int offset_y, - uchar * kernelx_data, int kernelx_len, - uchar * kernely_data, int kernely_len, - int anchor_x, int anchor_y, double delta, int borderType) -{ - - bool res = replacementSepFilter(stype, dtype, ktype, - src_data, src_step, dst_data, dst_step, - width, height, full_width, full_height, - offset_x, offset_y, - kernelx_data, kernelx_len, - kernely_data, kernely_len, - anchor_x, anchor_y, delta, borderType); - if (res) - return; - ocvSepFilter(stype, dtype, ktype, - src_data, src_step, dst_data, dst_step, - width, height, full_width, full_height, - offset_x, offset_y, - kernelx_data, kernelx_len, - kernely_data, kernely_len, - anchor_x, anchor_y, delta, borderType); -} - -} // cv::hal:: -} // cv:: - -//================================================================ -// Main interface -//================================================================ - -void cv::filter2D( InputArray _src, OutputArray _dst, int ddepth, - InputArray _kernel, Point anchor0, - double delta, int borderType ) -{ - CV_INSTRUMENT_REGION(); - - CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, - ocl_filter2D(_src, _dst, ddepth, _kernel, anchor0, delta, borderType)) - - Mat src = _src.getMat(), kernel = _kernel.getMat(); - - if( ddepth < 0 ) - ddepth = src.depth(); - - _dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); - Mat dst = _dst.getMat(); - Point anchor = normalizeAnchor(anchor0, kernel.size()); - - Point ofs; - Size wsz(src.cols, src.rows); - if( (borderType & BORDER_ISOLATED) == 0 ) - src.locateROI( wsz, ofs ); - - hal::filter2D(src.type(), dst.type(), kernel.type(), - src.data, src.step, dst.data, dst.step, - dst.cols, dst.rows, wsz.width, wsz.height, ofs.x, ofs.y, - kernel.data, kernel.step, kernel.cols, kernel.rows, - anchor.x, anchor.y, - delta, borderType, src.isSubmatrix()); -} - -void cv::sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, - InputArray _kernelX, InputArray _kernelY, Point anchor, - double delta, int borderType ) -{ - CV_INSTRUMENT_REGION(); - - CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (size_t)_src.rows() > _kernelY.total() && (size_t)_src.cols() > _kernelX.total(), - ocl_sepFilter2D(_src, _dst, ddepth, _kernelX, _kernelY, anchor, delta, borderType)) - - Mat src = _src.getMat(), kernelX = _kernelX.getMat(), kernelY = _kernelY.getMat(); - - if( ddepth < 0 ) - ddepth = src.depth(); - - _dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); - Mat dst = _dst.getMat(); - - Point ofs; - Size wsz(src.cols, src.rows); - if( (borderType & BORDER_ISOLATED) == 0 ) - src.locateROI( wsz, ofs ); - - CV_Assert( kernelX.type() == kernelY.type() && - (kernelX.cols == 1 || kernelX.rows == 1) && - (kernelY.cols == 1 || kernelY.rows == 1) ); - - Mat contKernelX = kernelX.isContinuous() ? kernelX : kernelX.clone(); - Mat contKernelY = kernelY.isContinuous() ? kernelY : kernelY.clone(); - - hal::sepFilter2D(src.type(), dst.type(), kernelX.type(), - src.data, src.step, dst.data, dst.step, - dst.cols, dst.rows, wsz.width, wsz.height, ofs.x, ofs.y, - contKernelX.data, kernelX.cols + kernelX.rows - 1, - contKernelY.data, kernelY.cols + kernelY.rows - 1, - anchor.x, anchor.y, delta, borderType & ~BORDER_ISOLATED); -} - - -CV_IMPL void -cvFilter2D( const CvArr* srcarr, CvArr* dstarr, const CvMat* _kernel, CvPoint anchor ) -{ - cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); - cv::Mat kernel = cv::cvarrToMat(_kernel); - - CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() ); - - cv::filter2D( src, dst, dst.depth(), kernel, anchor, 0, cv::BORDER_REPLICATE ); -} - -/* End of file. */ +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/imgproc/src/fixedpoint.inl.hpp b/modules/imgproc/src/fixedpoint.inl.hpp index a1a75a29e1..40b1c3faa1 100644 --- a/modules/imgproc/src/fixedpoint.inl.hpp +++ b/modules/imgproc/src/fixedpoint.inl.hpp @@ -9,10 +9,7 @@ #ifndef _CV_FIXEDPOINT_HPP_ #define _CV_FIXEDPOINT_HPP_ -#include "opencv2/core/softfloat.hpp" - -namespace -{ +namespace { class fixedpoint64 { diff --git a/modules/imgproc/src/median_blur.dispatch.cpp b/modules/imgproc/src/median_blur.dispatch.cpp new file mode 100644 index 0000000000..d993fbad5b --- /dev/null +++ b/modules/imgproc/src/median_blur.dispatch.cpp @@ -0,0 +1,317 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, 2018, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2014-2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#include + +#include "opencv2/core/hal/intrin.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "opencv2/core/openvx/ovx_defs.hpp" + +#include "median_blur.simd.hpp" +#include "median_blur.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv { + +#ifdef HAVE_OPENCL + +#define DIVUP(total, grain) ((total + grain - 1) / (grain)) + +static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m) +{ + size_t localsize[2] = { 16, 16 }; + size_t globalsize[2]; + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + + if ( !((depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F) && cn <= 4 && (m == 3 || m == 5)) ) + return false; + + Size imgSize = _src.size(); + bool useOptimized = (1 == cn) && + (size_t)imgSize.width >= localsize[0] * 8 && + (size_t)imgSize.height >= localsize[1] * 8 && + imgSize.width % 4 == 0 && + imgSize.height % 4 == 0 && + (ocl::Device::getDefault().isIntel()); + + cv::String kname = format( useOptimized ? "medianFilter%d_u" : "medianFilter%d", m) ; + cv::String kdefs = useOptimized ? + format("-D T=%s -D T1=%s -D T4=%s%d -D cn=%d -D USE_4OPT", ocl::typeToStr(type), + ocl::typeToStr(depth), ocl::typeToStr(depth), cn*4, cn) + : + format("-D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn) ; + + ocl::Kernel k(kname.c_str(), ocl::imgproc::medianFilter_oclsrc, kdefs.c_str() ); + + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(src.size(), type); + UMat dst = _dst.getUMat(); + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); + + if( useOptimized ) + { + globalsize[0] = DIVUP(src.cols / 4, localsize[0]) * localsize[0]; + globalsize[1] = DIVUP(src.rows / 4, localsize[1]) * localsize[1]; + } + else + { + globalsize[0] = (src.cols + localsize[0] + 2) / localsize[0] * localsize[0]; + globalsize[1] = (src.rows + localsize[1] - 1) / localsize[1] * localsize[1]; + } + + return k.run(2, globalsize, localsize, false); +} + +#undef DIVUP + +#endif + +#ifdef HAVE_OPENVX +namespace ovx { + template <> inline bool skipSmallImages(int w, int h) { return w*h < 1280 * 720; } +} +static bool openvx_medianFilter(InputArray _src, OutputArray _dst, int ksize) +{ + if (_src.type() != CV_8UC1 || _dst.type() != CV_8U +#ifndef VX_VERSION_1_1 + || ksize != 3 +#endif + ) + return false; + + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + if ( +#ifdef VX_VERSION_1_1 + ksize != 3 ? ovx::skipSmallImages(src.cols, src.rows) : +#endif + ovx::skipSmallImages(src.cols, src.rows) + ) + return false; + + try + { + ivx::Context ctx = ovx::getOpenVXContext(); +#ifdef VX_VERSION_1_1 + if ((vx_size)ksize > ctx.nonlinearMaxDimension()) + return false; +#endif + + Mat a; + if (dst.data != src.data) + a = src; + else + src.copyTo(a); + + ivx::Image + ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), + ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); + + //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments + //since OpenVX standard says nothing about thread-safety for now + ivx::border_t prevBorder = ctx.immediateBorder(); + ctx.setImmediateBorder(VX_BORDER_REPLICATE); +#ifdef VX_VERSION_1_1 + if (ksize == 3) +#endif + { + ivx::IVX_CHECK_STATUS(vxuMedian3x3(ctx, ia, ib)); + } +#ifdef VX_VERSION_1_1 + else + { + ivx::Matrix mtx; + if(ksize == 5) + mtx = ivx::Matrix::createFromPattern(ctx, VX_PATTERN_BOX, ksize, ksize); + else + { + vx_size supportedSize; + ivx::IVX_CHECK_STATUS(vxQueryContext(ctx, VX_CONTEXT_NONLINEAR_MAX_DIMENSION, &supportedSize, sizeof(supportedSize))); + if ((vx_size)ksize > supportedSize) + { + ctx.setImmediateBorder(prevBorder); + return false; + } + Mat mask(ksize, ksize, CV_8UC1, Scalar(255)); + mtx = ivx::Matrix::create(ctx, VX_TYPE_UINT8, ksize, ksize); + mtx.copyFrom(mask); + } + ivx::IVX_CHECK_STATUS(vxuNonLinearFilter(ctx, VX_NONLINEAR_FILTER_MEDIAN, ia, mtx, ib)); + } +#endif + ctx.setImmediateBorder(prevBorder); + } + catch (const ivx::RuntimeError & e) + { + VX_DbgThrow(e.what()); + } + catch (const ivx::WrapperError & e) + { + VX_DbgThrow(e.what()); + } + + return true; +} +#endif + +#ifdef HAVE_IPP +static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize) +{ + CV_INSTRUMENT_REGION_IPP(); + +#if IPP_VERSION_X100 < 201801 + // Degradations for big kernel + if(ksize > 7) + return false; +#endif + + { + int bufSize; + IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); + IppDataType ippType = ippiGetDataType(src0.type()); + int channels = src0.channels(); + IppAutoBuffer buffer; + + if(src0.isSubmatrix()) + return false; + + Mat src; + if(dst.data != src0.data) + src = src0; + else + src0.copyTo(src); + + if(ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, ippType, channels, &bufSize) < 0) + return false; + + buffer.allocate(bufSize); + + switch(ippType) + { + case ipp8u: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 3) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 4) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + case ipp16u: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 3) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 4) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + case ipp16s: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 3) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 4) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + case ipp32f: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_32f_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + default: + return false; + } + } +} +#endif + +void medianBlur( InputArray _src0, OutputArray _dst, int ksize ) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert( (ksize % 2 == 1) && (_src0.dims() <= 2 )); + + if( ksize <= 1 || _src0.empty() ) + { + _src0.copyTo(_dst); + return; + } + + CV_OCL_RUN(_dst.isUMat(), + ocl_medianFilter(_src0,_dst, ksize)) + + Mat src0 = _src0.getMat(); + _dst.create( src0.size(), src0.type() ); + Mat dst = _dst.getMat(); + + CALL_HAL(medianBlur, cv_hal_medianBlur, src0.data, src0.step, dst.data, dst.step, src0.cols, src0.rows, src0.depth(), + src0.channels(), ksize); + + CV_OVX_RUN(true, + openvx_medianFilter(_src0, _dst, ksize)) + + CV_IPP_RUN_FAST(ipp_medianFilter(src0, dst, ksize)); + +#ifdef HAVE_TEGRA_OPTIMIZATION + if (tegra::useTegra() && tegra::medianBlur(src0, dst, ksize)) + return; +#endif + + CV_CPU_DISPATCH(medianBlur, (src0, dst, ksize), + CV_CPU_DISPATCH_MODES_ALL); +} + +} // namespace + +/* End of file. */ diff --git a/modules/imgproc/src/median_blur.cpp b/modules/imgproc/src/median_blur.simd.hpp similarity index 79% rename from modules/imgproc/src/median_blur.cpp rename to modules/imgproc/src/median_blur.simd.hpp index d565b9486d..c3203f2a07 100644 --- a/modules/imgproc/src/median_blur.cpp +++ b/modules/imgproc/src/median_blur.simd.hpp @@ -46,9 +46,11 @@ #include #include "opencv2/core/hal/intrin.hpp" -#include "opencl_kernels_imgproc.hpp" -#include "opencv2/core/openvx/ovx_defs.hpp" +#ifdef _MSC_VER +#pragma warning(disable: 4244) // warning C4244: 'argument': conversion from 'int' to 'ushort', possible loss of data + // triggered on intrinsic code from medianBlur_8u_O1() +#endif /* * This file includes the code, contributed by Simon Perreault @@ -71,12 +73,18 @@ Median Filter \****************************************************************************************/ -namespace cv -{ +namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +void medianBlur(const Mat& src0, /*const*/ Mat& dst, int ksize); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY static void medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) { + CV_INSTRUMENT_REGION(); + typedef ushort HT; /** @@ -330,9 +338,6 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) } } } -#if CV_SIMD - vx_cleanup(); -#endif } #undef HOP @@ -342,6 +347,8 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) static void medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m ) { + CV_INSTRUMENT_REGION(); + #define N 16 int zone0[4][N]; int zone1[4][N*N]; @@ -671,6 +678,8 @@ template static void medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) { + CV_INSTRUMENT_REGION(); + typedef typename Op::value_type T; typedef typename Op::arg_type WT; typedef typename VecOp::arg_type VT; @@ -770,9 +779,6 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) limit = size.width; } } -#if CV_SIMD - vx_cleanup(); -#endif } else if( m == 5 ) { @@ -934,268 +940,15 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) limit = size.width; } } -#if CV_SIMD - vx_cleanup(); -#endif } } -#ifdef HAVE_OPENCL +} // namespace anon -#define DIVUP(total, grain) ((total + grain - 1) / (grain)) - -static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m) -{ - size_t localsize[2] = { 16, 16 }; - size_t globalsize[2]; - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - - if ( !((depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F) && cn <= 4 && (m == 3 || m == 5)) ) - return false; - - Size imgSize = _src.size(); - bool useOptimized = (1 == cn) && - (size_t)imgSize.width >= localsize[0] * 8 && - (size_t)imgSize.height >= localsize[1] * 8 && - imgSize.width % 4 == 0 && - imgSize.height % 4 == 0 && - (ocl::Device::getDefault().isIntel()); - - cv::String kname = format( useOptimized ? "medianFilter%d_u" : "medianFilter%d", m) ; - cv::String kdefs = useOptimized ? - format("-D T=%s -D T1=%s -D T4=%s%d -D cn=%d -D USE_4OPT", ocl::typeToStr(type), - ocl::typeToStr(depth), ocl::typeToStr(depth), cn*4, cn) - : - format("-D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn) ; - - ocl::Kernel k(kname.c_str(), ocl::imgproc::medianFilter_oclsrc, kdefs.c_str() ); - - if (k.empty()) - return false; - - UMat src = _src.getUMat(); - _dst.create(src.size(), type); - UMat dst = _dst.getUMat(); - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); - - if( useOptimized ) - { - globalsize[0] = DIVUP(src.cols / 4, localsize[0]) * localsize[0]; - globalsize[1] = DIVUP(src.rows / 4, localsize[1]) * localsize[1]; - } - else - { - globalsize[0] = (src.cols + localsize[0] + 2) / localsize[0] * localsize[0]; - globalsize[1] = (src.rows + localsize[1] - 1) / localsize[1] * localsize[1]; - } - - return k.run(2, globalsize, localsize, false); -} - -#undef DIVUP - -#endif - -#ifdef HAVE_OPENVX -} // close anonymous namespace #13634 -namespace ovx { - template <> inline bool skipSmallImages(int w, int h) { return w*h < 1280 * 720; } -} -namespace { // reopen it -static bool openvx_medianFilter(InputArray _src, OutputArray _dst, int ksize) -{ - if (_src.type() != CV_8UC1 || _dst.type() != CV_8U -#ifndef VX_VERSION_1_1 - || ksize != 3 -#endif - ) - return false; - - Mat src = _src.getMat(); - Mat dst = _dst.getMat(); - - if ( -#ifdef VX_VERSION_1_1 - ksize != 3 ? ovx::skipSmallImages(src.cols, src.rows) : -#endif - ovx::skipSmallImages(src.cols, src.rows) - ) - return false; - - try - { - ivx::Context ctx = ovx::getOpenVXContext(); -#ifdef VX_VERSION_1_1 - if ((vx_size)ksize > ctx.nonlinearMaxDimension()) - return false; -#endif - - Mat a; - if (dst.data != src.data) - a = src; - else - src.copyTo(a); - - ivx::Image - ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), - ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); - - //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments - //since OpenVX standard says nothing about thread-safety for now - ivx::border_t prevBorder = ctx.immediateBorder(); - ctx.setImmediateBorder(VX_BORDER_REPLICATE); -#ifdef VX_VERSION_1_1 - if (ksize == 3) -#endif - { - ivx::IVX_CHECK_STATUS(vxuMedian3x3(ctx, ia, ib)); - } -#ifdef VX_VERSION_1_1 - else - { - ivx::Matrix mtx; - if(ksize == 5) - mtx = ivx::Matrix::createFromPattern(ctx, VX_PATTERN_BOX, ksize, ksize); - else - { - vx_size supportedSize; - ivx::IVX_CHECK_STATUS(vxQueryContext(ctx, VX_CONTEXT_NONLINEAR_MAX_DIMENSION, &supportedSize, sizeof(supportedSize))); - if ((vx_size)ksize > supportedSize) - { - ctx.setImmediateBorder(prevBorder); - return false; - } - Mat mask(ksize, ksize, CV_8UC1, Scalar(255)); - mtx = ivx::Matrix::create(ctx, VX_TYPE_UINT8, ksize, ksize); - mtx.copyFrom(mask); - } - ivx::IVX_CHECK_STATUS(vxuNonLinearFilter(ctx, VX_NONLINEAR_FILTER_MEDIAN, ia, mtx, ib)); - } -#endif - ctx.setImmediateBorder(prevBorder); - } - catch (const ivx::RuntimeError & e) - { - VX_DbgThrow(e.what()); - } - catch (const ivx::WrapperError & e) - { - VX_DbgThrow(e.what()); - } - - return true; -} -#endif - -#if 0 //defined HAVE_IPP -static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize) -{ - CV_INSTRUMENT_REGION_IPP(); - -#if IPP_VERSION_X100 < 201801 - // Degradations for big kernel - if(ksize > 7) - return false; -#endif - - { - int bufSize; - IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); - IppDataType ippType = ippiGetDataType(src0.type()); - int channels = src0.channels(); - IppAutoBuffer buffer; - - if(src0.isSubmatrix()) - return false; - - Mat src; - if(dst.data != src0.data) - src = src0; - else - src0.copyTo(src); - - if(ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, ippType, channels, &bufSize) < 0) - return false; - - buffer.allocate(bufSize); - - switch(ippType) - { - case ipp8u: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 3) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 4) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - case ipp16u: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 3) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 4) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - case ipp16s: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 3) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 4) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - case ipp32f: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_32f_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - default: - return false; - } - } -} -#endif -} - -void medianBlur( InputArray _src0, OutputArray _dst, int ksize ) +void medianBlur(const Mat& src0, /*const*/ Mat& dst, int ksize) { CV_INSTRUMENT_REGION(); - CV_Assert( (ksize % 2 == 1) && (_src0.dims() <= 2 )); - - if( ksize <= 1 || _src0.empty() ) - { - _src0.copyTo(_dst); - return; - } - - CV_OCL_RUN(_dst.isUMat(), - ocl_medianFilter(_src0,_dst, ksize)) - - Mat src0 = _src0.getMat(); - _dst.create( src0.size(), src0.type() ); - Mat dst = _dst.getMat(); - - CALL_HAL(medianBlur, cv_hal_medianBlur, src0.data, src0.step, dst.data, dst.step, src0.cols, src0.rows, src0.depth(), - src0.channels(), ksize); - - CV_OVX_RUN(true, - openvx_medianFilter(_src0, _dst, ksize)) - - //CV_IPP_RUN_FAST(ipp_medianFilter(src0, dst, ksize)); - -#ifdef HAVE_TEGRA_OPTIMIZATION - if (tegra::useTegra() && tegra::medianBlur(src0, dst, ksize)) - return; -#endif - bool useSortNet = ksize == 3 || (ksize == 5 #if !(CV_SIMD) && ( src0.depth() > CV_8U || src0.channels() == 2 || src0.channels() > 4 ) @@ -1225,6 +978,7 @@ void medianBlur( InputArray _src0, OutputArray _dst, int ksize ) } else { + // TODO AVX guard (external call) cv::copyMakeBorder( src0, src, 0, 0, ksize/2, ksize/2, BORDER_REPLICATE|BORDER_ISOLATED); int cn = src0.channels(); @@ -1239,6 +993,6 @@ void medianBlur( InputArray _src0, OutputArray _dst, int ksize ) } } -} - -/* End of file. */ +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.dispatch.cpp similarity index 64% rename from modules/imgproc/src/morph.cpp rename to modules/imgproc/src/morph.dispatch.cpp index e4d08da8af..441797ce88 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.dispatch.cpp @@ -48,779 +48,49 @@ #include "opencv2/core/hal/intrin.hpp" #include +#include "morph.simd.hpp" +#include "morph.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + + /****************************************************************************************\ Basic Morphological Operations: Erosion & Dilation \****************************************************************************************/ -using namespace std; - -namespace cv -{ - -template struct MinOp -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator ()(const T a, const T b) const { return std::min(a, b); } -}; - -template struct MaxOp -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator ()(const T a, const T b) const { return std::max(a, b); } -}; - -#undef CV_MIN_8U -#undef CV_MAX_8U -#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b))) -#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a))) - -template<> inline uchar MinOp::operator ()(const uchar a, const uchar b) const { return CV_MIN_8U(a, b); } -template<> inline uchar MaxOp::operator ()(const uchar a, const uchar b) const { return CV_MAX_8U(a, b); } - -struct MorphRowNoVec -{ - MorphRowNoVec(int, int) {} - int operator()(const uchar*, uchar*, int, int) const { return 0; } -}; - -struct MorphColumnNoVec -{ - MorphColumnNoVec(int, int) {} - int operator()(const uchar**, uchar*, int, int, int) const { return 0; } -}; - -struct MorphNoVec -{ - int operator()(uchar**, int, uchar*, int) const { return 0; } -}; - -#if CV_SIMD - -template struct MorphRowVec -{ - typedef typename VecUpdate::vtype vtype; - typedef typename vtype::lane_type stype; - MorphRowVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {} - int operator()(const uchar* src, uchar* dst, int width, int cn) const - { - int i, k, _ksize = ksize*cn; - width *= cn; - VecUpdate updateOp; - - for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes ) - { - vtype s0 = vx_load((const stype*)src + i); - vtype s1 = vx_load((const stype*)src + i + vtype::nlanes); - vtype s2 = vx_load((const stype*)src + i + 2*vtype::nlanes); - vtype s3 = vx_load((const stype*)src + i + 3*vtype::nlanes); - for (k = cn; k < _ksize; k += cn) - { - s0 = updateOp(s0, vx_load((const stype*)src + i + k)); - s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes)); - s2 = updateOp(s2, vx_load((const stype*)src + i + k + 2*vtype::nlanes)); - s3 = updateOp(s3, vx_load((const stype*)src + i + k + 3*vtype::nlanes)); - } - v_store((stype*)dst + i, s0); - v_store((stype*)dst + i + vtype::nlanes, s1); - v_store((stype*)dst + i + 2*vtype::nlanes, s2); - v_store((stype*)dst + i + 3*vtype::nlanes, s3); - } - if( i <= width - 2*vtype::nlanes ) - { - vtype s0 = vx_load((const stype*)src + i); - vtype s1 = vx_load((const stype*)src + i + vtype::nlanes); - for( k = cn; k < _ksize; k += cn ) - { - s0 = updateOp(s0, vx_load((const stype*)src + i + k)); - s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes)); - } - v_store((stype*)dst + i, s0); - v_store((stype*)dst + i + vtype::nlanes, s1); - i += 2*vtype::nlanes; - } - if( i <= width - vtype::nlanes ) - { - vtype s = vx_load((const stype*)src + i); - for( k = cn; k < _ksize; k += cn ) - s = updateOp(s, vx_load((const stype*)src + i + k)); - v_store((stype*)dst + i, s); - i += vtype::nlanes; - } - if( i <= width - vtype::nlanes/2 ) - { - vtype s = vx_load_low((const stype*)src + i); - for( k = cn; k < _ksize; k += cn ) - s = updateOp(s, vx_load_low((const stype*)src + i + k)); - v_store_low((stype*)dst + i, s); - i += vtype::nlanes/2; - } - - return i - i % cn; - } - - int ksize, anchor; -}; - - -template struct MorphColumnVec -{ - typedef typename VecUpdate::vtype vtype; - typedef typename vtype::lane_type stype; - MorphColumnVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {} - int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const - { - int i = 0, k, _ksize = ksize; - VecUpdate updateOp; - - for( i = 0; i < count + ksize - 1; i++ ) - CV_Assert( ((size_t)_src[i] & (CV_SIMD_WIDTH-1)) == 0 ); - - const stype** src = (const stype**)_src; - stype* dst = (stype*)_dst; - dststep /= sizeof(dst[0]); - - for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 ) - { - for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes) - { - const stype* sptr = src[1] + i; - vtype s0 = vx_load_aligned(sptr); - vtype s1 = vx_load_aligned(sptr + vtype::nlanes); - vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes); - vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes); - - for( k = 2; k < _ksize; k++ ) - { - sptr = src[k] + i; - s0 = updateOp(s0, vx_load_aligned(sptr)); - s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); - s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)); - s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)); - } - - sptr = src[0] + i; - v_store(dst + i, updateOp(s0, vx_load_aligned(sptr))); - v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); - v_store(dst + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes))); - v_store(dst + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes))); - - sptr = src[k] + i; - v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr))); - v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); - v_store(dst + dststep + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes))); - v_store(dst + dststep + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes))); - } - if( i <= width - 2*vtype::nlanes ) - { - const stype* sptr = src[1] + i; - vtype s0 = vx_load_aligned(sptr); - vtype s1 = vx_load_aligned(sptr + vtype::nlanes); - - for( k = 2; k < _ksize; k++ ) - { - sptr = src[k] + i; - s0 = updateOp(s0, vx_load_aligned(sptr)); - s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); - } - - sptr = src[0] + i; - v_store(dst + i, updateOp(s0, vx_load_aligned(sptr))); - v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); - - sptr = src[k] + i; - v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr))); - v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); - i += 2*vtype::nlanes; - } - if( i <= width - vtype::nlanes ) - { - vtype s0 = vx_load_aligned(src[1] + i); - - for( k = 2; k < _ksize; k++ ) - s0 = updateOp(s0, vx_load_aligned(src[k] + i)); - - v_store(dst + i, updateOp(s0, vx_load_aligned(src[0] + i))); - v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(src[k] + i))); - i += vtype::nlanes; - } - if( i <= width - vtype::nlanes/2 ) - { - vtype s0 = vx_load_low(src[1] + i); - - for( k = 2; k < _ksize; k++ ) - s0 = updateOp(s0, vx_load_low(src[k] + i)); - - v_store_low(dst + i, updateOp(s0, vx_load_low(src[0] + i))); - v_store_low(dst + dststep + i, updateOp(s0, vx_load_low(src[k] + i))); - i += vtype::nlanes/2; - } - } - - for( ; count > 0; count--, dst += dststep, src++ ) - { - for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes) - { - const stype* sptr = src[0] + i; - vtype s0 = vx_load_aligned(sptr); - vtype s1 = vx_load_aligned(sptr + vtype::nlanes); - vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes); - vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes); - - for( k = 1; k < _ksize; k++ ) - { - sptr = src[k] + i; - s0 = updateOp(s0, vx_load_aligned(sptr)); - s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); - s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)); - s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)); - } - v_store(dst + i, s0); - v_store(dst + i + vtype::nlanes, s1); - v_store(dst + i + 2*vtype::nlanes, s2); - v_store(dst + i + 3*vtype::nlanes, s3); - } - if( i <= width - 2*vtype::nlanes ) - { - const stype* sptr = src[0] + i; - vtype s0 = vx_load_aligned(sptr); - vtype s1 = vx_load_aligned(sptr + vtype::nlanes); - - for( k = 1; k < _ksize; k++ ) - { - sptr = src[k] + i; - s0 = updateOp(s0, vx_load_aligned(sptr)); - s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); - } - v_store(dst + i, s0); - v_store(dst + i + vtype::nlanes, s1); - i += 2*vtype::nlanes; - } - if( i <= width - vtype::nlanes ) - { - vtype s0 = vx_load_aligned(src[0] + i); - - for( k = 1; k < _ksize; k++ ) - s0 = updateOp(s0, vx_load_aligned(src[k] + i)); - v_store(dst + i, s0); - i += vtype::nlanes; - } - if( i <= width - vtype::nlanes/2 ) - { - vtype s0 = vx_load_low(src[0] + i); - - for( k = 1; k < _ksize; k++ ) - s0 = updateOp(s0, vx_load_low(src[k] + i)); - v_store_low(dst + i, s0); - i += vtype::nlanes/2; - } - } - - return i; - } - - int ksize, anchor; -}; - - -template struct MorphVec -{ - typedef typename VecUpdate::vtype vtype; - typedef typename vtype::lane_type stype; - int operator()(uchar** _src, int nz, uchar* _dst, int width) const - { - const stype** src = (const stype**)_src; - stype* dst = (stype*)_dst; - int i, k; - VecUpdate updateOp; - - for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes ) - { - const stype* sptr = src[0] + i; - vtype s0 = vx_load(sptr); - vtype s1 = vx_load(sptr + vtype::nlanes); - vtype s2 = vx_load(sptr + 2*vtype::nlanes); - vtype s3 = vx_load(sptr + 3*vtype::nlanes); - for( k = 1; k < nz; k++ ) - { - sptr = src[k] + i; - s0 = updateOp(s0, vx_load(sptr)); - s1 = updateOp(s1, vx_load(sptr + vtype::nlanes)); - s2 = updateOp(s2, vx_load(sptr + 2*vtype::nlanes)); - s3 = updateOp(s3, vx_load(sptr + 3*vtype::nlanes)); - } - v_store(dst + i, s0); - v_store(dst + i + vtype::nlanes, s1); - v_store(dst + i + 2*vtype::nlanes, s2); - v_store(dst + i + 3*vtype::nlanes, s3); - } - if( i <= width - 2*vtype::nlanes ) - { - const stype* sptr = src[0] + i; - vtype s0 = vx_load(sptr); - vtype s1 = vx_load(sptr + vtype::nlanes); - for( k = 1; k < nz; k++ ) - { - sptr = src[k] + i; - s0 = updateOp(s0, vx_load(sptr)); - s1 = updateOp(s1, vx_load(sptr + vtype::nlanes)); - } - v_store(dst + i, s0); - v_store(dst + i + vtype::nlanes, s1); - i += 2*vtype::nlanes; - } - if( i <= width - vtype::nlanes ) - { - vtype s0 = vx_load(src[0] + i); - for( k = 1; k < nz; k++ ) - s0 = updateOp(s0, vx_load(src[k] + i)); - v_store(dst + i, s0); - i += vtype::nlanes; - } - if( i <= width - vtype::nlanes/2 ) - { - vtype s0 = vx_load_low(src[0] + i); - for( k = 1; k < nz; k++ ) - s0 = updateOp(s0, vx_load_low(src[k] + i)); - v_store_low(dst + i, s0); - i += vtype::nlanes/2; - } - return i; - } -}; - -template struct VMin -{ - typedef T vtype; - vtype operator()(const vtype& a, const vtype& b) const { return v_min(a,b); } -}; -template struct VMax -{ - typedef T vtype; - vtype operator()(const vtype& a, const vtype& b) const { return v_max(a,b); } -}; - -typedef MorphRowVec > ErodeRowVec8u; -typedef MorphRowVec > DilateRowVec8u; -typedef MorphRowVec > ErodeRowVec16u; -typedef MorphRowVec > DilateRowVec16u; -typedef MorphRowVec > ErodeRowVec16s; -typedef MorphRowVec > DilateRowVec16s; -typedef MorphRowVec > ErodeRowVec32f; -typedef MorphRowVec > DilateRowVec32f; - -typedef MorphColumnVec > ErodeColumnVec8u; -typedef MorphColumnVec > DilateColumnVec8u; -typedef MorphColumnVec > ErodeColumnVec16u; -typedef MorphColumnVec > DilateColumnVec16u; -typedef MorphColumnVec > ErodeColumnVec16s; -typedef MorphColumnVec > DilateColumnVec16s; -typedef MorphColumnVec > ErodeColumnVec32f; -typedef MorphColumnVec > DilateColumnVec32f; - -typedef MorphVec > ErodeVec8u; -typedef MorphVec > DilateVec8u; -typedef MorphVec > ErodeVec16u; -typedef MorphVec > DilateVec16u; -typedef MorphVec > ErodeVec16s; -typedef MorphVec > DilateVec16s; -typedef MorphVec > ErodeVec32f; -typedef MorphVec > DilateVec32f; - -#else - -typedef MorphRowNoVec ErodeRowVec8u; -typedef MorphRowNoVec DilateRowVec8u; - -typedef MorphColumnNoVec ErodeColumnVec8u; -typedef MorphColumnNoVec DilateColumnVec8u; - -typedef MorphRowNoVec ErodeRowVec16u; -typedef MorphRowNoVec DilateRowVec16u; -typedef MorphRowNoVec ErodeRowVec16s; -typedef MorphRowNoVec DilateRowVec16s; -typedef MorphRowNoVec ErodeRowVec32f; -typedef MorphRowNoVec DilateRowVec32f; - -typedef MorphColumnNoVec ErodeColumnVec16u; -typedef MorphColumnNoVec DilateColumnVec16u; -typedef MorphColumnNoVec ErodeColumnVec16s; -typedef MorphColumnNoVec DilateColumnVec16s; -typedef MorphColumnNoVec ErodeColumnVec32f; -typedef MorphColumnNoVec DilateColumnVec32f; - -typedef MorphNoVec ErodeVec8u; -typedef MorphNoVec DilateVec8u; -typedef MorphNoVec ErodeVec16u; -typedef MorphNoVec DilateVec16u; -typedef MorphNoVec ErodeVec16s; -typedef MorphNoVec DilateVec16s; -typedef MorphNoVec ErodeVec32f; -typedef MorphNoVec DilateVec32f; - -#endif - -typedef MorphRowNoVec ErodeRowVec64f; -typedef MorphRowNoVec DilateRowVec64f; -typedef MorphColumnNoVec ErodeColumnVec64f; -typedef MorphColumnNoVec DilateColumnVec64f; -typedef MorphNoVec ErodeVec64f; -typedef MorphNoVec DilateVec64f; - - -template struct MorphRowFilter : public BaseRowFilter -{ - typedef typename Op::rtype T; - - MorphRowFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor) - { - ksize = _ksize; - anchor = _anchor; - } - - void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE - { - int i, j, k, _ksize = ksize*cn; - const T* S = (const T*)src; - Op op; - T* D = (T*)dst; - - if( _ksize == cn ) - { - for( i = 0; i < width*cn; i++ ) - D[i] = S[i]; - return; - } - - int i0 = vecOp(src, dst, width, cn); - width *= cn; - - for( k = 0; k < cn; k++, S++, D++ ) - { - for( i = i0; i <= width - cn*2; i += cn*2 ) - { - const T* s = S + i; - T m = s[cn]; - for( j = cn*2; j < _ksize; j += cn ) - m = op(m, s[j]); - D[i] = op(m, s[0]); - D[i+cn] = op(m, s[j]); - } - - for( ; i < width; i += cn ) - { - const T* s = S + i; - T m = s[0]; - for( j = cn; j < _ksize; j += cn ) - m = op(m, s[j]); - D[i] = m; - } - } - } - - VecOp vecOp; -}; - - -template struct MorphColumnFilter : public BaseColumnFilter -{ - typedef typename Op::rtype T; - - MorphColumnFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor) - { - ksize = _ksize; - anchor = _anchor; - } - - void operator()(const uchar** _src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE - { - int i, k, _ksize = ksize; - const T** src = (const T**)_src; - T* D = (T*)dst; - Op op; - - int i0 = vecOp(_src, dst, dststep, count, width); - dststep /= sizeof(D[0]); - - for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 ) - { - i = i0; - #if CV_ENABLE_UNROLLED - for( ; i <= width - 4; i += 4 ) - { - const T* sptr = src[1] + i; - T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; - - for( k = 2; k < _ksize; k++ ) - { - sptr = src[k] + i; - s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]); - s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]); - } - - sptr = src[0] + i; - D[i] = op(s0, sptr[0]); - D[i+1] = op(s1, sptr[1]); - D[i+2] = op(s2, sptr[2]); - D[i+3] = op(s3, sptr[3]); - - sptr = src[k] + i; - D[i+dststep] = op(s0, sptr[0]); - D[i+dststep+1] = op(s1, sptr[1]); - D[i+dststep+2] = op(s2, sptr[2]); - D[i+dststep+3] = op(s3, sptr[3]); - } - #endif - for( ; i < width; i++ ) - { - T s0 = src[1][i]; - - for( k = 2; k < _ksize; k++ ) - s0 = op(s0, src[k][i]); - - D[i] = op(s0, src[0][i]); - D[i+dststep] = op(s0, src[k][i]); - } - } - - for( ; count > 0; count--, D += dststep, src++ ) - { - i = i0; - #if CV_ENABLE_UNROLLED - for( ; i <= width - 4; i += 4 ) - { - const T* sptr = src[0] + i; - T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; - - for( k = 1; k < _ksize; k++ ) - { - sptr = src[k] + i; - s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]); - s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]); - } - - D[i] = s0; D[i+1] = s1; - D[i+2] = s2; D[i+3] = s3; - } - #endif - for( ; i < width; i++ ) - { - T s0 = src[0][i]; - for( k = 1; k < _ksize; k++ ) - s0 = op(s0, src[k][i]); - D[i] = s0; - } - } - } - - VecOp vecOp; -}; - - -template struct MorphFilter : BaseFilter -{ - typedef typename Op::rtype T; - - MorphFilter( const Mat& _kernel, Point _anchor ) - { - anchor = _anchor; - ksize = _kernel.size(); - CV_Assert( _kernel.type() == CV_8U ); - - std::vector coeffs; // we do not really the values of non-zero - // kernel elements, just their locations - preprocess2DKernel( _kernel, coords, coeffs ); - ptrs.resize( coords.size() ); - } - - void operator()(const uchar** src, uchar* dst, int dststep, int count, int width, int cn) CV_OVERRIDE - { - const Point* pt = &coords[0]; - const T** kp = (const T**)&ptrs[0]; - int i, k, nz = (int)coords.size(); - Op op; - - width *= cn; - for( ; count > 0; count--, dst += dststep, src++ ) - { - T* D = (T*)dst; - - for( k = 0; k < nz; k++ ) - kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn; - - i = vecOp(&ptrs[0], nz, dst, width); - #if CV_ENABLE_UNROLLED - for( ; i <= width - 4; i += 4 ) - { - const T* sptr = kp[0] + i; - T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; - - for( k = 1; k < nz; k++ ) - { - sptr = kp[k] + i; - s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]); - s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]); - } - - D[i] = s0; D[i+1] = s1; - D[i+2] = s2; D[i+3] = s3; - } - #endif - for( ; i < width; i++ ) - { - T s0 = kp[0][i]; - for( k = 1; k < nz; k++ ) - s0 = op(s0, kp[k][i]); - D[i] = s0; - } - } - } - - std::vector coords; - std::vector ptrs; - VecOp vecOp; -}; - -} +namespace cv { /////////////////////////////////// External Interface ///////////////////////////////////// -cv::Ptr cv::getMorphologyRowFilter(int op, int type, int ksize, int anchor) +Ptr getMorphologyRowFilter(int op, int type, int ksize, int anchor) { - int depth = CV_MAT_DEPTH(type); - if( anchor < 0 ) - anchor = ksize/2; - CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); - if( op == MORPH_ERODE ) - { - if( depth == CV_8U ) - return makePtr, - ErodeRowVec8u> >(ksize, anchor); - if( depth == CV_16U ) - return makePtr, - ErodeRowVec16u> >(ksize, anchor); - if( depth == CV_16S ) - return makePtr, - ErodeRowVec16s> >(ksize, anchor); - if( depth == CV_32F ) - return makePtr, - ErodeRowVec32f> >(ksize, anchor); - if( depth == CV_64F ) - return makePtr, - ErodeRowVec64f> >(ksize, anchor); - } - else - { - if( depth == CV_8U ) - return makePtr, - DilateRowVec8u> >(ksize, anchor); - if( depth == CV_16U ) - return makePtr, - DilateRowVec16u> >(ksize, anchor); - if( depth == CV_16S ) - return makePtr, - DilateRowVec16s> >(ksize, anchor); - if( depth == CV_32F ) - return makePtr, - DilateRowVec32f> >(ksize, anchor); - if( depth == CV_64F ) - return makePtr, - DilateRowVec64f> >(ksize, anchor); - } + CV_INSTRUMENT_REGION(); - CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type)); + CV_CPU_DISPATCH(getMorphologyRowFilter, (op, type, ksize, anchor), + CV_CPU_DISPATCH_MODES_ALL); } -cv::Ptr cv::getMorphologyColumnFilter(int op, int type, int ksize, int anchor) +Ptr getMorphologyColumnFilter(int op, int type, int ksize, int anchor) { - int depth = CV_MAT_DEPTH(type); - if( anchor < 0 ) - anchor = ksize/2; - CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); - if( op == MORPH_ERODE ) - { - if( depth == CV_8U ) - return makePtr, - ErodeColumnVec8u> >(ksize, anchor); - if( depth == CV_16U ) - return makePtr, - ErodeColumnVec16u> >(ksize, anchor); - if( depth == CV_16S ) - return makePtr, - ErodeColumnVec16s> >(ksize, anchor); - if( depth == CV_32F ) - return makePtr, - ErodeColumnVec32f> >(ksize, anchor); - if( depth == CV_64F ) - return makePtr, - ErodeColumnVec64f> >(ksize, anchor); - } - else - { - if( depth == CV_8U ) - return makePtr, - DilateColumnVec8u> >(ksize, anchor); - if( depth == CV_16U ) - return makePtr, - DilateColumnVec16u> >(ksize, anchor); - if( depth == CV_16S ) - return makePtr, - DilateColumnVec16s> >(ksize, anchor); - if( depth == CV_32F ) - return makePtr, - DilateColumnVec32f> >(ksize, anchor); - if( depth == CV_64F ) - return makePtr, - DilateColumnVec64f> >(ksize, anchor); - } + CV_INSTRUMENT_REGION(); - CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type)); + CV_CPU_DISPATCH(getMorphologyColumnFilter, (op, type, ksize, anchor), + CV_CPU_DISPATCH_MODES_ALL); } -cv::Ptr cv::getMorphologyFilter(int op, int type, InputArray _kernel, Point anchor) +Ptr getMorphologyFilter(int op, int type, InputArray _kernel, Point anchor) { + CV_INSTRUMENT_REGION(); + Mat kernel = _kernel.getMat(); - int depth = CV_MAT_DEPTH(type); - anchor = normalizeAnchor(anchor, kernel.size()); - CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); - if( op == MORPH_ERODE ) - { - if( depth == CV_8U ) - return makePtr, ErodeVec8u> >(kernel, anchor); - if( depth == CV_16U ) - return makePtr, ErodeVec16u> >(kernel, anchor); - if( depth == CV_16S ) - return makePtr, ErodeVec16s> >(kernel, anchor); - if( depth == CV_32F ) - return makePtr, ErodeVec32f> >(kernel, anchor); - if( depth == CV_64F ) - return makePtr, ErodeVec64f> >(kernel, anchor); - } - else - { - if( depth == CV_8U ) - return makePtr, DilateVec8u> >(kernel, anchor); - if( depth == CV_16U ) - return makePtr, DilateVec16u> >(kernel, anchor); - if( depth == CV_16S ) - return makePtr, DilateVec16s> >(kernel, anchor); - if( depth == CV_32F ) - return makePtr, DilateVec32f> >(kernel, anchor); - if( depth == CV_64F ) - return makePtr, DilateVec64f> >(kernel, anchor); - } - - CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type)); + CV_CPU_DISPATCH(getMorphologyFilter, (op, type, kernel, anchor), + CV_CPU_DISPATCH_MODES_ALL); } -cv::Ptr cv::createMorphologyFilter( int op, int type, InputArray _kernel, - Point anchor, int _rowBorderType, int _columnBorderType, - const Scalar& _borderValue ) +Ptr createMorphologyFilter( + int op, int type, InputArray _kernel, + Point anchor, int _rowBorderType, int _columnBorderType, + const Scalar& _borderValue) { Mat kernel = _kernel.getMat(); anchor = normalizeAnchor(anchor, kernel.size()); @@ -862,7 +132,7 @@ cv::Ptr cv::createMorphologyFilter( int op, int type, InputArr } -cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor) +Mat getStructuringElement(int shape, Size ksize, Point anchor) { int i, j; int r = 0, c = 0; @@ -915,9 +185,6 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor) return elem; } -namespace cv -{ - // ===== 1. replacement implementation static bool halMorph(int op, int src_type, int dst_type, @@ -1732,9 +999,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, (src.isSubmatrix() && !isolated)); } -} - -void cv::erode( InputArray src, OutputArray dst, InputArray kernel, +void erode( InputArray src, OutputArray dst, InputArray kernel, Point anchor, int iterations, int borderType, const Scalar& borderValue ) { @@ -1744,7 +1009,7 @@ void cv::erode( InputArray src, OutputArray dst, InputArray kernel, } -void cv::dilate( InputArray src, OutputArray dst, InputArray kernel, +void dilate( InputArray src, OutputArray dst, InputArray kernel, Point anchor, int iterations, int borderType, const Scalar& borderValue ) { @@ -1755,8 +1020,6 @@ void cv::dilate( InputArray src, OutputArray dst, InputArray kernel, #ifdef HAVE_OPENCL -namespace cv { - static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op, InputArray kernel, Point anchor, int iterations, int borderType, const Scalar& borderValue) @@ -1813,13 +1076,11 @@ static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op, return true; } -} #endif #define IPP_DISABLE_MORPH_ADV 1 #if 0 //defined HAVE_IPP #if !IPP_DISABLE_MORPH_ADV -namespace cv { static bool ipp_morphologyEx(int op, InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int iterations, @@ -1884,11 +1145,10 @@ static bool ipp_morphologyEx(int op, InputArray _src, OutputArray _dst, return false; #endif } -} #endif #endif -void cv::morphologyEx( InputArray _src, OutputArray _dst, int op, +void morphologyEx( InputArray _src, OutputArray _dst, int op, InputArray _kernel, Point anchor, int iterations, int borderType, const Scalar& borderValue ) { @@ -1985,6 +1245,8 @@ void cv::morphologyEx( InputArray _src, OutputArray _dst, int op, } } +} // namespace cv + CV_IMPL IplConvKernel * cvCreateStructuringElementEx( int cols, int rows, int anchorX, int anchorY, diff --git a/modules/imgproc/src/morph.simd.hpp b/modules/imgproc/src/morph.simd.hpp new file mode 100644 index 0000000000..9b3023f8f0 --- /dev/null +++ b/modules/imgproc/src/morph.simd.hpp @@ -0,0 +1,846 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include "opencv2/core/hal/intrin.hpp" + +/****************************************************************************************\ + Basic Morphological Operations: Erosion & Dilation +\****************************************************************************************/ + +namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +Ptr getMorphologyRowFilter(int op, int type, int ksize, int anchor); +Ptr getMorphologyColumnFilter(int op, int type, int ksize, int anchor); +Ptr getMorphologyFilter(int op, int type, const Mat& kernel, Point anchor); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +namespace { +template struct MinOp +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator ()(const T a, const T b) const { return std::min(a, b); } +}; + +template struct MaxOp +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator ()(const T a, const T b) const { return std::max(a, b); } +}; + + +#if !defined(CV_SIMD) // min/max operation are usually fast enough (without using of control flow 'if' statements) + +#undef CV_MIN_8U +#undef CV_MAX_8U +#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b))) +#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a))) + +template<> inline uchar MinOp::operator ()(const uchar a, const uchar b) const { return CV_MIN_8U(a, b); } +template<> inline uchar MaxOp::operator ()(const uchar a, const uchar b) const { return CV_MAX_8U(a, b); } + +#endif + + + +struct MorphRowNoVec +{ + MorphRowNoVec(int, int) {} + int operator()(const uchar*, uchar*, int, int) const { return 0; } +}; + +struct MorphColumnNoVec +{ + MorphColumnNoVec(int, int) {} + int operator()(const uchar**, uchar*, int, int, int) const { return 0; } +}; + +struct MorphNoVec +{ + int operator()(uchar**, int, uchar*, int) const { return 0; } +}; + +#if CV_SIMD + +template struct MorphRowVec +{ + typedef typename VecUpdate::vtype vtype; + typedef typename vtype::lane_type stype; + MorphRowVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {} + int operator()(const uchar* src, uchar* dst, int width, int cn) const + { + CV_INSTRUMENT_REGION(); + + int i, k, _ksize = ksize*cn; + width *= cn; + VecUpdate updateOp; + + for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes ) + { + vtype s0 = vx_load((const stype*)src + i); + vtype s1 = vx_load((const stype*)src + i + vtype::nlanes); + vtype s2 = vx_load((const stype*)src + i + 2*vtype::nlanes); + vtype s3 = vx_load((const stype*)src + i + 3*vtype::nlanes); + for (k = cn; k < _ksize; k += cn) + { + s0 = updateOp(s0, vx_load((const stype*)src + i + k)); + s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes)); + s2 = updateOp(s2, vx_load((const stype*)src + i + k + 2*vtype::nlanes)); + s3 = updateOp(s3, vx_load((const stype*)src + i + k + 3*vtype::nlanes)); + } + v_store((stype*)dst + i, s0); + v_store((stype*)dst + i + vtype::nlanes, s1); + v_store((stype*)dst + i + 2*vtype::nlanes, s2); + v_store((stype*)dst + i + 3*vtype::nlanes, s3); + } + if( i <= width - 2*vtype::nlanes ) + { + vtype s0 = vx_load((const stype*)src + i); + vtype s1 = vx_load((const stype*)src + i + vtype::nlanes); + for( k = cn; k < _ksize; k += cn ) + { + s0 = updateOp(s0, vx_load((const stype*)src + i + k)); + s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes)); + } + v_store((stype*)dst + i, s0); + v_store((stype*)dst + i + vtype::nlanes, s1); + i += 2*vtype::nlanes; + } + if( i <= width - vtype::nlanes ) + { + vtype s = vx_load((const stype*)src + i); + for( k = cn; k < _ksize; k += cn ) + s = updateOp(s, vx_load((const stype*)src + i + k)); + v_store((stype*)dst + i, s); + i += vtype::nlanes; + } + if( i <= width - vtype::nlanes/2 ) + { + vtype s = vx_load_low((const stype*)src + i); + for( k = cn; k < _ksize; k += cn ) + s = updateOp(s, vx_load_low((const stype*)src + i + k)); + v_store_low((stype*)dst + i, s); + i += vtype::nlanes/2; + } + + return i - i % cn; + } + + int ksize, anchor; +}; + + +template struct MorphColumnVec +{ + typedef typename VecUpdate::vtype vtype; + typedef typename vtype::lane_type stype; + MorphColumnVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {} + int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const + { + CV_INSTRUMENT_REGION(); + + int i = 0, k, _ksize = ksize; + VecUpdate updateOp; + + for( i = 0; i < count + ksize - 1; i++ ) + CV_Assert( ((size_t)_src[i] & (CV_SIMD_WIDTH-1)) == 0 ); + + const stype** src = (const stype**)_src; + stype* dst = (stype*)_dst; + dststep /= sizeof(dst[0]); + + for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 ) + { + for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes) + { + const stype* sptr = src[1] + i; + vtype s0 = vx_load_aligned(sptr); + vtype s1 = vx_load_aligned(sptr + vtype::nlanes); + vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes); + vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes); + + for( k = 2; k < _ksize; k++ ) + { + sptr = src[k] + i; + s0 = updateOp(s0, vx_load_aligned(sptr)); + s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); + s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)); + s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)); + } + + sptr = src[0] + i; + v_store(dst + i, updateOp(s0, vx_load_aligned(sptr))); + v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); + v_store(dst + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes))); + v_store(dst + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes))); + + sptr = src[k] + i; + v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr))); + v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); + v_store(dst + dststep + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes))); + v_store(dst + dststep + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes))); + } + if( i <= width - 2*vtype::nlanes ) + { + const stype* sptr = src[1] + i; + vtype s0 = vx_load_aligned(sptr); + vtype s1 = vx_load_aligned(sptr + vtype::nlanes); + + for( k = 2; k < _ksize; k++ ) + { + sptr = src[k] + i; + s0 = updateOp(s0, vx_load_aligned(sptr)); + s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); + } + + sptr = src[0] + i; + v_store(dst + i, updateOp(s0, vx_load_aligned(sptr))); + v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); + + sptr = src[k] + i; + v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr))); + v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes))); + i += 2*vtype::nlanes; + } + if( i <= width - vtype::nlanes ) + { + vtype s0 = vx_load_aligned(src[1] + i); + + for( k = 2; k < _ksize; k++ ) + s0 = updateOp(s0, vx_load_aligned(src[k] + i)); + + v_store(dst + i, updateOp(s0, vx_load_aligned(src[0] + i))); + v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(src[k] + i))); + i += vtype::nlanes; + } + if( i <= width - vtype::nlanes/2 ) + { + vtype s0 = vx_load_low(src[1] + i); + + for( k = 2; k < _ksize; k++ ) + s0 = updateOp(s0, vx_load_low(src[k] + i)); + + v_store_low(dst + i, updateOp(s0, vx_load_low(src[0] + i))); + v_store_low(dst + dststep + i, updateOp(s0, vx_load_low(src[k] + i))); + i += vtype::nlanes/2; + } + } + + for( ; count > 0; count--, dst += dststep, src++ ) + { + for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes) + { + const stype* sptr = src[0] + i; + vtype s0 = vx_load_aligned(sptr); + vtype s1 = vx_load_aligned(sptr + vtype::nlanes); + vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes); + vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes); + + for( k = 1; k < _ksize; k++ ) + { + sptr = src[k] + i; + s0 = updateOp(s0, vx_load_aligned(sptr)); + s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); + s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)); + s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)); + } + v_store(dst + i, s0); + v_store(dst + i + vtype::nlanes, s1); + v_store(dst + i + 2*vtype::nlanes, s2); + v_store(dst + i + 3*vtype::nlanes, s3); + } + if( i <= width - 2*vtype::nlanes ) + { + const stype* sptr = src[0] + i; + vtype s0 = vx_load_aligned(sptr); + vtype s1 = vx_load_aligned(sptr + vtype::nlanes); + + for( k = 1; k < _ksize; k++ ) + { + sptr = src[k] + i; + s0 = updateOp(s0, vx_load_aligned(sptr)); + s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)); + } + v_store(dst + i, s0); + v_store(dst + i + vtype::nlanes, s1); + i += 2*vtype::nlanes; + } + if( i <= width - vtype::nlanes ) + { + vtype s0 = vx_load_aligned(src[0] + i); + + for( k = 1; k < _ksize; k++ ) + s0 = updateOp(s0, vx_load_aligned(src[k] + i)); + v_store(dst + i, s0); + i += vtype::nlanes; + } + if( i <= width - vtype::nlanes/2 ) + { + vtype s0 = vx_load_low(src[0] + i); + + for( k = 1; k < _ksize; k++ ) + s0 = updateOp(s0, vx_load_low(src[k] + i)); + v_store_low(dst + i, s0); + i += vtype::nlanes/2; + } + } + + return i; + } + + int ksize, anchor; +}; + + +template struct MorphVec +{ + typedef typename VecUpdate::vtype vtype; + typedef typename vtype::lane_type stype; + int operator()(uchar** _src, int nz, uchar* _dst, int width) const + { + CV_INSTRUMENT_REGION(); + + const stype** src = (const stype**)_src; + stype* dst = (stype*)_dst; + int i, k; + VecUpdate updateOp; + + for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes ) + { + const stype* sptr = src[0] + i; + vtype s0 = vx_load(sptr); + vtype s1 = vx_load(sptr + vtype::nlanes); + vtype s2 = vx_load(sptr + 2*vtype::nlanes); + vtype s3 = vx_load(sptr + 3*vtype::nlanes); + for( k = 1; k < nz; k++ ) + { + sptr = src[k] + i; + s0 = updateOp(s0, vx_load(sptr)); + s1 = updateOp(s1, vx_load(sptr + vtype::nlanes)); + s2 = updateOp(s2, vx_load(sptr + 2*vtype::nlanes)); + s3 = updateOp(s3, vx_load(sptr + 3*vtype::nlanes)); + } + v_store(dst + i, s0); + v_store(dst + i + vtype::nlanes, s1); + v_store(dst + i + 2*vtype::nlanes, s2); + v_store(dst + i + 3*vtype::nlanes, s3); + } + if( i <= width - 2*vtype::nlanes ) + { + const stype* sptr = src[0] + i; + vtype s0 = vx_load(sptr); + vtype s1 = vx_load(sptr + vtype::nlanes); + for( k = 1; k < nz; k++ ) + { + sptr = src[k] + i; + s0 = updateOp(s0, vx_load(sptr)); + s1 = updateOp(s1, vx_load(sptr + vtype::nlanes)); + } + v_store(dst + i, s0); + v_store(dst + i + vtype::nlanes, s1); + i += 2*vtype::nlanes; + } + if( i <= width - vtype::nlanes ) + { + vtype s0 = vx_load(src[0] + i); + for( k = 1; k < nz; k++ ) + s0 = updateOp(s0, vx_load(src[k] + i)); + v_store(dst + i, s0); + i += vtype::nlanes; + } + if( i <= width - vtype::nlanes/2 ) + { + vtype s0 = vx_load_low(src[0] + i); + for( k = 1; k < nz; k++ ) + s0 = updateOp(s0, vx_load_low(src[k] + i)); + v_store_low(dst + i, s0); + i += vtype::nlanes/2; + } + return i; + } +}; + +template struct VMin +{ + typedef T vtype; + vtype operator()(const vtype& a, const vtype& b) const { return v_min(a,b); } +}; +template struct VMax +{ + typedef T vtype; + vtype operator()(const vtype& a, const vtype& b) const { return v_max(a,b); } +}; + +typedef MorphRowVec > ErodeRowVec8u; +typedef MorphRowVec > DilateRowVec8u; +typedef MorphRowVec > ErodeRowVec16u; +typedef MorphRowVec > DilateRowVec16u; +typedef MorphRowVec > ErodeRowVec16s; +typedef MorphRowVec > DilateRowVec16s; +typedef MorphRowVec > ErodeRowVec32f; +typedef MorphRowVec > DilateRowVec32f; + +typedef MorphColumnVec > ErodeColumnVec8u; +typedef MorphColumnVec > DilateColumnVec8u; +typedef MorphColumnVec > ErodeColumnVec16u; +typedef MorphColumnVec > DilateColumnVec16u; +typedef MorphColumnVec > ErodeColumnVec16s; +typedef MorphColumnVec > DilateColumnVec16s; +typedef MorphColumnVec > ErodeColumnVec32f; +typedef MorphColumnVec > DilateColumnVec32f; + +typedef MorphVec > ErodeVec8u; +typedef MorphVec > DilateVec8u; +typedef MorphVec > ErodeVec16u; +typedef MorphVec > DilateVec16u; +typedef MorphVec > ErodeVec16s; +typedef MorphVec > DilateVec16s; +typedef MorphVec > ErodeVec32f; +typedef MorphVec > DilateVec32f; + +#else + +typedef MorphRowNoVec ErodeRowVec8u; +typedef MorphRowNoVec DilateRowVec8u; + +typedef MorphColumnNoVec ErodeColumnVec8u; +typedef MorphColumnNoVec DilateColumnVec8u; + +typedef MorphRowNoVec ErodeRowVec16u; +typedef MorphRowNoVec DilateRowVec16u; +typedef MorphRowNoVec ErodeRowVec16s; +typedef MorphRowNoVec DilateRowVec16s; +typedef MorphRowNoVec ErodeRowVec32f; +typedef MorphRowNoVec DilateRowVec32f; + +typedef MorphColumnNoVec ErodeColumnVec16u; +typedef MorphColumnNoVec DilateColumnVec16u; +typedef MorphColumnNoVec ErodeColumnVec16s; +typedef MorphColumnNoVec DilateColumnVec16s; +typedef MorphColumnNoVec ErodeColumnVec32f; +typedef MorphColumnNoVec DilateColumnVec32f; + +typedef MorphNoVec ErodeVec8u; +typedef MorphNoVec DilateVec8u; +typedef MorphNoVec ErodeVec16u; +typedef MorphNoVec DilateVec16u; +typedef MorphNoVec ErodeVec16s; +typedef MorphNoVec DilateVec16s; +typedef MorphNoVec ErodeVec32f; +typedef MorphNoVec DilateVec32f; + +#endif + +typedef MorphRowNoVec ErodeRowVec64f; +typedef MorphRowNoVec DilateRowVec64f; +typedef MorphColumnNoVec ErodeColumnVec64f; +typedef MorphColumnNoVec DilateColumnVec64f; +typedef MorphNoVec ErodeVec64f; +typedef MorphNoVec DilateVec64f; + + +template struct MorphRowFilter : public BaseRowFilter +{ + typedef typename Op::rtype T; + + MorphRowFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor) + { + ksize = _ksize; + anchor = _anchor; + } + + void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE + { + CV_INSTRUMENT_REGION(); + + int i, j, k, _ksize = ksize*cn; + const T* S = (const T*)src; + Op op; + T* D = (T*)dst; + + if( _ksize == cn ) + { + for( i = 0; i < width*cn; i++ ) + D[i] = S[i]; + return; + } + + int i0 = vecOp(src, dst, width, cn); + width *= cn; + + for( k = 0; k < cn; k++, S++, D++ ) + { + for( i = i0; i <= width - cn*2; i += cn*2 ) + { + const T* s = S + i; + T m = s[cn]; + for( j = cn*2; j < _ksize; j += cn ) + m = op(m, s[j]); + D[i] = op(m, s[0]); + D[i+cn] = op(m, s[j]); + } + + for( ; i < width; i += cn ) + { + const T* s = S + i; + T m = s[0]; + for( j = cn; j < _ksize; j += cn ) + m = op(m, s[j]); + D[i] = m; + } + } + } + + VecOp vecOp; +}; + + +template struct MorphColumnFilter : public BaseColumnFilter +{ + typedef typename Op::rtype T; + + MorphColumnFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor) + { + ksize = _ksize; + anchor = _anchor; + } + + void operator()(const uchar** _src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE + { + CV_INSTRUMENT_REGION(); + + int i, k, _ksize = ksize; + const T** src = (const T**)_src; + T* D = (T*)dst; + Op op; + + int i0 = vecOp(_src, dst, dststep, count, width); + dststep /= sizeof(D[0]); + + for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 ) + { + i = i0; + #if CV_ENABLE_UNROLLED + for( ; i <= width - 4; i += 4 ) + { + const T* sptr = src[1] + i; + T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; + + for( k = 2; k < _ksize; k++ ) + { + sptr = src[k] + i; + s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]); + s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]); + } + + sptr = src[0] + i; + D[i] = op(s0, sptr[0]); + D[i+1] = op(s1, sptr[1]); + D[i+2] = op(s2, sptr[2]); + D[i+3] = op(s3, sptr[3]); + + sptr = src[k] + i; + D[i+dststep] = op(s0, sptr[0]); + D[i+dststep+1] = op(s1, sptr[1]); + D[i+dststep+2] = op(s2, sptr[2]); + D[i+dststep+3] = op(s3, sptr[3]); + } + #endif + for( ; i < width; i++ ) + { + T s0 = src[1][i]; + + for( k = 2; k < _ksize; k++ ) + s0 = op(s0, src[k][i]); + + D[i] = op(s0, src[0][i]); + D[i+dststep] = op(s0, src[k][i]); + } + } + + for( ; count > 0; count--, D += dststep, src++ ) + { + i = i0; + #if CV_ENABLE_UNROLLED + for( ; i <= width - 4; i += 4 ) + { + const T* sptr = src[0] + i; + T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; + + for( k = 1; k < _ksize; k++ ) + { + sptr = src[k] + i; + s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]); + s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]); + } + + D[i] = s0; D[i+1] = s1; + D[i+2] = s2; D[i+3] = s3; + } + #endif + for( ; i < width; i++ ) + { + T s0 = src[0][i]; + for( k = 1; k < _ksize; k++ ) + s0 = op(s0, src[k][i]); + D[i] = s0; + } + } + } + + VecOp vecOp; +}; + + +template struct MorphFilter : BaseFilter +{ + typedef typename Op::rtype T; + + MorphFilter( const Mat& _kernel, Point _anchor ) + { + anchor = _anchor; + ksize = _kernel.size(); + CV_Assert( _kernel.type() == CV_8U ); + + std::vector coeffs; // we do not really the values of non-zero + // kernel elements, just their locations + preprocess2DKernel( _kernel, coords, coeffs ); + ptrs.resize( coords.size() ); + } + + void operator()(const uchar** src, uchar* dst, int dststep, int count, int width, int cn) CV_OVERRIDE + { + CV_INSTRUMENT_REGION(); + + const Point* pt = &coords[0]; + const T** kp = (const T**)&ptrs[0]; + int i, k, nz = (int)coords.size(); + Op op; + + width *= cn; + for( ; count > 0; count--, dst += dststep, src++ ) + { + T* D = (T*)dst; + + for( k = 0; k < nz; k++ ) + kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn; + + i = vecOp(&ptrs[0], nz, dst, width); + #if CV_ENABLE_UNROLLED + for( ; i <= width - 4; i += 4 ) + { + const T* sptr = kp[0] + i; + T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; + + for( k = 1; k < nz; k++ ) + { + sptr = kp[k] + i; + s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]); + s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]); + } + + D[i] = s0; D[i+1] = s1; + D[i+2] = s2; D[i+3] = s3; + } + #endif + for( ; i < width; i++ ) + { + T s0 = kp[0][i]; + for( k = 1; k < nz; k++ ) + s0 = op(s0, kp[k][i]); + D[i] = s0; + } + } + } + + std::vector coords; + std::vector ptrs; + VecOp vecOp; +}; + +} // namespace anon + +/////////////////////////////////// External Interface ///////////////////////////////////// + +Ptr getMorphologyRowFilter(int op, int type, int ksize, int anchor) +{ + CV_INSTRUMENT_REGION(); + + int depth = CV_MAT_DEPTH(type); + if( anchor < 0 ) + anchor = ksize/2; + CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); + if( op == MORPH_ERODE ) + { + if( depth == CV_8U ) + return makePtr, + ErodeRowVec8u> >(ksize, anchor); + if( depth == CV_16U ) + return makePtr, + ErodeRowVec16u> >(ksize, anchor); + if( depth == CV_16S ) + return makePtr, + ErodeRowVec16s> >(ksize, anchor); + if( depth == CV_32F ) + return makePtr, + ErodeRowVec32f> >(ksize, anchor); + if( depth == CV_64F ) + return makePtr, + ErodeRowVec64f> >(ksize, anchor); + } + else + { + if( depth == CV_8U ) + return makePtr, + DilateRowVec8u> >(ksize, anchor); + if( depth == CV_16U ) + return makePtr, + DilateRowVec16u> >(ksize, anchor); + if( depth == CV_16S ) + return makePtr, + DilateRowVec16s> >(ksize, anchor); + if( depth == CV_32F ) + return makePtr, + DilateRowVec32f> >(ksize, anchor); + if( depth == CV_64F ) + return makePtr, + DilateRowVec64f> >(ksize, anchor); + } + + CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type)); +} + +Ptr getMorphologyColumnFilter(int op, int type, int ksize, int anchor) +{ + CV_INSTRUMENT_REGION(); + + int depth = CV_MAT_DEPTH(type); + if( anchor < 0 ) + anchor = ksize/2; + CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); + if( op == MORPH_ERODE ) + { + if( depth == CV_8U ) + return makePtr, + ErodeColumnVec8u> >(ksize, anchor); + if( depth == CV_16U ) + return makePtr, + ErodeColumnVec16u> >(ksize, anchor); + if( depth == CV_16S ) + return makePtr, + ErodeColumnVec16s> >(ksize, anchor); + if( depth == CV_32F ) + return makePtr, + ErodeColumnVec32f> >(ksize, anchor); + if( depth == CV_64F ) + return makePtr, + ErodeColumnVec64f> >(ksize, anchor); + } + else + { + if( depth == CV_8U ) + return makePtr, + DilateColumnVec8u> >(ksize, anchor); + if( depth == CV_16U ) + return makePtr, + DilateColumnVec16u> >(ksize, anchor); + if( depth == CV_16S ) + return makePtr, + DilateColumnVec16s> >(ksize, anchor); + if( depth == CV_32F ) + return makePtr, + DilateColumnVec32f> >(ksize, anchor); + if( depth == CV_64F ) + return makePtr, + DilateColumnVec64f> >(ksize, anchor); + } + + CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type)); +} + +Ptr getMorphologyFilter(int op, int type, const Mat& kernel, Point anchor) +{ + CV_INSTRUMENT_REGION(); + + int depth = CV_MAT_DEPTH(type); + anchor = normalizeAnchor(anchor, kernel.size()); + CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); + if( op == MORPH_ERODE ) + { + if( depth == CV_8U ) + return makePtr, ErodeVec8u> >(kernel, anchor); + if( depth == CV_16U ) + return makePtr, ErodeVec16u> >(kernel, anchor); + if( depth == CV_16S ) + return makePtr, ErodeVec16s> >(kernel, anchor); + if( depth == CV_32F ) + return makePtr, ErodeVec32f> >(kernel, anchor); + if( depth == CV_64F ) + return makePtr, ErodeVec64f> >(kernel, anchor); + } + else + { + if( depth == CV_8U ) + return makePtr, DilateVec8u> >(kernel, anchor); + if( depth == CV_16U ) + return makePtr, DilateVec16u> >(kernel, anchor); + if( depth == CV_16S ) + return makePtr, DilateVec16s> >(kernel, anchor); + if( depth == CV_32F ) + return makePtr, DilateVec32f> >(kernel, anchor); + if( depth == CV_64F ) + return makePtr, DilateVec64f> >(kernel, anchor); + } + + CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type)); +} + +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp new file mode 100644 index 0000000000..4e514eb8b8 --- /dev/null +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -0,0 +1,582 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2014-2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#include + +#include "opencv2/core/hal/intrin.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "opencv2/core/openvx/ovx_defs.hpp" + +#include "filter.hpp" + +#include "opencv2/core/softfloat.hpp" + +namespace cv { +#include "fixedpoint.inl.hpp" +} + +#include "smooth.simd.hpp" +#include "smooth.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv { + +/****************************************************************************************\ + Gaussian Blur +\****************************************************************************************/ + +Mat getGaussianKernel(int n, double sigma, int ktype) +{ + CV_Assert(n > 0); + const int SMALL_GAUSSIAN_SIZE = 7; + static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE] = + { + {1.f}, + {0.25f, 0.5f, 0.25f}, + {0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f}, + {0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f} + }; + + const float* fixed_kernel = n % 2 == 1 && n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 ? + small_gaussian_tab[n>>1] : 0; + + CV_Assert( ktype == CV_32F || ktype == CV_64F ); + Mat kernel(n, 1, ktype); + float* cf = kernel.ptr(); + double* cd = kernel.ptr(); + + double sigmaX = sigma > 0 ? sigma : ((n-1)*0.5 - 1)*0.3 + 0.8; + double scale2X = -0.5/(sigmaX*sigmaX); + double sum = 0; + + int i; + for( i = 0; i < n; i++ ) + { + double x = i - (n-1)*0.5; + double t = fixed_kernel ? (double)fixed_kernel[i] : std::exp(scale2X*x*x); + if( ktype == CV_32F ) + { + cf[i] = (float)t; + sum += cf[i]; + } + else + { + cd[i] = t; + sum += cd[i]; + } + } + + CV_DbgAssert(fabs(sum) > 0); + sum = 1./sum; + for( i = 0; i < n; i++ ) + { + if( ktype == CV_32F ) + cf[i] = (float)(cf[i]*sum); + else + cd[i] *= sum; + } + + return kernel; +} + +template +static std::vector getFixedpointGaussianKernel( int n, double sigma ) +{ + if (sigma <= 0) + { + if(n == 1) + return std::vector(1, softdouble(1.0)); + else if(n == 3) + { + T v3[] = { softdouble(0.25), softdouble(0.5), softdouble(0.25) }; + return std::vector(v3, v3 + 3); + } + else if(n == 5) + { + T v5[] = { softdouble(0.0625), softdouble(0.25), softdouble(0.375), softdouble(0.25), softdouble(0.0625) }; + return std::vector(v5, v5 + 5); + } + else if(n == 7) + { + T v7[] = { softdouble(0.03125), softdouble(0.109375), softdouble(0.21875), softdouble(0.28125), softdouble(0.21875), softdouble(0.109375), softdouble(0.03125) }; + return std::vector(v7, v7 + 7); + } + } + + + softdouble sigmaX = sigma > 0 ? softdouble(sigma) : mulAdd(softdouble(n),softdouble(0.15),softdouble(0.35));// softdouble(((n-1)*0.5 - 1)*0.3 + 0.8) + softdouble scale2X = softdouble(-0.5*0.25)/(sigmaX*sigmaX); + std::vector values(n); + softdouble sum(0.); + for(int i = 0, x = 1 - n; i < n; i++, x+=2 ) + { + // x = i - (n - 1)*0.5 + // t = std::exp(scale2X*x*x) + values[i] = exp(softdouble(x*x)*scale2X); + sum += values[i]; + } + sum = softdouble::one()/sum; + + std::vector kernel(n); + for(int i = 0; i < n; i++ ) + { + kernel[i] = values[i] * sum; + } + + return kernel; +}; + +static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); } +template static void getGaussianKernel(int n, double sigma, int, std::vector& res) { res = getFixedpointGaussianKernel(n, sigma); } + +template +static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize, + double sigma1, double sigma2 ) +{ + int depth = CV_MAT_DEPTH(type); + if( sigma2 <= 0 ) + sigma2 = sigma1; + + // automatic detection of kernel size from sigma + if( ksize.width <= 0 && sigma1 > 0 ) + ksize.width = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1; + if( ksize.height <= 0 && sigma2 > 0 ) + ksize.height = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1; + + CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && + ksize.height > 0 && ksize.height % 2 == 1 ); + + sigma1 = std::max( sigma1, 0. ); + sigma2 = std::max( sigma2, 0. ); + + getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F), kx ); + if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON ) + ky = kx; + else + getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F), ky ); +} + +Ptr createGaussianFilter( int type, Size ksize, + double sigma1, double sigma2, + int borderType ) +{ + Mat kx, ky; + createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); + + return createSeparableLinearFilter( type, type, kx, ky, Point(-1,-1), 0, borderType ); +} + +#ifdef HAVE_OPENCL + +static bool ocl_GaussianBlur_8UC1(InputArray _src, OutputArray _dst, Size ksize, int ddepth, + InputArray _kernelX, InputArray _kernelY, int borderType) +{ + const ocl::Device & dev = ocl::Device::getDefault(); + int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + + if ( !(dev.isIntel() && (type == CV_8UC1) && + (_src.offset() == 0) && (_src.step() % 4 == 0) && + ((ksize.width == 5 && (_src.cols() % 4 == 0)) || + (ksize.width == 3 && (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)))) ) + return false; + + Mat kernelX = _kernelX.getMat().reshape(1, 1); + if (kernelX.cols % 2 != 1) + return false; + Mat kernelY = _kernelY.getMat().reshape(1, 1); + if (kernelY.cols % 2 != 1) + return false; + + if (ddepth < 0) + ddepth = sdepth; + + Size size = _src.size(); + size_t globalsize[2] = { 0, 0 }; + size_t localsize[2] = { 0, 0 }; + + if (ksize.width == 3) + { + globalsize[0] = size.width / 16; + globalsize[1] = size.height / 2; + } + else if (ksize.width == 5) + { + globalsize[0] = size.width / 4; + globalsize[1] = size.height / 1; + } + + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; + char build_opts[1024]; + sprintf(build_opts, "-D %s %s%s", borderMap[borderType & ~BORDER_ISOLATED], + ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(), + ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str()); + + ocl::Kernel kernel; + + if (ksize.width == 3) + kernel.create("gaussianBlur3x3_8UC1_cols16_rows2", cv::ocl::imgproc::gaussianBlur3x3_oclsrc, build_opts); + else if (ksize.width == 5) + kernel.create("gaussianBlur5x5_8UC1_cols4", cv::ocl::imgproc::gaussianBlur5x5_oclsrc, build_opts); + + if (kernel.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(size, CV_MAKETYPE(ddepth, cn)); + if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) + return false; + UMat dst = _dst.getUMat(); + + int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); + idxArg = kernel.set(idxArg, (int)src.step); + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); + idxArg = kernel.set(idxArg, (int)dst.step); + idxArg = kernel.set(idxArg, (int)dst.rows); + idxArg = kernel.set(idxArg, (int)dst.cols); + + return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); +} + +#endif + +#ifdef HAVE_OPENVX + +namespace ovx { + template <> inline bool skipSmallImages(int w, int h) { return w*h < 320 * 240; } +} +static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, + double sigma1, double sigma2, int borderType) +{ + if (sigma2 <= 0) + sigma2 = sigma1; + // automatic detection of kernel size from sigma + if (ksize.width <= 0 && sigma1 > 0) + ksize.width = cvRound(sigma1*6 + 1) | 1; + if (ksize.height <= 0 && sigma2 > 0) + ksize.height = cvRound(sigma2*6 + 1) | 1; + + if (_src.type() != CV_8UC1 || + _src.cols() < 3 || _src.rows() < 3 || + ksize.width != 3 || ksize.height != 3) + return false; + + sigma1 = std::max(sigma1, 0.); + sigma2 = std::max(sigma2, 0.); + + if (!(sigma1 == 0.0 || (sigma1 - 0.8) < DBL_EPSILON) || !(sigma2 == 0.0 || (sigma2 - 0.8) < DBL_EPSILON) || + ovx::skipSmallImages(_src.cols(), _src.rows())) + return false; + + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + if ((borderType & BORDER_ISOLATED) == 0 && src.isSubmatrix()) + return false; //Process isolated borders only + vx_enum border; + switch (borderType & ~BORDER_ISOLATED) + { + case BORDER_CONSTANT: + border = VX_BORDER_CONSTANT; + break; + case BORDER_REPLICATE: + border = VX_BORDER_REPLICATE; + break; + default: + return false; + } + + try + { + ivx::Context ctx = ovx::getOpenVXContext(); + + Mat a; + if (dst.data != src.data) + a = src; + else + src.copyTo(a); + + ivx::Image + ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), + ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); + + //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments + //since OpenVX standard says nothing about thread-safety for now + ivx::border_t prevBorder = ctx.immediateBorder(); + ctx.setImmediateBorder(border, (vx_uint8)(0)); + ivx::IVX_CHECK_STATUS(vxuGaussian3x3(ctx, ia, ib)); + ctx.setImmediateBorder(prevBorder); + } + catch (const ivx::RuntimeError & e) + { + VX_DbgThrow(e.what()); + } + catch (const ivx::WrapperError & e) + { + VX_DbgThrow(e.what()); + } + return true; +} + +#endif + +#ifdef HAVE_IPP +// IW 2017u2 has bug which doesn't allow use of partial inMem with tiling +#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL +#define IPP_GAUSSIANBLUR_PARALLEL 0 +#else +#define IPP_GAUSSIANBLUR_PARALLEL 1 +#endif + +#ifdef HAVE_IPP_IW + +class ipp_gaussianBlurParallel: public ParallelLoopBody +{ +public: + ipp_gaussianBlurParallel(::ipp::IwiImage &src, ::ipp::IwiImage &dst, int kernelSize, float sigma, ::ipp::IwiBorderType &border, bool *pOk): + m_src(src), m_dst(dst), m_kernelSize(kernelSize), m_sigma(sigma), m_border(border), m_pOk(pOk) { + *m_pOk = true; + } + ~ipp_gaussianBlurParallel() + { + } + + virtual void operator() (const Range& range) const CV_OVERRIDE + { + CV_INSTRUMENT_REGION_IPP(); + + if(!*m_pOk) + return; + + try + { + ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile); + } + catch(const ::ipp::IwException &) + { + *m_pOk = false; + return; + } + } +private: + ::ipp::IwiImage &m_src; + ::ipp::IwiImage &m_dst; + + int m_kernelSize; + float m_sigma; + ::ipp::IwiBorderType &m_border; + + volatile bool *m_pOk; + const ipp_gaussianBlurParallel& operator= (const ipp_gaussianBlurParallel&); +}; + +#endif + +static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, + double sigma1, double sigma2, int borderType ) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + +#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) + CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); + return false; // bug on ia32 +#else + if(sigma1 != sigma2) + return false; + + if(sigma1 < FLT_EPSILON) + return false; + + if(ksize.width != ksize.height) + return false; + + // Acquire data and begin processing + try + { + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize)); + ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); + if(!ippBorder) + return false; + + const int threads = ippiSuggestThreadsNum(iwDst, 2); + if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { + bool ok; + ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); + + if(!ok) + return false; + const Range range(0, (int) iwDst.m_size.height); + parallel_for_(range, invoker, threads*4); + + if(!ok) + return false; + } else { + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder); + } + } + catch (const ::ipp::IwException &) + { + return false; + } + + return true; +#endif +#else + CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); + return false; +#endif +} +#endif + +void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, + double sigma1, double sigma2, + int borderType) +{ + CV_INSTRUMENT_REGION(); + + int type = _src.type(); + Size size = _src.size(); + _dst.create( size, type ); + + if( (borderType & ~BORDER_ISOLATED) != BORDER_CONSTANT && + ((borderType & BORDER_ISOLATED) != 0 || !_src.getMat().isSubmatrix()) ) + { + if( size.height == 1 ) + ksize.height = 1; + if( size.width == 1 ) + ksize.width = 1; + } + + if( ksize.width == 1 && ksize.height == 1 ) + { + _src.copyTo(_dst); + return; + } + + bool useOpenCL = (ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 && + ((ksize.width == 3 && ksize.height == 3) || + (ksize.width == 5 && ksize.height == 5)) && + _src.rows() > ksize.height && _src.cols() > ksize.width); + CV_UNUSED(useOpenCL); + + int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + + Mat kx, ky; + createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); + + CV_OCL_RUN(useOpenCL, ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType)); + + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (size_t)_src.rows() > kx.total() && (size_t)_src.cols() > kx.total(), + ocl_sepFilter2D(_src, _dst, sdepth, kx, ky, Point(-1, -1), 0, borderType)) + + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + Point ofs; + Size wsz(src.cols, src.rows); + if(!(borderType & BORDER_ISOLATED)) + src.locateROI( wsz, ofs ); + + CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, + ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, + sigma1, sigma2, borderType&~BORDER_ISOLATED); + + CV_OVX_RUN(true, + openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) + + CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); + + if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix())) + { + std::vector fkx, fky; + createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2); + if (src.data == dst.data) + src = src.clone(); + CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType), + CV_CPU_DISPATCH_MODES_ALL); + return; + } + + sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType); +} + +} // namespace + +////////////////////////////////////////////////////////////////////////////////////////// + +CV_IMPL void +cvSmooth( const void* srcarr, void* dstarr, int smooth_type, + int param1, int param2, double param3, double param4 ) +{ + cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0; + + CV_Assert( dst.size() == src.size() && + (smooth_type == CV_BLUR_NO_SCALE || dst.type() == src.type()) ); + + if( param2 <= 0 ) + param2 = param1; + + if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ) + cv::boxFilter( src, dst, dst.depth(), cv::Size(param1, param2), cv::Point(-1,-1), + smooth_type == CV_BLUR, cv::BORDER_REPLICATE ); + else if( smooth_type == CV_GAUSSIAN ) + cv::GaussianBlur( src, dst, cv::Size(param1, param2), param3, param4, cv::BORDER_REPLICATE ); + else if( smooth_type == CV_MEDIAN ) + cv::medianBlur( src, dst, param1 ); + else + cv::bilateralFilter( src, dst, param1, param3, param4, cv::BORDER_REPLICATE ); + + if( dst.data != dst0.data ) + CV_Error( CV_StsUnmatchedFormats, "The destination image does not have the proper type" ); +} + +/* End of file. */ diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.simd.hpp similarity index 84% rename from modules/imgproc/src/smooth.cpp rename to modules/imgproc/src/smooth.simd.hpp index ed3e93c6cb..4f52bc0d80 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.simd.hpp @@ -46,120 +46,28 @@ #include #include "opencv2/core/hal/intrin.hpp" -#include "opencl_kernels_imgproc.hpp" - -#include "opencv2/core/openvx/ovx_defs.hpp" #include "filter.hpp" -#include "fixedpoint.inl.hpp" - -/****************************************************************************************\ - Gaussian Blur -\****************************************************************************************/ - -cv::Mat cv::getGaussianKernel( int n, double sigma, int ktype ) -{ - CV_Assert(n > 0); - const int SMALL_GAUSSIAN_SIZE = 7; - static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE] = - { - {1.f}, - {0.25f, 0.5f, 0.25f}, - {0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f}, - {0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f} - }; - - const float* fixed_kernel = n % 2 == 1 && n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 ? - small_gaussian_tab[n>>1] : 0; - - CV_Assert( ktype == CV_32F || ktype == CV_64F ); - Mat kernel(n, 1, ktype); - float* cf = kernel.ptr(); - double* cd = kernel.ptr(); - - double sigmaX = sigma > 0 ? sigma : ((n-1)*0.5 - 1)*0.3 + 0.8; - double scale2X = -0.5/(sigmaX*sigmaX); - double sum = 0; - - int i; - for( i = 0; i < n; i++ ) - { - double x = i - (n-1)*0.5; - double t = fixed_kernel ? (double)fixed_kernel[i] : std::exp(scale2X*x*x); - if( ktype == CV_32F ) - { - cf[i] = (float)t; - sum += cf[i]; - } - else - { - cd[i] = t; - sum += cd[i]; - } - } - - CV_DbgAssert(fabs(sum) > 0); - sum = 1./sum; - for( i = 0; i < n; i++ ) - { - if( ktype == CV_32F ) - cf[i] = (float)(cf[i]*sum); - else - cd[i] *= sum; - } - - return kernel; -} +#include "opencv2/core/softfloat.hpp" namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst, + const uint16_t/*ufixedpoint16*/* fkx, int fkx_size, + const uint16_t/*ufixedpoint16*/* fky, int fky_size, + int borderType); -template -static std::vector getFixedpointGaussianKernel( int n, double sigma ) -{ - if (sigma <= 0) - { - if(n == 1) - return std::vector(1, softdouble(1.0)); - else if(n == 3) - { - T v3[] = { softdouble(0.25), softdouble(0.5), softdouble(0.25) }; - return std::vector(v3, v3 + 3); - } - else if(n == 5) - { - T v5[] = { softdouble(0.0625), softdouble(0.25), softdouble(0.375), softdouble(0.25), softdouble(0.0625) }; - return std::vector(v5, v5 + 5); - } - else if(n == 7) - { - T v7[] = { softdouble(0.03125), softdouble(0.109375), softdouble(0.21875), softdouble(0.28125), softdouble(0.21875), softdouble(0.109375), softdouble(0.03125) }; - return std::vector(v7, v7 + 7); - } - } +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#if defined(CV_CPU_BASELINE_MODE) +// included in dispatch.cpp +#else +#include "fixedpoint.inl.hpp" +#endif - softdouble sigmaX = sigma > 0 ? softdouble(sigma) : mulAdd(softdouble(n),softdouble(0.15),softdouble(0.35));// softdouble(((n-1)*0.5 - 1)*0.3 + 0.8) - softdouble scale2X = softdouble(-0.5*0.25)/(sigmaX*sigmaX); - std::vector values(n); - softdouble sum(0.); - for(int i = 0, x = 1 - n; i < n; i++, x+=2 ) - { - // x = i - (n - 1)*0.5 - // t = std::exp(scale2X*x*x) - values[i] = exp(softdouble(x*x)*scale2X); - sum += values[i]; - } - sum = softdouble::one()/sum; - - std::vector kernel(n); - for(int i = 0; i < n; i++ ) - { - kernel[i] = values[i] * sum; - } - - return kernel; -}; +namespace { template void hlineSmooth1N(const ET* src, int cn, const FT* m, int, FT* dst, int len, int) @@ -2119,418 +2027,27 @@ private: fixedSmoothInvoker& operator=(const fixedSmoothInvoker&); }; -static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); } -template static void getGaussianKernel(int n, double sigma, int, std::vector& res) { res = getFixedpointGaussianKernel(n, sigma); } +} // namespace anon -template -static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize, - double sigma1, double sigma2 ) -{ - int depth = CV_MAT_DEPTH(type); - if( sigma2 <= 0 ) - sigma2 = sigma1; - - // automatic detection of kernel size from sigma - if( ksize.width <= 0 && sigma1 > 0 ) - ksize.width = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1; - if( ksize.height <= 0 && sigma2 > 0 ) - ksize.height = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1; - - CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && - ksize.height > 0 && ksize.height % 2 == 1 ); - - sigma1 = std::max( sigma1, 0. ); - sigma2 = std::max( sigma2, 0. ); - - getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F), kx ); - if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON ) - ky = kx; - else - getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F), ky ); -} - -} - -cv::Ptr cv::createGaussianFilter( int type, Size ksize, - double sigma1, double sigma2, - int borderType ) -{ - Mat kx, ky; - createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); - - return createSeparableLinearFilter( type, type, kx, ky, Point(-1,-1), 0, borderType ); -} - -namespace cv -{ -#ifdef HAVE_OPENCL - -static bool ocl_GaussianBlur_8UC1(InputArray _src, OutputArray _dst, Size ksize, int ddepth, - InputArray _kernelX, InputArray _kernelY, int borderType) -{ - const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - - if ( !(dev.isIntel() && (type == CV_8UC1) && - (_src.offset() == 0) && (_src.step() % 4 == 0) && - ((ksize.width == 5 && (_src.cols() % 4 == 0)) || - (ksize.width == 3 && (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)))) ) - return false; - - Mat kernelX = _kernelX.getMat().reshape(1, 1); - if (kernelX.cols % 2 != 1) - return false; - Mat kernelY = _kernelY.getMat().reshape(1, 1); - if (kernelY.cols % 2 != 1) - return false; - - if (ddepth < 0) - ddepth = sdepth; - - Size size = _src.size(); - size_t globalsize[2] = { 0, 0 }; - size_t localsize[2] = { 0, 0 }; - - if (ksize.width == 3) - { - globalsize[0] = size.width / 16; - globalsize[1] = size.height / 2; - } - else if (ksize.width == 5) - { - globalsize[0] = size.width / 4; - globalsize[1] = size.height / 1; - } - - const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; - char build_opts[1024]; - sprintf(build_opts, "-D %s %s%s", borderMap[borderType & ~BORDER_ISOLATED], - ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(), - ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str()); - - ocl::Kernel kernel; - - if (ksize.width == 3) - kernel.create("gaussianBlur3x3_8UC1_cols16_rows2", cv::ocl::imgproc::gaussianBlur3x3_oclsrc, build_opts); - else if (ksize.width == 5) - kernel.create("gaussianBlur5x5_8UC1_cols4", cv::ocl::imgproc::gaussianBlur5x5_oclsrc, build_opts); - - if (kernel.empty()) - return false; - - UMat src = _src.getUMat(); - _dst.create(size, CV_MAKETYPE(ddepth, cn)); - if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) - return false; - UMat dst = _dst.getUMat(); - - int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); - idxArg = kernel.set(idxArg, (int)src.step); - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); - idxArg = kernel.set(idxArg, (int)dst.step); - idxArg = kernel.set(idxArg, (int)dst.rows); - idxArg = kernel.set(idxArg, (int)dst.cols); - - return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); -} - -#endif - -#ifdef HAVE_OPENVX - -namespace ovx { - template <> inline bool skipSmallImages(int w, int h) { return w*h < 320 * 240; } -} -static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, - double sigma1, double sigma2, int borderType) -{ - if (sigma2 <= 0) - sigma2 = sigma1; - // automatic detection of kernel size from sigma - if (ksize.width <= 0 && sigma1 > 0) - ksize.width = cvRound(sigma1*6 + 1) | 1; - if (ksize.height <= 0 && sigma2 > 0) - ksize.height = cvRound(sigma2*6 + 1) | 1; - - if (_src.type() != CV_8UC1 || - _src.cols() < 3 || _src.rows() < 3 || - ksize.width != 3 || ksize.height != 3) - return false; - - sigma1 = std::max(sigma1, 0.); - sigma2 = std::max(sigma2, 0.); - - if (!(sigma1 == 0.0 || (sigma1 - 0.8) < DBL_EPSILON) || !(sigma2 == 0.0 || (sigma2 - 0.8) < DBL_EPSILON) || - ovx::skipSmallImages(_src.cols(), _src.rows())) - return false; - - Mat src = _src.getMat(); - Mat dst = _dst.getMat(); - - if ((borderType & BORDER_ISOLATED) == 0 && src.isSubmatrix()) - return false; //Process isolated borders only - vx_enum border; - switch (borderType & ~BORDER_ISOLATED) - { - case BORDER_CONSTANT: - border = VX_BORDER_CONSTANT; - break; - case BORDER_REPLICATE: - border = VX_BORDER_REPLICATE; - break; - default: - return false; - } - - try - { - ivx::Context ctx = ovx::getOpenVXContext(); - - Mat a; - if (dst.data != src.data) - a = src; - else - src.copyTo(a); - - ivx::Image - ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), - ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); - - //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments - //since OpenVX standard says nothing about thread-safety for now - ivx::border_t prevBorder = ctx.immediateBorder(); - ctx.setImmediateBorder(border, (vx_uint8)(0)); - ivx::IVX_CHECK_STATUS(vxuGaussian3x3(ctx, ia, ib)); - ctx.setImmediateBorder(prevBorder); - } - catch (const ivx::RuntimeError & e) - { - VX_DbgThrow(e.what()); - } - catch (const ivx::WrapperError & e) - { - VX_DbgThrow(e.what()); - } - return true; -} - -#endif - -#if 0 //defined HAVE_IPP -// IW 2017u2 has bug which doesn't allow use of partial inMem with tiling -#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL -#define IPP_GAUSSIANBLUR_PARALLEL 0 -#else -#define IPP_GAUSSIANBLUR_PARALLEL 1 -#endif - -#ifdef HAVE_IPP_IW - -class ipp_gaussianBlurParallel: public ParallelLoopBody -{ -public: - ipp_gaussianBlurParallel(::ipp::IwiImage &src, ::ipp::IwiImage &dst, int kernelSize, float sigma, ::ipp::IwiBorderType &border, bool *pOk): - m_src(src), m_dst(dst), m_kernelSize(kernelSize), m_sigma(sigma), m_border(border), m_pOk(pOk) { - *m_pOk = true; - } - ~ipp_gaussianBlurParallel() - { - } - - virtual void operator() (const Range& range) const CV_OVERRIDE - { - CV_INSTRUMENT_REGION_IPP(); - - if(!*m_pOk) - return; - - try - { - ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile); - } - catch(const ::ipp::IwException &) - { - *m_pOk = false; - return; - } - } -private: - ::ipp::IwiImage &m_src; - ::ipp::IwiImage &m_dst; - - int m_kernelSize; - float m_sigma; - ::ipp::IwiBorderType &m_border; - - volatile bool *m_pOk; - const ipp_gaussianBlurParallel& operator= (const ipp_gaussianBlurParallel&); -}; - -#endif - -static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, - double sigma1, double sigma2, int borderType ) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - -#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) - CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); - return false; // bug on ia32 -#else - if(sigma1 != sigma2) - return false; - - if(sigma1 < FLT_EPSILON) - return false; - - if(ksize.width != ksize.height) - return false; - - // Acquire data and begin processing - try - { - Mat src = _src.getMat(); - Mat dst = _dst.getMat(); - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize)); - ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder) - return false; - - const int threads = ippiSuggestThreadsNum(iwDst, 2); - if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { - bool ok; - ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); - - if(!ok) - return false; - const Range range(0, (int) iwDst.m_size.height); - parallel_for_(range, invoker, threads*4); - - if(!ok) - return false; - } else { - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder); - } - } - catch (const ::ipp::IwException &) - { - return false; - } - - return true; -#endif -#else - CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); - return false; -#endif -} -#endif -} - -void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, - double sigma1, double sigma2, - int borderType ) +void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst, + const uint16_t/*ufixedpoint16*/* fkx, int fkx_size, + const uint16_t/*ufixedpoint16*/* fky, int fky_size, + int borderType) { CV_INSTRUMENT_REGION(); - int type = _src.type(); - Size size = _src.size(); - _dst.create( size, type ); - - if( (borderType & ~BORDER_ISOLATED) != BORDER_CONSTANT && - ((borderType & BORDER_ISOLATED) != 0 || !_src.getMat().isSubmatrix()) ) + CV_Assert(src.depth() == CV_8U && ((borderType & BORDER_ISOLATED) || !src.isSubmatrix())); + fixedSmoothInvoker invoker( + src.ptr(), src.step1(), + dst.ptr(), dst.step1(), dst.cols, dst.rows, dst.channels(), + (const ufixedpoint16*)fkx, fkx_size, (const ufixedpoint16*)fky, fky_size, + borderType & ~BORDER_ISOLATED); { - if( size.height == 1 ) - ksize.height = 1; - if( size.width == 1 ) - ksize.width = 1; - } - - if( ksize.width == 1 && ksize.height == 1 ) - { - _src.copyTo(_dst); - return; - } - - bool useOpenCL = (ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 && - ((ksize.width == 3 && ksize.height == 3) || - (ksize.width == 5 && ksize.height == 5)) && - _src.rows() > ksize.height && _src.cols() > ksize.width); - CV_UNUSED(useOpenCL); - - int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - - Mat kx, ky; - createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); - - CV_OCL_RUN(useOpenCL, ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType)); - - CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (size_t)_src.rows() > kx.total() && (size_t)_src.cols() > kx.total(), - ocl_sepFilter2D(_src, _dst, sdepth, kx, ky, Point(-1, -1), 0, borderType)) - - Mat src = _src.getMat(); - Mat dst = _dst.getMat(); - - Point ofs; - Size wsz(src.cols, src.rows); - if(!(borderType & BORDER_ISOLATED)) - src.locateROI( wsz, ofs ); - - CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, - ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, - sigma1, sigma2, borderType&~BORDER_ISOLATED); - - CV_OVX_RUN(true, - openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) - - //CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); - - if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix())) - { - std::vector fkx, fky; - createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2); - if (src.data == dst.data) - src = src.clone(); - fixedSmoothInvoker invoker(src.ptr(), src.step1(), dst.ptr(), dst.step1(), dst.cols, dst.rows, dst.channels(), &fkx[0], (int)fkx.size(), &fky[0], (int)fky.size(), borderType & ~BORDER_ISOLATED); + // TODO AVX guard (external call) parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs()))); - return; } - - sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType); } -////////////////////////////////////////////////////////////////////////////////////////// - -CV_IMPL void -cvSmooth( const void* srcarr, void* dstarr, int smooth_type, - int param1, int param2, double param3, double param4 ) -{ - cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0; - - CV_Assert( dst.size() == src.size() && - (smooth_type == CV_BLUR_NO_SCALE || dst.type() == src.type()) ); - - if( param2 <= 0 ) - param2 = param1; - - if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ) - cv::boxFilter( src, dst, dst.depth(), cv::Size(param1, param2), cv::Point(-1,-1), - smooth_type == CV_BLUR, cv::BORDER_REPLICATE ); - else if( smooth_type == CV_GAUSSIAN ) - cv::GaussianBlur( src, dst, cv::Size(param1, param2), param3, param4, cv::BORDER_REPLICATE ); - else if( smooth_type == CV_MEDIAN ) - cv::medianBlur( src, dst, param1 ); - else - cv::bilateralFilter( src, dst, param1, param3, param4, cv::BORDER_REPLICATE ); - - if( dst.data != dst0.data ) - CV_Error( CV_StsUnmatchedFormats, "The destination image does not have the proper type" ); -} - -/* End of file. */ +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/java/generator/android/java/org/opencv/android/BaseLoaderCallback.java b/modules/java/generator/android/java/org/opencv/android/BaseLoaderCallback.java index 0b8aeedc6a..8ece662514 100644 --- a/modules/java/generator/android/java/org/opencv/android/BaseLoaderCallback.java +++ b/modules/java/generator/android/java/org/opencv/android/BaseLoaderCallback.java @@ -137,5 +137,5 @@ public abstract class BaseLoaderCallback implements LoaderCallbackInterface { } protected Context mAppContext; - private final static String TAG = "OpenCVLoader/BaseLoaderCallback"; + private final static String TAG = "OCV/BaseLoaderCallback"; } diff --git a/modules/java/generator/src/cpp/Mat.cpp b/modules/java/generator/src/cpp/Mat.cpp index e222cb5904..1ae2aa6e8c 100644 --- a/modules/java/generator/src/cpp/Mat.cpp +++ b/modules/java/generator/src/cpp/Mat.cpp @@ -32,6 +32,18 @@ static void throwJavaException(JNIEnv *env, const std::exception *e, const char CV_UNUSED(method); // avoid "unused" warning } +// jint could be int or int32_t so casting jint* to int* in general wouldn't work +static std::vector convertJintArrayToVector(JNIEnv* env, jintArray in) { + std::vector out; + int len = env->GetArrayLength(in); + jint* inArray = env->GetIntArrayElements(in, 0); + for ( int i = 0; i < len; i++ ) { + out.push_back(inArray[i]); + } + env->ReleaseIntArrayElements(in, inArray, 0); + return out; +} + extern "C" { @@ -100,6 +112,30 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__III return 0; } +// +// Mat::Mat(int[] sizes, int type) +// + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__I_3II + (JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type); + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__I_3II + (JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type) +{ + static const char method_name[] = "Mat::n_1Mat__I_3II()"; + try { + LOGD("%s", method_name); + std::vector sizes = convertJintArrayToVector(env, sizesArray); + return (jlong) new Mat( ndims, sizes.data(), type ); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + // @@ -182,6 +218,33 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__DDIDDDD +// +// Mat::Mat(int[] sizes, int type, Scalar s) +// + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__I_3IIDDDD + (JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type, jdouble s_val0, jdouble s_val1, jdouble s_val2, jdouble s_val3); + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__I_3IIDDDD + (JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type, jdouble s_val0, jdouble s_val1, jdouble s_val2, jdouble s_val3) +{ + static const char method_name[] = "Mat::n_1Mat__I_3IIDDDD()"; + try { + LOGD("%s", method_name); + std::vector sizes = convertJintArrayToVector(env, sizesArray); + Scalar s(s_val0, s_val1, s_val2, s_val3); + return (jlong) new Mat( ndims, sizes.data(), type, s ); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + + + // // Mat::Mat(Mat m, Range rowRange, Range colRange = Range::all()) // @@ -207,6 +270,59 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__JIIII return 0; } +jint getObjectIntField(JNIEnv* env, jobject obj, const char * fieldName); + +jint getObjectIntField(JNIEnv* env, jobject obj, const char * fieldName) { + jfieldID fid; /* store the field ID */ + + /* Get a reference to obj's class */ + jclass cls = env->GetObjectClass(obj); + + /* Look for the instance field s in cls */ + fid = env->GetFieldID(cls, fieldName, "I"); + if (fid == NULL) + { + return 0; /* failed to find the field */ + } + + /* Read the instance field s */ + return env->GetIntField(obj, fid); +} + +#define RANGE_START_FIELD "start" +#define RANGE_END_FIELD "end" + +// +// Mat::Mat(Mat m, Range[] ranges) +// + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__J_3Lorg_opencv_core_Range_2 + (JNIEnv* env, jclass, jlong m_nativeObj, jobjectArray rangesArray); + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__J_3Lorg_opencv_core_Range_2 + (JNIEnv* env, jclass, jlong m_nativeObj, jobjectArray rangesArray) +{ + static const char method_name[] = "Mat::n_1Mat__J_3Lorg_opencv_core_Range_2()"; + try { + LOGD("%s", method_name); + std::vector ranges; + int rangeCount = env->GetArrayLength(rangesArray); + for (int i = 0; i < rangeCount; i++) { + jobject range = env->GetObjectArrayElement(rangesArray, i); + jint start = getObjectIntField(env, range, RANGE_START_FIELD); + jint end = getObjectIntField(env, range, RANGE_END_FIELD); + ranges.push_back(Range(start, end)); + } + return (jlong) new Mat( (*(Mat*)m_nativeObj), ranges ); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__JII (JNIEnv* env, jclass, jlong m_nativeObj, jint rowRange_start, jint rowRange_end); @@ -718,6 +834,56 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JDDI +// +// void Mat::create(int[] sizes, int type) +// + +JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JI_3II + (JNIEnv* env, jclass, jlong self, jint ndims, jintArray sizesArray, jint type); + +JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JI_3II + (JNIEnv* env, jclass, jlong self, jint ndims, jintArray sizesArray, jint type) +{ + static const char method_name[] = "Mat::n_1create__JI_3II()"; + try { + LOGD("%s", method_name); + Mat* me = (Mat*) self; + std::vector sizes = convertJintArrayToVector(env, sizesArray); + me->create( ndims, sizes.data(), type ); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } +} + + + +// +// Mat Mat::copySize(Mat m) +// + +JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1copySize + (JNIEnv* env, jclass, jlong self, jlong m_nativeObj); + +JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1copySize + (JNIEnv* env, jclass, jlong self, jlong m_nativeObj) +{ + static const char method_name[] = "Mat::n_1copySize()"; + try { + LOGD("%s", method_name); + Mat* me = (Mat*) self; + Mat& m = *((Mat*)m_nativeObj); + me->copySize( m ); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } +} + + + // // Mat Mat::cross(Mat m) // @@ -1234,6 +1400,33 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__DDI +// +// static Mat Mat::ones(int[] sizes, int type) +// + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__I_3II + (JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type); + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__I_3II + (JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type) +{ + static const char method_name[] = "Mat::n_1ones__I_3II()"; + try { + LOGD("%s", method_name); + std::vector sizes = convertJintArrayToVector(env, sizesArray); + Mat _retval_ = Mat::ones( ndims, sizes.data(), type ); + return (jlong) new Mat(_retval_); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + + + // // void Mat::push_back(Mat m) // @@ -1344,8 +1537,8 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1reshape_11 try { LOGD("%s", method_name); Mat* me = (Mat*) self; //TODO: check for NULL - int* newsz = (int*)env->GetPrimitiveArrayCritical(newshape, 0); - Mat _retval_ = me->reshape( cn, newndims, newsz ); + std::vector newsz = convertJintArrayToVector(env, newshape); + Mat _retval_ = me->reshape( cn, newndims, newsz.data() ); return (jlong) new Mat(_retval_); } catch(const std::exception &e) { throwJavaException(env, &e, method_name); @@ -1649,6 +1842,39 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat_1rr return 0; } +// +// Mat Mat::operator()(Range[] ranges) +// + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat_1ranges +(JNIEnv* env, jclass, jlong self, jobjectArray rangesArray); + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat_1ranges +(JNIEnv* env, jclass, jlong self, jobjectArray rangesArray) +{ + static const char method_name[] = "Mat::n_1submat_1ranges()"; + try { + LOGD("%s", method_name); + Mat* me = (Mat*) self; + std::vector ranges; + int rangeCount = env->GetArrayLength(rangesArray); + for (int i = 0; i < rangeCount; i++) { + jobject range = env->GetObjectArrayElement(rangesArray, i); + jint start = getObjectIntField(env, range, RANGE_START_FIELD); + jint end = getObjectIntField(env, range, RANGE_END_FIELD); + ranges.push_back(Range(start, end)); + } + Mat _retval_ = me->operator()( ranges ); + return (jlong) new Mat(_retval_); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + // @@ -1811,6 +2037,33 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__DDI +// +// static Mat Mat::zeros(int[] sizes, int type) +// + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__I_3II +(JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type); + +JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__I_3II +(JNIEnv* env, jclass, jint ndims, jintArray sizesArray, jint type) +{ + static const char method_name[] = "Mat::n_1zeros__I_3II()"; + try { + LOGD("%s", method_name); + std::vector sizes = convertJintArrayToVector(env, sizesArray); + Mat _retval_ = Mat::zeros( ndims, sizes.data(), type ); + return (jlong) new Mat(_retval_); + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + + + // // native support for java finalize() // static void Mat::n_delete( __int64 self ) @@ -1880,6 +2133,50 @@ template static int mat_put(cv::Mat* m, int row, int col, int count, return res; } +// returns true if final index was reached +static bool updateIdx(cv::Mat* m, std::vector& idx, int inc) { + for (int i=m->dims-1; i>=0; i--) { + if (inc == 0) return false; + idx[i] = (idx[i] + 1) % m->size[i]; + inc--; + } + return true; +} + +template static int mat_put_idx(cv::Mat* m, std::vector& idx, int count, int offset, char* buff) +{ + if(! m) return 0; + if(! buff) return 0; + + count *= sizeof(T); + int rest = (int)m->elemSize(); + for (int i = 0; i < m->dims; i++) { + rest *= (m->size[i] - idx[i]); + } + if(count>rest) count = rest; + int res = count; + + if( m->isContinuous() ) + { + memcpy(m->ptr(idx.data()), buff + offset, count); + } else { + // dim by dim + int num = (m->size[m->dims-1] - idx[m->dims-1]) * (int)m->elemSize(); // 1st partial row + if(countptr(idx.data()); + while(count>0){ + memcpy(data, buff + offset, num); + updateIdx(m, idx, num / (int)m->elemSize()); + count -= num; + buff += num; + num = m->size[m->dims-1] * (int)m->elemSize(); + if(countptr(idx.data()); + } + } + return res; +} + template static jint java_mat_put(JNIEnv* env, jlong self, jint row, jint col, jint count, jint offset, ARRAY vals) { static const char *method_name = JavaOpenCVTrait::put; @@ -1903,6 +2200,31 @@ template static jint java_mat_put(JNIEnv* env, jlong self, jint row return 0; } +template static jint java_mat_put_idx(JNIEnv* env, jlong self, jintArray idxArray, jint count, jint offset, ARRAY vals) +{ + static const char *method_name = JavaOpenCVTrait::put; + try { + LOGD("%s", method_name); + cv::Mat* me = (cv::Mat*) self; + if(! self) return 0; // no native object behind + if(me->depth() != JavaOpenCVTrait::cvtype_1 && me->depth() != JavaOpenCVTrait::cvtype_2) return 0; // incompatible type + std::vector idx = convertJintArrayToVector(env, idxArray); + for (int i = 0; i < me->dims ; i++ ) { + if (me->size[i]<=idx[i]) return 0; + } + char* values = (char*)env->GetPrimitiveArrayCritical(vals, 0); + int res = mat_put_idx::value_type>(me, idx, count, offset, values); + env->ReleasePrimitiveArrayCritical(vals, values, JNI_ABORT); + return res; + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + extern "C" { JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutB @@ -1914,6 +2236,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutB return java_mat_put(env, self, row, col, count, 0, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutBIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jbyteArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutBIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jbyteArray vals) +{ + return java_mat_put_idx(env, self, idxArray, count, 0, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutBwOffset (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jint offset, jbyteArray vals); @@ -1923,6 +2254,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutBwOffset return java_mat_put(env, self, row, col, count, offset, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutBwIdxOffset + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jint offset, jbyteArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutBwIdxOffset + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jint offset, jbyteArray vals) +{ + return java_mat_put_idx(env, self, idxArray, count, offset, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutS (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jshortArray vals); @@ -1932,6 +2272,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutS return java_mat_put(env, self, row, col, count, 0, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutSIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jshortArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutSIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jshortArray vals) +{ + return java_mat_put_idx(env, self, idxArray, count, 0, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutI (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jintArray vals); @@ -1941,6 +2290,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutI return java_mat_put(env, self, row, col, count, 0, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutIIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jintArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutIIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jintArray vals) +{ + return java_mat_put_idx(env, self, idxArray, count, 0, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutF (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jfloatArray vals); @@ -1950,6 +2308,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutF return java_mat_put(env, self, row, col, count, 0, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutFIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jfloatArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutFIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jfloatArray vals) +{ + return java_mat_put_idx(env, self, idxArray, count, 0, vals); +} + // unlike other nPut()-s this one (with double[]) should convert input values to correct type #define PUT_ITEM(T, R, C) { T*dst = (T*)me->ptr(R, C); for(int ch=0; chchannels() && count>0; count--,ch++,src++,dst++) *dst = cv::saturate_cast(*src); } @@ -2010,6 +2377,56 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutD return 0; } +// unlike other nPut()-s this one (with double[]) should convert input values to correct type +#define PUT_ITEM_IDX(T, I) { T*dst = (T*)me->ptr(I); for(int ch=0; chchannels() && count>0; count--,ch++,src++,dst++) *dst = cv::saturate_cast(*src); } + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutDIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jdoubleArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutDIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jdoubleArray vals) +{ + static const char* method_name = JavaOpenCVTrait::put; + try { + LOGD("%s", method_name); + cv::Mat* me = (cv::Mat*) self; + if(!me || !me->data) return 0; // no native object behind + std::vector idx = convertJintArrayToVector(env, idxArray); + for (int i=0; idims; i++) { + if (me->size[i]<=idx[i]) return 0; // indexes out of range + } + int rest = me->channels(); + for (int i=0; idims; i++) { + rest *= (me->size[i] - idx[i]); + } + if(count>rest) count = rest; + int res = count; + double* values = (double*)env->GetPrimitiveArrayCritical(vals, 0); + double* src = values; + bool reachedFinalIndex = false; + for(; !reachedFinalIndex && count>0; reachedFinalIndex = updateIdx(me, idx, 1)) + { + switch(me->depth()) { + case CV_8U: PUT_ITEM_IDX(uchar, idx.data()); break; + case CV_8S: PUT_ITEM_IDX(schar, idx.data()); break; + case CV_16U: PUT_ITEM_IDX(ushort, idx.data()); break; + case CV_16S: PUT_ITEM_IDX(short, idx.data()); break; + case CV_32S: PUT_ITEM_IDX(int, idx.data()); break; + case CV_32F: PUT_ITEM_IDX(float, idx.data()); break; + case CV_64F: PUT_ITEM_IDX(double, idx.data()); break; + } + } + env->ReleasePrimitiveArrayCritical(vals, values, 0); + return res; + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + } // extern "C" template static int mat_get(cv::Mat* m, int row, int col, int count, char* buff) @@ -2042,6 +2459,40 @@ template static int mat_get(cv::Mat* m, int row, int col, int count, return res; } +template static int mat_get_idx(cv::Mat* m, std::vector& idx, int count, char* buff) +{ + if(! m) return 0; + if(! buff) return 0; + + count *= sizeof(T); + int rest = (int)m->elemSize(); + for (int i = 0; i < m->dims; i++) { + rest *= (m->size[i] - idx[i]); + } + if(count>rest) count = rest; + int res = count; + + if( m->isContinuous() ) + { + memcpy(buff, m->ptr(idx.data()), count); + } else { + // dim by dim + int num = (m->size[m->dims-1] - idx[m->dims-1]) * (int)m->elemSize(); // 1st partial row + if(countptr(idx.data()); + while(count>0){ + memcpy(buff, data, num); + updateIdx(m, idx, num / (int)m->elemSize()); + count -= num; + buff += num; + num = m->size[m->dims-1] * (int)m->elemSize(); + if(countptr(idx.data()); + } + } + return res; +} + template static jint java_mat_get(JNIEnv* env, jlong self, jint row, jint col, jint count, ARRAY vals) { static const char *method_name = JavaOpenCVTrait::get; try { @@ -2064,6 +2515,31 @@ template static jint java_mat_get(JNIEnv* env, jlong self, jint row return 0; } +template static jint java_mat_get_idx(JNIEnv* env, jlong self, jintArray idxArray, jint count, ARRAY vals) { + static const char *method_name = JavaOpenCVTrait::get; + try { + LOGD("%s", method_name); + cv::Mat* me = (cv::Mat*) self; + if(! self) return 0; // no native object behind + if(me->depth() != JavaOpenCVTrait::cvtype_1 && me->depth() != JavaOpenCVTrait::cvtype_2) return 0; // incompatible type + std::vector idx = convertJintArrayToVector(env, idxArray); + for (int i = 0; i < me->dims ; i++ ) { + if (me->size[i]<=idx[i]) return 0; + } + + char* values = (char*)env->GetPrimitiveArrayCritical(vals, 0); + int res = mat_get_idx::value_type>(me, idx, count, values); + env->ReleasePrimitiveArrayCritical(vals, values, 0); + return res; + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + extern "C" { JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetB @@ -2075,6 +2551,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetB return java_mat_get(env, self, row, col, count, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetBIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jbyteArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetBIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jbyteArray vals) +{ + return java_mat_get_idx(env, self, idxArray, count, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetS (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jshortArray vals); @@ -2084,6 +2569,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetS return java_mat_get(env, self, row, col, count, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetSIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jshortArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetSIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jshortArray vals) +{ + return java_mat_get_idx(env, self, idxArray, count, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetI (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jintArray vals); @@ -2093,6 +2587,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetI return java_mat_get(env, self, row, col, count, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetIIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jintArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetIIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jintArray vals) +{ + return java_mat_get_idx(env, self, idxArray, count, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetF (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jfloatArray vals); @@ -2102,6 +2605,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetF return java_mat_get(env, self, row, col, count, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetFIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jfloatArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetFIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jfloatArray vals) +{ + return java_mat_get_idx(env, self, idxArray, count, vals); +} + JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetD (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jdoubleArray vals); @@ -2111,6 +2623,15 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetD return java_mat_get(env, self, row, col, count, vals); } +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetDIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jdoubleArray vals); + +JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetDIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray, jint count, jdoubleArray vals) +{ + return java_mat_get_idx(env, self, idxArray, count, vals); +} + JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet (JNIEnv* env, jclass, jlong self, jint row, jint col); @@ -2149,6 +2670,47 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet return 0; } +JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGetIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray); + +JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGetIdx + (JNIEnv* env, jclass, jlong self, jintArray idxArray) +{ + static const char method_name[] = "Mat::nGetIdx()"; + try { + LOGD("%s", method_name); + cv::Mat* me = (cv::Mat*) self; + if(! self) return 0; // no native object behind + std::vector idx = convertJintArrayToVector(env, idxArray); + for (int i=0; idims; i++) { + if (me->size[i]<=idx[i]) return 0; // indexes out of range + } + + jdoubleArray res = env->NewDoubleArray(me->channels()); + if(res){ + jdouble buff[CV_CN_MAX];//me->channels() + int i; + switch(me->depth()){ + case CV_8U: for(i=0; ichannels(); i++) buff[i] = *((unsigned char*) me->ptr(idx.data()) + i); break; + case CV_8S: for(i=0; ichannels(); i++) buff[i] = *((signed char*) me->ptr(idx.data()) + i); break; + case CV_16U: for(i=0; ichannels(); i++) buff[i] = *((unsigned short*)me->ptr(idx.data()) + i); break; + case CV_16S: for(i=0; ichannels(); i++) buff[i] = *((signed short*) me->ptr(idx.data()) + i); break; + case CV_32S: for(i=0; ichannels(); i++) buff[i] = *((int*) me->ptr(idx.data()) + i); break; + case CV_32F: for(i=0; ichannels(); i++) buff[i] = *((float*) me->ptr(idx.data()) + i); break; + case CV_64F: for(i=0; ichannels(); i++) buff[i] = *((double*) me->ptr(idx.data()) + i); break; + } + env->SetDoubleArrayRegion(res, 0, me->channels(), buff); + } + return res; + } catch(const std::exception &e) { + throwJavaException(env, &e, method_name); + } catch (...) { + throwJavaException(env, 0, method_name); + } + + return 0; +} + JNIEXPORT jstring JNICALL Java_org_opencv_core_Mat_nDump (JNIEnv *env, jclass, jlong self); diff --git a/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java b/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java index c3af0b343b..cc7eb9dca7 100644 --- a/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java +++ b/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java @@ -99,6 +99,8 @@ public class OpenCVTestCase extends TestCase { protected Mat rgbLena; protected Mat grayChess; + protected Mat gray255_32f_3d; + protected Mat v1; protected Mat v2; @@ -149,6 +151,8 @@ public class OpenCVTestCase extends TestCase { rgbLena = Imgcodecs.imread(OpenCVTestRunner.LENA_PATH); grayChess = Imgcodecs.imread(OpenCVTestRunner.CHESS_PATH, 0); + gray255_32f_3d = new Mat(new int[]{matSize, matSize, matSize}, CvType.CV_32F, new Scalar(255.0)); + v1 = new Mat(1, 3, CvType.CV_32F); v1.put(0, 0, 1.0, 3.0, 2.0); v2 = new Mat(1, 3, CvType.CV_32F); @@ -184,6 +188,7 @@ public class OpenCVTestCase extends TestCase { rgba128.release(); rgbLena.release(); grayChess.release(); + gray255_32f_3d.release(); v1.release(); v2.release(); @@ -442,8 +447,24 @@ public class OpenCVTestCase extends TestCase { assertEquals(msg, expected.z, actual.z, eps); } + static private boolean dimensionsEqual(Mat expected, Mat actual) { + if (expected.dims() != actual.dims()) { + return false; + } + if (expected.dims() > 2) { + for (int i = 0; i < expected.dims(); i++) { + if (expected.size(i) != actual.size(i)) { + return false; + } + } + return true; + } else { + return expected.cols() == actual.cols() && expected.rows() == actual.rows(); + } + } + static private void compareMats(Mat expected, Mat actual, boolean isEqualityMeasured) { - if (expected.type() != actual.type() || expected.cols() != actual.cols() || expected.rows() != actual.rows()) { + if (expected.type() != actual.type() || !dimensionsEqual(expected, actual)) { throw new UnsupportedOperationException("Can not compare " + expected + " and " + actual); } @@ -471,7 +492,7 @@ public class OpenCVTestCase extends TestCase { } static private void compareMats(Mat expected, Mat actual, double eps, boolean isEqualityMeasured) { - if (expected.type() != actual.type() || expected.cols() != actual.cols() || expected.rows() != actual.rows()) { + if (expected.type() != actual.type() || !dimensionsEqual(expected, actual)) { throw new UnsupportedOperationException("Can not compare " + expected + " and " + actual); } diff --git a/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java b/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java index a66206e223..7dc3432add 100644 --- a/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java +++ b/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java @@ -97,6 +97,8 @@ public class OpenCVTestCase extends TestCase { protected Mat rgbLena; protected Mat grayChess; + protected Mat gray255_32f_3d; + protected Mat v1; protected Mat v2; @@ -175,6 +177,8 @@ public class OpenCVTestCase extends TestCase { rgbLena = Imgcodecs.imread(OpenCVTestRunner.LENA_PATH); grayChess = Imgcodecs.imread(OpenCVTestRunner.CHESS_PATH, 0); + gray255_32f_3d = new Mat(new int[]{matSize, matSize, matSize}, CvType.CV_32F, new Scalar(255.0)); + v1 = new Mat(1, 3, CvType.CV_32F); v1.put(0, 0, 1.0, 3.0, 2.0); v2 = new Mat(1, 3, CvType.CV_32F); @@ -210,6 +214,7 @@ public class OpenCVTestCase extends TestCase { rgba128.release(); rgbLena.release(); grayChess.release(); + gray255_32f_3d.release(); v1.release(); v2.release(); @@ -468,8 +473,24 @@ public class OpenCVTestCase extends TestCase { assertEquals(msg, expected.z, actual.z, eps); } + static private boolean dimensionsEqual(Mat expected, Mat actual) { + if (expected.dims() != actual.dims()) { + return false; + } + if (expected.dims() > 2) { + for (int i = 0; i < expected.dims(); i++) { + if (expected.size(i) != actual.size(i)) { + return false; + } + } + return true; + } else { + return expected.cols() == actual.cols() && expected.rows() == actual.rows(); + } + } + static private void compareMats(Mat expected, Mat actual, boolean isEqualityMeasured) { - if (expected.type() != actual.type() || expected.cols() != actual.cols() || expected.rows() != actual.rows()) { + if (expected.type() != actual.type() || !dimensionsEqual(expected, actual)) { throw new UnsupportedOperationException("Can not compare " + expected + " and " + actual); } @@ -497,7 +518,7 @@ public class OpenCVTestCase extends TestCase { } static private void compareMats(Mat expected, Mat actual, double eps, boolean isEqualityMeasured) { - if (expected.type() != actual.type() || expected.cols() != actual.cols() || expected.rows() != actual.rows()) { + if (expected.type() != actual.type() || !dimensionsEqual(expected, actual)) { throw new UnsupportedOperationException("Can not compare " + expected + " and " + actual); } diff --git a/modules/video/perf/perf_optflowpyrlk.cpp b/modules/video/perf/perf_optflowpyrlk.cpp index ca16204384..aca8a1399b 100644 --- a/modules/video/perf/perf_optflowpyrlk.cpp +++ b/modules/video/perf/perf_optflowpyrlk.cpp @@ -95,7 +95,7 @@ PERF_TEST_P(Path_Idx_Cn_NPoints_WSize, OpticalFlowPyrLK_full, testing::Combine( typedef tuple, int> Path_Idx_NPoints_WSize_t; typedef TestBaseWithParam Path_Idx_NPoints_WSize; -PERF_TEST_P(Path_Idx_NPoints_WSize, OpticalFlowPyrLK_ovx, testing::Combine( +PERF_TEST_P(Path_Idx_NPoints_WSize, DISABLED_OpticalFlowPyrLK_ovx, testing::Combine( testing::Values("cv/optflow/frames/VGA_%02d.png", "cv/optflow/frames/720p_%02d.png"), testing::Range(1, 3), testing::Values(make_tuple(9, 9), make_tuple(15, 15)), diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index 6c7dd76f10..4378171b3c 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -1590,6 +1590,10 @@ void handleMessage(GstElement * pipeline) while(gst_bus_have_pending(bus)) { msg = gst_bus_pop(bus); + if (!msg || !GST_IS_MESSAGE(msg)) + { + continue; + } if(gst_is_missing_plugin_message(msg)) {