mirror of
https://github.com/opencv/opencv.git
synced 2025-06-13 04:52:53 +08:00
Merge pull request #8694 from mschoeneck:Canny
Parallelize Canny with custom gradient (#8694) * New Canny implementation. Restructuring code in parallelCanny class. Align mag buffer and map. * Fix warnings. * Missing SIMD check added. * Replaced local trailingZeros in contours.cpp. Use alignSize in canny.cpp * Fix warnings in alignSize and allocate just minimum extra columns. * Fix another warning in map.create. * Exchange for loop by do loop to avoid double check at the beginning. Define extra SIMD CANNY_CHECK to avoid unnecessary continue.
This commit is contained in:
parent
2e056fbe8a
commit
4a4d94f266
@ -433,6 +433,29 @@ template <> struct V_RegTrait128<double> {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
 * Counts the trailing zero bits of a 32-bit value, i.e. returns the index of
 * its least significant set bit.
 *
 * @param value Bit mask to scan.
 * @return Index (0..31) of the lowest set bit, or 32 when value is zero.
 */
static inline unsigned int trailingZeros32(unsigned int value) {
    /* The bit-scan primitives below give no usable result for a zero input
       (__builtin_ctz is undefined, _BitScanForward does not write the index),
       so handle that case once and return the same answer on every compiler. */
    if (value == 0) {
        return 32;
    }
#if defined(_MSC_VER)
    /* _BitScanForward is used for every MSVC version: unlike _tzcnt_u32 it
       does not rely on the BMI1 instruction set being available at run time. */
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#elif defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
    /* clang provides the same builtin as GCC. */
    return (unsigned int)__builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return (unsigned int)_bit_scan_forward(value);
#else
    /* Portable fallback: isolate the lowest set bit, then map it to its
       position with a De Bruijn multiplication and a 32-entry lookup table. */
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    /* 0u - value is the two's complement negation without applying unary
       minus to an unsigned operand. */
    return (unsigned int)MultiplyDeBruijnBitPosition[((uint32_t)((value & (0u - value)) * 0x077CB531U)) >> 27];
#endif
}
|
||||||
|
|
||||||
#ifndef CV_DOXYGEN
|
#ifndef CV_DOXYGEN
|
||||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -39,6 +39,7 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin.hpp"
|
||||||
|
|
||||||
/* initializes 8-element array for fast access to 3x3 neighborhood of a pixel */
|
/* initializes 8-element array for fast access to 3x3 neighborhood of a pixel */
|
||||||
#define CV_INIT_3X3_DELTAS( deltas, step, nch ) \
|
#define CV_INIT_3X3_DELTAS( deltas, step, nch ) \
|
||||||
@ -50,33 +51,6 @@
|
|||||||
static const CvPoint icvCodeDeltas[8] =
|
static const CvPoint icvCodeDeltas[8] =
|
||||||
{ CvPoint(1, 0), CvPoint(1, -1), CvPoint(0, -1), CvPoint(-1, -1), CvPoint(-1, 0), CvPoint(-1, 1), CvPoint(0, 1), CvPoint(1, 1) };
|
{ CvPoint(1, 0), CvPoint(1, -1), CvPoint(0, -1), CvPoint(-1, -1), CvPoint(-1, 0), CvPoint(-1, 1), CvPoint(0, 1), CvPoint(1, 1) };
|
||||||
|
|
||||||
#if CV_SSE2
|
|
||||||
static
|
|
||||||
inline unsigned int trailingZeros(unsigned int value) {
|
|
||||||
CV_DbgAssert(value != 0); // undefined for zero input (https://en.wikipedia.org/wiki/Find_first_set)
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#if (_MSC_VER < 1700)
|
|
||||||
unsigned long index = 0;
|
|
||||||
_BitScanForward(&index, value);
|
|
||||||
return (unsigned int)index;
|
|
||||||
#else
|
|
||||||
return _tzcnt_u32(value);
|
|
||||||
#endif
|
|
||||||
#elif defined(__GNUC__) || defined(__GNUG__)
|
|
||||||
return __builtin_ctz(value);
|
|
||||||
#elif defined(__ICC) || defined(__INTEL_COMPILER)
|
|
||||||
return _bit_scan_forward(value);
|
|
||||||
#elif defined(__clang__)
|
|
||||||
return llvm.cttz.i32(value, true);
|
|
||||||
#else
|
|
||||||
static const int MultiplyDeBruijnBitPosition[32] = {
|
|
||||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
|
||||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
|
|
||||||
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
CV_IMPL void
|
CV_IMPL void
|
||||||
cvStartReadChainPoints( CvChain * chain, CvChainPtReader * reader )
|
cvStartReadChainPoints( CvChain * chain, CvChainPtReader * reader )
|
||||||
{
|
{
|
||||||
@ -1097,12 +1071,12 @@ cvFindNextContour( CvContourScanner scanner )
|
|||||||
mask2 ^= 0x0000ffff;
|
mask2 ^= 0x0000ffff;
|
||||||
|
|
||||||
if (mask1) {
|
if (mask1) {
|
||||||
p = img[(x += trailingZeros(mask1))];
|
p = img[(x += cv::trailingZeros32(mask1))];
|
||||||
goto _next_contour;
|
goto _next_contour;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mask2) {
|
if (mask2) {
|
||||||
p = img[(x += trailingZeros(mask2 << 16))];
|
p = img[(x += cv::trailingZeros32(mask2 << 16))];
|
||||||
goto _next_contour;
|
goto _next_contour;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1113,7 +1087,7 @@ cvFindNextContour( CvContourScanner scanner )
|
|||||||
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_prev)) ^ 0x0000ffff;
|
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_prev)) ^ 0x0000ffff;
|
||||||
|
|
||||||
if (mask) {
|
if (mask) {
|
||||||
p = img[(x += trailingZeros(mask))];
|
p = img[(x += cv::trailingZeros32(mask))];
|
||||||
goto _next_contour;
|
goto _next_contour;
|
||||||
}
|
}
|
||||||
x += 16;
|
x += 16;
|
||||||
@ -1394,12 +1368,12 @@ inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j, bool h
|
|||||||
mask2 ^= 0x0000ffff;
|
mask2 ^= 0x0000ffff;
|
||||||
|
|
||||||
if (mask1) {
|
if (mask1) {
|
||||||
j += trailingZeros(mask1);
|
j += cv::trailingZeros32(mask1);
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mask2) {
|
if (mask2) {
|
||||||
j += trailingZeros(mask2 << 16);
|
j += cv::trailingZeros32(mask2 << 16);
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1410,7 +1384,7 @@ inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j, bool h
|
|||||||
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero)) ^ 0x0000ffff;
|
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero)) ^ 0x0000ffff;
|
||||||
|
|
||||||
if (mask) {
|
if (mask) {
|
||||||
j += trailingZeros(mask);
|
j += cv::trailingZeros32(mask);
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
j += 16;
|
j += 16;
|
||||||
@ -1443,12 +1417,12 @@ inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j, bool hav
|
|||||||
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
|
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
|
||||||
|
|
||||||
if (mask1) {
|
if (mask1) {
|
||||||
j += trailingZeros(mask1);
|
j += cv::trailingZeros32(mask1);
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mask2) {
|
if (mask2) {
|
||||||
j += trailingZeros(mask2 << 16);
|
j += cv::trailingZeros32(mask2 << 16);
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1459,7 +1433,7 @@ inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j, bool hav
|
|||||||
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero));
|
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero));
|
||||||
|
|
||||||
if (mask) {
|
if (mask) {
|
||||||
j += trailingZeros(mask);
|
j += cv::trailingZeros32(mask);
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
j += 16;
|
j += 16;
|
||||||
|
Loading…
Reference in New Issue
Block a user