mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 22:00:25 +08:00
Improves findContours using SSE _mm_movemask_epi8 to find next contour point. Cleaned up code a little bit
This commit is contained in:
parent
d102ea96c0
commit
8af8c4d0aa
@ -50,6 +50,21 @@
|
||||
static const CvPoint icvCodeDeltas[8] =
|
||||
{ CvPoint(1, 0), CvPoint(1, -1), CvPoint(0, -1), CvPoint(-1, -1), CvPoint(-1, 0), CvPoint(-1, 1), CvPoint(0, 1), CvPoint(1, 1) };
|
||||
|
||||
inline unsigned int trailingZeros(unsigned int value) {
|
||||
#if defined(_MSC_VER)
|
||||
return _tzcnt_u32(value);
|
||||
#elif defined(_GCC)
|
||||
return __builtin_ctz(value);
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
//return _bit_scan_reverse(value);
|
||||
#else
|
||||
static const int MultiplyDeBruijnBitPosition[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
|
||||
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
CV_IMPL void
|
||||
cvStartReadChainPoints( CvChain * chain, CvChainPtReader * reader )
|
||||
{
|
||||
@ -527,10 +542,8 @@ icvFetchContour( schar *ptr,
|
||||
{
|
||||
s = (s - 1) & 7;
|
||||
i1 = i0 + deltas[s];
|
||||
if( *i1 != 0 )
|
||||
break;
|
||||
}
|
||||
while( s != s_end );
|
||||
while( *i1 == 0 && s != s_end );
|
||||
|
||||
if( s == s_end ) /* single pixel domain */
|
||||
{
|
||||
@ -631,10 +644,8 @@ icvTraceContour( schar *ptr, int step, schar *stop_ptr, int is_hole )
|
||||
{
|
||||
s = (s - 1) & 7;
|
||||
i1 = i0 + deltas[s];
|
||||
if( *i1 != 0 )
|
||||
break;
|
||||
}
|
||||
while( s != s_end );
|
||||
while( *i1 == 0 && s != s_end );
|
||||
|
||||
i3 = i0;
|
||||
|
||||
@ -644,7 +655,6 @@ icvTraceContour( schar *ptr, int step, schar *stop_ptr, int is_hole )
|
||||
/* follow border */
|
||||
for( ;; )
|
||||
{
|
||||
s_end = s;
|
||||
|
||||
for( ;; )
|
||||
{
|
||||
@ -702,10 +712,8 @@ icvFetchContourEx( schar* ptr,
|
||||
{
|
||||
s = (s - 1) & 7;
|
||||
i1 = i0 + deltas[s];
|
||||
if( *i1 != 0 )
|
||||
break;
|
||||
}
|
||||
while( s != s_end );
|
||||
while( *i1 == 0 && s != s_end );
|
||||
|
||||
if( s == s_end ) /* single pixel domain */
|
||||
{
|
||||
@ -817,10 +825,8 @@ icvTraceContour_32s( int *ptr, int step, int *stop_ptr, int is_hole )
|
||||
{
|
||||
s = (s - 1) & 7;
|
||||
i1 = i0 + deltas[s];
|
||||
if( (*i1 & value_mask) == ccomp_val )
|
||||
break;
|
||||
}
|
||||
while( s != s_end );
|
||||
while( (*i1 & value_mask) != ccomp_val && s != s_end );
|
||||
|
||||
i3 = i0;
|
||||
|
||||
@ -892,10 +898,8 @@ icvFetchContourEx_32s( int* ptr,
|
||||
{
|
||||
s = (s - 1) & 7;
|
||||
i1 = i0 + deltas[s];
|
||||
if( (*i1 & value_mask) == ccomp_val )
|
||||
break;
|
||||
}
|
||||
while( s != s_end );
|
||||
while( (*i1 & value_mask) != ccomp_val && s != s_end );
|
||||
|
||||
if( s == s_end ) /* single pixel domain */
|
||||
{
|
||||
@ -990,6 +994,11 @@ cvFindNextContour( CvContourScanner scanner )
|
||||
{
|
||||
if( !scanner )
|
||||
CV_Error( CV_StsNullPtr, "" );
|
||||
|
||||
#if CV_SSE2
|
||||
bool haveSSE2 = cv::checkHardwareSupport(CPU_SSE2);
|
||||
#endif
|
||||
|
||||
icvEndProcessContour( scanner );
|
||||
|
||||
/* initialize local state */
|
||||
@ -1034,13 +1043,60 @@ cvFindNextContour( CvContourScanner scanner )
|
||||
}
|
||||
else
|
||||
{
|
||||
#if CV_SSE2
|
||||
if ((p = img[x]) != prev) {
|
||||
goto _next_contour;
|
||||
} else if (haveSSE2) {
|
||||
|
||||
__m128i v_prev = _mm_set1_epi8(prev);
|
||||
int v_size = width - 32;
|
||||
|
||||
for (; x <= v_size; x += 32) {
|
||||
__m128i v_p1 = _mm_loadu_si128((__m128i*)(img + x));
|
||||
__m128i v_p2 = _mm_loadu_si128((__m128i*)(img + x + 16));
|
||||
|
||||
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_prev);
|
||||
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_prev);
|
||||
|
||||
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
|
||||
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
|
||||
|
||||
mask1 ^= 0x0000ffff;
|
||||
mask2 ^= 0x0000ffff;
|
||||
|
||||
if (mask1) {
|
||||
p = img[(x += trailingZeros(mask1))];
|
||||
goto _next_contour;
|
||||
}
|
||||
|
||||
if (mask2) {
|
||||
p = img[(x += trailingZeros(mask2 << 16))];
|
||||
goto _next_contour;
|
||||
}
|
||||
}
|
||||
|
||||
if(x <= width - 16) {
|
||||
__m128i v_p = _mm_loadu_si128((__m128i*)(img + x));
|
||||
|
||||
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_prev)) ^ 0x0000ffff;
|
||||
|
||||
if (mask) {
|
||||
p = img[(x += trailingZeros(mask))];
|
||||
goto _next_contour;
|
||||
}
|
||||
x += 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; x < width && (p = img[x]) == prev; x++ )
|
||||
;
|
||||
}
|
||||
|
||||
if( x >= width )
|
||||
break;
|
||||
|
||||
#if CV_SSE2
|
||||
_next_contour:
|
||||
#endif
|
||||
{
|
||||
_CvContourInfo *par_info = 0;
|
||||
_CvContourInfo *l_cinfo = 0;
|
||||
@ -1275,7 +1331,7 @@ cvEndFindContours( CvContourScanner * _scanner )
|
||||
#define ICV_SINGLE 0
|
||||
#define ICV_CONNECTING_ABOVE 1
|
||||
#define ICV_CONNECTING_BELOW -1
|
||||
#define ICV_IS_COMPONENT_POINT(val) ((val) != 0)
|
||||
//#define ICV_IS_COMPONENT_POINT(val) ((val) != 0)
|
||||
|
||||
#define CV_GET_WRITTEN_ELEM( writer ) ((writer).ptr - (writer).seq->elem_size)
|
||||
|
||||
@ -1287,6 +1343,108 @@ typedef struct CvLinkedRunPoint
|
||||
}
|
||||
CvLinkedRunPoint;
|
||||
|
||||
#if CV_SSE2
|
||||
inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSSE2) {
|
||||
#else
|
||||
inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j) {
|
||||
#endif
|
||||
#if CV_SSE2
|
||||
if (haveSSE2) {
|
||||
__m128i sseZero = _mm_setzero_si128();
|
||||
int sizeSse = img_size.width - 32;
|
||||
|
||||
for (; j <= sizeSse; j += 32) {
|
||||
__m128i sseP1 = _mm_loadu_si128((__m128i*)(src_data + j));
|
||||
__m128i sseP2 = _mm_loadu_si128((__m128i*)(src_data + j + 16));
|
||||
|
||||
__m128i sseCmp1 = _mm_cmpeq_epi8(sseP1, sseZero);
|
||||
__m128i sseCmp2 = _mm_cmpeq_epi8(sseP2, sseZero);
|
||||
|
||||
unsigned int mask1 = _mm_movemask_epi8(sseCmp1);
|
||||
unsigned int mask2 = _mm_movemask_epi8(sseCmp2);
|
||||
|
||||
mask1 ^= 0x0000ffff;
|
||||
mask2 ^= 0x0000ffff;
|
||||
|
||||
if (mask1) {
|
||||
j += trailingZeros(mask1);
|
||||
return j;
|
||||
}
|
||||
|
||||
if (mask2) {
|
||||
j += trailingZeros(mask2 << 16);
|
||||
return j;
|
||||
}
|
||||
}
|
||||
|
||||
if (j <= img_size.width - 16) {
|
||||
__m128i sseP = _mm_loadu_si128((__m128i*)(src_data + j));
|
||||
|
||||
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(sseP, sseZero)) ^ 0x0000ffff;
|
||||
|
||||
if (mask) {
|
||||
j += trailingZeros(mask);
|
||||
return j;
|
||||
}
|
||||
j += 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; j < img_size.width && !src_data[j]; ++j)
|
||||
;
|
||||
return j;
|
||||
}
|
||||
|
||||
#if CV_SSE2
|
||||
inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSSE2) {
|
||||
#else
|
||||
inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j) {
|
||||
#endif
|
||||
#if CV_SSE2
|
||||
if (!src_data[j]) {
|
||||
return j;
|
||||
} else if (haveSSE2) {
|
||||
__m128i sseZero = _mm_setzero_si128();
|
||||
int sizeSse = img_size.width - 32;
|
||||
|
||||
for (; j <= sizeSse; j += 32) {
|
||||
__m128i sseP1 = _mm_loadu_si128((__m128i*)(src_data + j));
|
||||
__m128i sseP2 = _mm_loadu_si128((__m128i*)(src_data + j + 16));
|
||||
|
||||
__m128i sseCmp1 = _mm_cmpeq_epi8(sseP1, sseZero);
|
||||
__m128i sseCmp2 = _mm_cmpeq_epi8(sseP2, sseZero);
|
||||
|
||||
unsigned int mask1 = _mm_movemask_epi8(sseCmp1);
|
||||
unsigned int mask2 = _mm_movemask_epi8(sseCmp2);
|
||||
|
||||
if (mask1) {
|
||||
j += trailingZeros(mask1);
|
||||
return j;
|
||||
}
|
||||
|
||||
if (mask2) {
|
||||
j += trailingZeros(mask2 << 16);
|
||||
return j;
|
||||
}
|
||||
}
|
||||
|
||||
if (j <= img_size.width - 16) {
|
||||
__m128i sseP = _mm_loadu_si128((__m128i*)(src_data + j));
|
||||
|
||||
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(sseP, sseZero));
|
||||
|
||||
if (mask) {
|
||||
j += trailingZeros(mask);
|
||||
return j;
|
||||
}
|
||||
j += 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; j < img_size.width && src_data[j]; ++j)
|
||||
;
|
||||
return j;
|
||||
}
|
||||
|
||||
static int
|
||||
icvFindContoursInInterval( const CvArr* src,
|
||||
@ -1339,7 +1497,9 @@ icvFindContoursInInterval( const CvArr* src,
|
||||
|
||||
if( contourHeaderSize < (int)sizeof(CvContour))
|
||||
CV_Error( CV_StsBadSize, "Contour header size must be >= sizeof(CvContour)" );
|
||||
|
||||
#if CV_SSE2
|
||||
bool haveSSE2 = cv::checkHardwareSupport(CPU_SSE2);
|
||||
#endif
|
||||
storage00.reset(cvCreateChildMemStorage(storage));
|
||||
storage01.reset(cvCreateChildMemStorage(storage));
|
||||
|
||||
@ -1372,8 +1532,11 @@ icvFindContoursInInterval( const CvArr* src,
|
||||
tmp_prev = upper_line;
|
||||
for( j = 0; j < img_size.width; )
|
||||
{
|
||||
for( ; j < img_size.width && !ICV_IS_COMPONENT_POINT(src_data[j]); j++ )
|
||||
;
|
||||
#if CV_SSE2
|
||||
j = findStartContourPoint(src_data, img_size, j, haveSSE2);
|
||||
#else
|
||||
j = findStartContourPoint(src_data, img_size, j);
|
||||
#endif
|
||||
if( j == img_size.width )
|
||||
break;
|
||||
|
||||
@ -1381,10 +1544,11 @@ icvFindContoursInInterval( const CvArr* src,
|
||||
CV_WRITE_SEQ_ELEM( tmp, writer );
|
||||
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
|
||||
tmp_prev = tmp_prev->next;
|
||||
|
||||
for( ; j < img_size.width && ICV_IS_COMPONENT_POINT(src_data[j]); j++ )
|
||||
;
|
||||
|
||||
#if CV_SSE2
|
||||
j = findEndContourPoint(src_data, img_size, j, haveSSE2);
|
||||
#else
|
||||
j = findEndContourPoint(src_data, img_size, j);
|
||||
#endif
|
||||
tmp.pt.x = j-1;
|
||||
CV_WRITE_SEQ_ELEM( tmp, writer );
|
||||
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
|
||||
@ -1407,18 +1571,22 @@ icvFindContoursInInterval( const CvArr* src,
|
||||
all_total = runs->total;
|
||||
for( j = 0; j < img_size.width; )
|
||||
{
|
||||
for( ; j < img_size.width && !ICV_IS_COMPONENT_POINT(src_data[j]); j++ )
|
||||
;
|
||||
#if CV_SSE2
|
||||
j = findStartContourPoint(src_data, img_size, j, haveSSE2);
|
||||
#else
|
||||
j = findStartContourPoint(src_data, img_size, j);
|
||||
#endif
|
||||
if( j == img_size.width ) break;
|
||||
|
||||
tmp.pt.x = j;
|
||||
CV_WRITE_SEQ_ELEM( tmp, writer );
|
||||
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
|
||||
tmp_prev = tmp_prev->next;
|
||||
|
||||
for( ; j < img_size.width && ICV_IS_COMPONENT_POINT(src_data[j]); j++ )
|
||||
;
|
||||
|
||||
#if CV_SSE2
|
||||
j = findEndContourPoint(src_data, img_size, j, haveSSE2);
|
||||
#else
|
||||
j = findEndContourPoint(src_data, img_size, j);
|
||||
#endif
|
||||
tmp.pt.x = j-1;
|
||||
CV_WRITE_SEQ_ELEM( tmp, writer );
|
||||
tmp_prev = tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
|
||||
|
Loading…
Reference in New Issue
Block a user