opencv/3rdparty/carotene/src/canny.cpp

774 lines
26 KiB
C++

/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3-clause BSD License)
*
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
* Third party copyrights are property of their respective owners.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort (including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "common.hpp"
#include "saturate_cast.hpp"
#include <vector>
#include <cstring>
namespace CAROTENE_NS {
#ifdef CAROTENE_NEON
namespace {
struct RowFilter3x3Canny
{
inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr)
{
vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL));
vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL));
lookLeft = offsetk - borderxl;
lookRight = offsetk - borderxr;
}
inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width)
{
uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask);
ptrdiff_t i = 0;
for (; i < width - 8 + lookRight; i += 8)
{
internal::prefetch(src + i);
uint8x8_t l18u = vld1_u8(src + i + 1);
uint8x8_t l2 = l18u;
uint8x8_t l0 = vext_u8(l, l18u, 6);
int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1));
l = l18u;
int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0));
int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0));
int16x8_t ldy = vaddq_s16(l02, l1x2);
vst1q_s16(dstx + i, ldx);
vst1q_s16(dsty + i, ldy);
}
//tail
if (lookRight == 0 || i != width)
{
uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1
uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask);
uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7);
int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0));
int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1));
int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0));
int16x8_t taildy = vqaddq_s16(tail02, tail1x2);
vst1q_s16(dstx + (width - 8), taildx);
vst1q_s16(dsty + (width - 8), taildy);
}
}
uint8x8_t vfmask;
uint8x8_t vtmask;
enum { offsetk = 1};
ptrdiff_t lookLeft;
ptrdiff_t lookRight;
};
template <bool L2gradient>
inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
{
ptrdiff_t j = 0;
for (; j <= width - 8; j += 8)
{
ColFilter3x3CannyL1Loop:
int16x8_t line0x = vld1q_s16(src0 + j);
int16x8_t line1x = vld1q_s16(src1 + j);
int16x8_t line2x = vld1q_s16(src2 + j);
int16x8_t line0y = vld1q_s16(src0 + j + width);
int16x8_t line2y = vld1q_s16(src2 + j + width);
int16x8_t l02 = vaddq_s16(line0x, line2x);
int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
int16x8_t dy = vsubq_s16(line2y, line0y);
int16x8_t dx = vaddq_s16(l1x2, l02);
int16x8_t dya = vabsq_s16(dy);
int16x8_t dxa = vabsq_s16(dx);
int16x8_t norm = vaddq_s16(dya, dxa);
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
vst1q_s16(dsty + j, dy);
vst1q_s16(dstx + j, dx);
vst1q_s32(mag + j + 4, normh);
vst1q_s32(mag + j, norml);
}
if (j != width)
{
j = width - 8;
goto ColFilter3x3CannyL1Loop;
}
}
template <>
inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
{
ptrdiff_t j = 0;
for (; j <= width - 8; j += 8)
{
ColFilter3x3CannyL2Loop:
int16x8_t line0x = vld1q_s16(src0 + j);
int16x8_t line1x = vld1q_s16(src1 + j);
int16x8_t line2x = vld1q_s16(src2 + j);
int16x8_t line0y = vld1q_s16(src0 + j + width);
int16x8_t line2y = vld1q_s16(src2 + j + width);
int16x8_t l02 = vaddq_s16(line0x, line2x);
int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
int16x8_t dy = vsubq_s16(line2y, line0y);
int16x8_t dx = vaddq_s16(l1x2, l02);
int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx));
int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy));
norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy));
normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx));
vst1q_s16(dsty + j, dy);
vst1q_s16(dstx + j, dx);
vst1q_s32(mag + j, norml);
vst1q_s32(mag + j + 4, normh);
}
if (j != width)
{
j = width - 8;
goto ColFilter3x3CannyL2Loop;
}
}
template <bool L2gradient>
inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
{
ptrdiff_t j = 0;
if (colscn >= 8)
{
int16x8_t vx = vld1q_s16(_dx);
int16x8_t vy = vld1q_s16(_dy);
for (; j <= colscn - 16; j+=8)
{
internal::prefetch(_dx);
internal::prefetch(_dy);
int16x8_t vx2 = vld1q_s16(_dx + j + 8);
int16x8_t vy2 = vld1q_s16(_dy + j + 8);
int16x8_t vabsx = vabsq_s16(vx);
int16x8_t vabsy = vabsq_s16(vy);
int16x8_t norm = vaddq_s16(vabsx, vabsy);
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
vst1q_s32(_norm + j + 4, normh);
vst1q_s32(_norm + j + 0, norml);
vx = vx2;
vy = vy2;
}
int16x8_t vabsx = vabsq_s16(vx);
int16x8_t vabsy = vabsq_s16(vy);
int16x8_t norm = vaddq_s16(vabsx, vabsy);
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
vst1q_s32(_norm + j + 4, normh);
vst1q_s32(_norm + j + 0, norml);
}
for (; j < colscn; j++)
_norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j]));
}
template <>
inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
{
ptrdiff_t j = 0;
if (colscn >= 8)
{
int16x8_t vx = vld1q_s16(_dx);
int16x8_t vy = vld1q_s16(_dy);
for (; j <= colscn - 16; j+=8)
{
internal::prefetch(_dx);
internal::prefetch(_dy);
int16x8_t vxnext = vld1q_s16(_dx + j + 8);
int16x8_t vynext = vld1q_s16(_dy + j + 8);
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
vst1q_s32(_norm + j + 0, norml);
vst1q_s32(_norm + j + 4, normh);
vx = vxnext;
vy = vynext;
}
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
vst1q_s32(_norm + j + 0, norml);
vst1q_s32(_norm + j + 4, normh);
}
for (; j < colscn; j++)
_norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j];
}
template <bool L2gradient>
inline void prepareThresh(f64 low_thresh, f64 high_thresh,
s32 &low, s32 &high)
{
if (low_thresh > high_thresh)
std::swap(low_thresh, high_thresh);
#if defined __GNUC__
low = (s32)low_thresh;
high = (s32)high_thresh;
low -= (low > low_thresh);
high -= (high > high_thresh);
#else
low = internal::round(low_thresh);
high = internal::round(high_thresh);
f32 ldiff = (f32)(low_thresh - low);
f32 hdiff = (f32)(high_thresh - high);
low -= (ldiff < 0);
high -= (hdiff < 0);
#endif
}
template <>
inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh,
s32 &low, s32 &high)
{
if (low_thresh > high_thresh)
std::swap(low_thresh, high_thresh);
if (low_thresh > 0) low_thresh *= low_thresh;
if (high_thresh > 0) high_thresh *= high_thresh;
#if defined __GNUC__
low = (s32)low_thresh;
high = (s32)high_thresh;
low -= (low > low_thresh);
high -= (high > high_thresh);
#else
low = internal::round(low_thresh);
high = internal::round(high_thresh);
f32 ldiff = (f32)(low_thresh - low);
f32 hdiff = (f32)(high_thresh - high);
low -= (ldiff < 0);
high -= (hdiff < 0);
#endif
}
template <bool L2gradient, bool externalSobel>
struct _normEstimator
{
ptrdiff_t magstep;
ptrdiff_t dxOffset;
ptrdiff_t dyOffset;
ptrdiff_t shxOffset;
ptrdiff_t shyOffset;
std::vector<u8> buffer;
const ptrdiff_t offsetk;
ptrdiff_t borderyt, borderyb;
RowFilter3x3Canny sobelRow;
inline _normEstimator(const Size2D &size, s32, Margin borderMargin,
ptrdiff_t &mapstep, s32** mag_buf, u8* &map):
offsetk(1),
sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left),
std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right))
{
mapstep = size.width + 2;
magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32));
dxOffset = mapstep * sizeof(s32)/sizeof(s16);
dyOffset = dxOffset + size.width * 1;
shxOffset = dxOffset + size.width * 2;
shyOffset = dxOffset + size.width * 3;
buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) );
mag_buf[0] = (s32*)&buffer[0];
mag_buf[1] = mag_buf[0] + magstep;
mag_buf[2] = mag_buf[1] + magstep;
memset(mag_buf[0], 0, mapstep * sizeof(s32));
map = (u8*)(mag_buf[2] + magstep);
memset(map, 1, mapstep);
memset(map + mapstep*(size.height + 1), 1, mapstep);
borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top);
borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom);
}
inline void firstRow(const Size2D &size, s32,
const u8 *srcBase, ptrdiff_t srcStride,
s16*, ptrdiff_t,
s16*, ptrdiff_t,
s32** mag_buf)
{
//sobelH row #0
const u8* _src = internal::getRowPtr(srcBase, srcStride, 0);
sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width);
//sobelH row #1
_src = internal::getRowPtr(srcBase, srcStride, 1);
sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width);
mag_buf[1][0] = mag_buf[1][size.width+1] = 0;
if (borderyt == 0)
{
//sobelH row #-1
_src = internal::getRowPtr(srcBase, srcStride, -1);
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
}
else
{
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
}
}
inline void nextRow(const Size2D &size, s32,
const u8 *srcBase, ptrdiff_t srcStride,
s16*, ptrdiff_t,
s16*, ptrdiff_t,
const ptrdiff_t &mapstep, s32** mag_buf,
size_t i, const s16* &_x, const s16* &_y)
{
mag_buf[2][0] = mag_buf[2][size.width+1] = 0;
if (i < size.height - borderyb)
{
const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1);
//sobelH row #i+1
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset,
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
}
else if (i < size.height)
{
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
}
else
memset(mag_buf[2], 0, mapstep*sizeof(s32));
_x = ((s16*)mag_buf[1]) + dxOffset;
_y = ((s16*)mag_buf[1]) + dyOffset;
}
};
template <bool L2gradient>
struct _normEstimator<L2gradient, true>
{
std::vector<u8> buffer;
inline _normEstimator(const Size2D &size, s32 cn, Margin,
ptrdiff_t &mapstep, s32** mag_buf, u8* &map)
{
mapstep = size.width + 2;
buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) );
mag_buf[0] = (s32*)&buffer[0];
mag_buf[1] = mag_buf[0] + mapstep*cn;
mag_buf[2] = mag_buf[1] + mapstep*cn;
memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32));
map = (u8*)(mag_buf[2] + mapstep*cn);
memset(map, 1, mapstep);
memset(map + mapstep*(size.height + 1), 1, mapstep);
}
inline void firstRow(const Size2D &size, s32 cn,
const u8 *, ptrdiff_t,
s16* dxBase, ptrdiff_t dxStride,
s16* dyBase, ptrdiff_t dyStride,
s32** mag_buf)
{
s32* _norm = mag_buf[1] + 1;
s16* _dx = internal::getRowPtr(dxBase, dxStride, 0);
s16* _dy = internal::getRowPtr(dyBase, dyStride, 0);
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
if(cn > 1)
{
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
{
size_t maxIdx = jn;
for(s32 k = 1; k < cn; ++k)
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
_norm[j] = _norm[maxIdx];
_dx[j] = _dx[maxIdx];
_dy[j] = _dy[maxIdx];
}
}
_norm[-1] = _norm[size.width] = 0;
}
inline void nextRow(const Size2D &size, s32 cn,
const u8 *, ptrdiff_t,
s16* dxBase, ptrdiff_t dxStride,
s16* dyBase, ptrdiff_t dyStride,
const ptrdiff_t &mapstep, s32** mag_buf,
size_t i, const s16* &_x, const s16* &_y)
{
s32* _norm = mag_buf[(i > 0) + 1] + 1;
if (i < size.height)
{
s16* _dx = internal::getRowPtr(dxBase, dxStride, i);
s16* _dy = internal::getRowPtr(dyBase, dyStride, i);
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
if(cn > 1)
{
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
{
size_t maxIdx = jn;
for(s32 k = 1; k < cn; ++k)
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
_norm[j] = _norm[maxIdx];
_dx[j] = _dx[maxIdx];
_dy[j] = _dy[maxIdx];
}
}
_norm[-1] = _norm[size.width] = 0;
}
else
memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32));
_x = internal::getRowPtr(dxBase, dxStride, i-1);
_y = internal::getRowPtr(dyBase, dyStride, i-1);
}
};
template <bool L2gradient, bool externalSobel>
inline void Canny3x3(const Size2D &size, s32 cn,
const u8 * srcBase, ptrdiff_t srcStride,
u8 * dstBase, ptrdiff_t dstStride,
s16 * dxBase, ptrdiff_t dxStride,
s16 * dyBase, ptrdiff_t dyStride,
f64 low_thresh, f64 high_thresh,
Margin borderMargin)
{
s32 low, high;
prepareThresh<L2gradient>(low_thresh, high_thresh, low, high);
ptrdiff_t mapstep;
s32* mag_buf[3];
u8* map;
_normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map);
size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 );
std::vector<u8*> stack( maxsize );
u8 **stack_top = &stack[0];
u8 **stack_bottom = &stack[0];
/* sector numbers
(Top-Left Origin)
1 2 3
* * *
* * *
0*******0
* * *
* * *
3 2 1
*/
#define CANNY_PUSH(d) *(d) = u8(2), *stack_top++ = (d)
#define CANNY_POP(d) (d) = *--stack_top
//i == 0
normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf);
// calculate magnitude and angle of gradient, perform non-maxima supression.
// fill the map with one of the following values:
// 0 - the pixel might belong to an edge
// 1 - the pixel can not belong to an edge
// 2 - the pixel does belong to an edge
for (size_t i = 1; i <= size.height; i++)
{
const s16 *_x, *_y;
normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y);
u8* _map = map + mapstep*i + 1;
_map[-1] = _map[size.width] = 1;
s32* _mag = mag_buf[1] + 1; // take the central row
ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
if ((stack_top - stack_bottom) + size.width > maxsize)
{
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
maxsize = maxsize * 3/2;
stack.resize(maxsize);
stack_bottom = &stack[0];
stack_top = stack_bottom + sz;
}
s32 prev_flag = 0;
for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++)
{
#define CANNY_SHIFT 15
const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
s32 m = _mag[j];
if (m > low)
{
s32 xs = _x[j];
s32 ys = _y[j];
s32 x = abs(xs);
s32 y = abs(ys) << CANNY_SHIFT;
s32 tg22x = x * TG22;
if (y < tg22x)
{
if (m > _mag[j-1] && m >= _mag[j+1]) goto __push;
}
else
{
s32 tg67x = tg22x + (x << (CANNY_SHIFT+1));
if (y > tg67x)
{
if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push;
}
else
{
s32 s = (xs ^ ys) < 0 ? -1 : 1;
if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push;
}
}
}
prev_flag = 0;
_map[j] = u8(1);
continue;
__push:
if (!prev_flag && m > high && _map[j-mapstep] != 2)
{
CANNY_PUSH(_map + j);
prev_flag = 1;
}
else
_map[j] = 0;
}
// scroll the ring buffer
_mag = mag_buf[0];
mag_buf[0] = mag_buf[1];
mag_buf[1] = mag_buf[2];
mag_buf[2] = _mag;
}
// now track the edges (hysteresis thresholding)
while (stack_top > stack_bottom)
{
u8* m;
if ((size_t)(stack_top - stack_bottom) + 8u > maxsize)
{
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
maxsize = maxsize * 3/2;
stack.resize(maxsize);
stack_bottom = &stack[0];
stack_top = stack_bottom + sz;
}
CANNY_POP(m);
if (!m[-1]) CANNY_PUSH(m - 1);
if (!m[1]) CANNY_PUSH(m + 1);
if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
if (!m[-mapstep]) CANNY_PUSH(m - mapstep);
if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1);
if (!m[mapstep]) CANNY_PUSH(m + mapstep);
if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1);
}
// the final pass, form the final image
uint8x16_t v2 = vmovq_n_u8(2);
const u8* ptrmap = map + mapstep + 1;
for (size_t i = 0; i < size.height; i++, ptrmap += mapstep)
{
u8* _dst = internal::getRowPtr(dstBase, dstStride, i);
ptrdiff_t j = 0;
for (; j < (ptrdiff_t)size.width - 16; j += 16)
{
internal::prefetch(ptrmap);
uint8x16_t vmap = vld1q_u8(ptrmap + j);
uint8x16_t vdst = vceqq_u8(vmap, v2);
vst1q_u8(_dst+j, vdst);
}
for (; j < (ptrdiff_t)size.width; j++)
_dst[j] = (u8)-(ptrmap[j] >> 1);
}
}
} // namespace
#endif
bool isCanny3x3Supported(const Size2D &size)
{
return isSupportedConfiguration() &&
size.height >= 2 && size.width >= 9;
}
void Canny3x3L1(const Size2D &size,
const u8 * srcBase, ptrdiff_t srcStride,
u8 * dstBase, ptrdiff_t dstStride,
f64 low_thresh, f64 high_thresh,
Margin borderMargin)
{
internal::assertSupportedConfiguration(isCanny3x3Supported(size));
#ifdef CAROTENE_NEON
Canny3x3<false, false>(size, 1,
srcBase, srcStride,
dstBase, dstStride,
NULL, 0,
NULL, 0,
low_thresh, high_thresh,
borderMargin);
#else
(void)size;
(void)srcBase;
(void)srcStride;
(void)dstBase;
(void)dstStride;
(void)low_thresh;
(void)high_thresh;
(void)borderMargin;
#endif
}
void Canny3x3L2(const Size2D &size,
const u8 * srcBase, ptrdiff_t srcStride,
u8 * dstBase, ptrdiff_t dstStride,
f64 low_thresh, f64 high_thresh,
Margin borderMargin)
{
internal::assertSupportedConfiguration(isCanny3x3Supported(size));
#ifdef CAROTENE_NEON
Canny3x3<true, false>(size, 1,
srcBase, srcStride,
dstBase, dstStride,
NULL, 0,
NULL, 0,
low_thresh, high_thresh,
borderMargin);
#else
(void)size;
(void)srcBase;
(void)srcStride;
(void)dstBase;
(void)dstStride;
(void)low_thresh;
(void)high_thresh;
(void)borderMargin;
#endif
}
void Canny3x3L1(const Size2D &size, s32 cn,
s16 * dxBase, ptrdiff_t dxStride,
s16 * dyBase, ptrdiff_t dyStride,
u8 * dstBase, ptrdiff_t dstStride,
f64 low_thresh, f64 high_thresh)
{
internal::assertSupportedConfiguration();
#ifdef CAROTENE_NEON
Canny3x3<false, true>(size, cn,
NULL, 0,
dstBase, dstStride,
dxBase, dxStride,
dyBase, dyStride,
low_thresh, high_thresh,
Margin());
#else
(void)size;
(void)cn;
(void)dstBase;
(void)dstStride;
(void)dxBase;
(void)dxStride;
(void)dyBase;
(void)dyStride;
(void)low_thresh;
(void)high_thresh;
#endif
}
void Canny3x3L2(const Size2D &size, s32 cn,
s16 * dxBase, ptrdiff_t dxStride,
s16 * dyBase, ptrdiff_t dyStride,
u8 * dstBase, ptrdiff_t dstStride,
f64 low_thresh, f64 high_thresh)
{
internal::assertSupportedConfiguration();
#ifdef CAROTENE_NEON
Canny3x3<true, true>(size, cn,
NULL, 0,
dstBase, dstStride,
dxBase, dxStride,
dyBase, dyStride,
low_thresh, high_thresh,
Margin());
#else
(void)size;
(void)cn;
(void)dstBase;
(void)dstStride;
(void)dxBase;
(void)dxStride;
(void)dyBase;
(void)dyStride;
(void)low_thresh;
(void)high_thresh;
#endif
}
} // namespace CAROTENE_NS