mirror of
https://github.com/opencv/opencv.git
synced 2024-12-25 01:28:15 +08:00
774 lines
26 KiB
C++
774 lines
26 KiB
C++
/*
|
|
* By downloading, copying, installing or using the software you agree to this license.
|
|
* If you do not agree to this license, do not download, install,
|
|
* copy or use the software.
|
|
*
|
|
*
|
|
* License Agreement
|
|
* For Open Source Computer Vision Library
|
|
* (3-clause BSD License)
|
|
*
|
|
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
|
|
* Third party copyrights are property of their respective owners.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* * Neither the names of the copyright holders nor the names of the contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* This software is provided by the copyright holders and contributors "as is" and
|
|
* any express or implied warranties, including, but not limited to, the implied
|
|
* warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
* In no event shall copyright holders or contributors be liable for any direct,
|
|
* indirect, incidental, special, exemplary, or consequential damages
|
|
* (including, but not limited to, procurement of substitute goods or services;
|
|
* loss of use, data, or profits; or business interruption) however caused
|
|
* and on any theory of liability, whether in contract, strict liability,
|
|
* or tort (including negligence or otherwise) arising in any way out of
|
|
* the use of this software, even if advised of the possibility of such damage.
|
|
*/
|
|
|
|
#include "common.hpp"
|
|
|
|
#include "saturate_cast.hpp"
|
|
#include <vector>
|
|
#include <cstring>
|
|
|
|
namespace CAROTENE_NS {
|
|
|
|
#ifdef CAROTENE_NEON
|
|
namespace {
|
|
struct RowFilter3x3Canny
|
|
{
|
|
inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr)
|
|
{
|
|
vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL));
|
|
vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL));
|
|
lookLeft = offsetk - borderxl;
|
|
lookRight = offsetk - borderxr;
|
|
}
|
|
|
|
inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width)
|
|
{
|
|
uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask);
|
|
ptrdiff_t i = 0;
|
|
for (; i < width - 8 + lookRight; i += 8)
|
|
{
|
|
internal::prefetch(src + i);
|
|
uint8x8_t l18u = vld1_u8(src + i + 1);
|
|
|
|
uint8x8_t l2 = l18u;
|
|
uint8x8_t l0 = vext_u8(l, l18u, 6);
|
|
int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1));
|
|
|
|
l = l18u;
|
|
|
|
int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0));
|
|
int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0));
|
|
int16x8_t ldy = vaddq_s16(l02, l1x2);
|
|
|
|
vst1q_s16(dstx + i, ldx);
|
|
vst1q_s16(dsty + i, ldy);
|
|
}
|
|
|
|
//tail
|
|
if (lookRight == 0 || i != width)
|
|
{
|
|
uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1
|
|
uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask);
|
|
uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7);
|
|
|
|
int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0));
|
|
int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1));
|
|
int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0));
|
|
int16x8_t taildy = vqaddq_s16(tail02, tail1x2);
|
|
|
|
vst1q_s16(dstx + (width - 8), taildx);
|
|
vst1q_s16(dsty + (width - 8), taildy);
|
|
}
|
|
}
|
|
|
|
uint8x8_t vfmask;
|
|
uint8x8_t vtmask;
|
|
enum { offsetk = 1};
|
|
ptrdiff_t lookLeft;
|
|
ptrdiff_t lookRight;
|
|
};
|
|
|
|
template <bool L2gradient>
|
|
inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
|
|
{
|
|
ptrdiff_t j = 0;
|
|
for (; j <= width - 8; j += 8)
|
|
{
|
|
ColFilter3x3CannyL1Loop:
|
|
int16x8_t line0x = vld1q_s16(src0 + j);
|
|
int16x8_t line1x = vld1q_s16(src1 + j);
|
|
int16x8_t line2x = vld1q_s16(src2 + j);
|
|
int16x8_t line0y = vld1q_s16(src0 + j + width);
|
|
int16x8_t line2y = vld1q_s16(src2 + j + width);
|
|
|
|
int16x8_t l02 = vaddq_s16(line0x, line2x);
|
|
int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
|
|
int16x8_t dy = vsubq_s16(line2y, line0y);
|
|
int16x8_t dx = vaddq_s16(l1x2, l02);
|
|
|
|
int16x8_t dya = vabsq_s16(dy);
|
|
int16x8_t dxa = vabsq_s16(dx);
|
|
int16x8_t norm = vaddq_s16(dya, dxa);
|
|
|
|
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
|
|
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
|
|
|
|
vst1q_s16(dsty + j, dy);
|
|
vst1q_s16(dstx + j, dx);
|
|
vst1q_s32(mag + j + 4, normh);
|
|
vst1q_s32(mag + j, norml);
|
|
}
|
|
if (j != width)
|
|
{
|
|
j = width - 8;
|
|
goto ColFilter3x3CannyL1Loop;
|
|
}
|
|
}
|
|
template <>
|
|
inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
|
|
{
|
|
ptrdiff_t j = 0;
|
|
for (; j <= width - 8; j += 8)
|
|
{
|
|
ColFilter3x3CannyL2Loop:
|
|
int16x8_t line0x = vld1q_s16(src0 + j);
|
|
int16x8_t line1x = vld1q_s16(src1 + j);
|
|
int16x8_t line2x = vld1q_s16(src2 + j);
|
|
int16x8_t line0y = vld1q_s16(src0 + j + width);
|
|
int16x8_t line2y = vld1q_s16(src2 + j + width);
|
|
|
|
int16x8_t l02 = vaddq_s16(line0x, line2x);
|
|
int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
|
|
int16x8_t dy = vsubq_s16(line2y, line0y);
|
|
int16x8_t dx = vaddq_s16(l1x2, l02);
|
|
|
|
int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx));
|
|
int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy));
|
|
|
|
norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy));
|
|
normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx));
|
|
|
|
vst1q_s16(dsty + j, dy);
|
|
vst1q_s16(dstx + j, dx);
|
|
vst1q_s32(mag + j, norml);
|
|
vst1q_s32(mag + j + 4, normh);
|
|
}
|
|
if (j != width)
|
|
{
|
|
j = width - 8;
|
|
goto ColFilter3x3CannyL2Loop;
|
|
}
|
|
}
|
|
|
|
template <bool L2gradient>
|
|
inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
|
|
{
|
|
ptrdiff_t j = 0;
|
|
if (colscn >= 8)
|
|
{
|
|
int16x8_t vx = vld1q_s16(_dx);
|
|
int16x8_t vy = vld1q_s16(_dy);
|
|
for (; j <= colscn - 16; j+=8)
|
|
{
|
|
internal::prefetch(_dx);
|
|
internal::prefetch(_dy);
|
|
|
|
int16x8_t vx2 = vld1q_s16(_dx + j + 8);
|
|
int16x8_t vy2 = vld1q_s16(_dy + j + 8);
|
|
|
|
int16x8_t vabsx = vabsq_s16(vx);
|
|
int16x8_t vabsy = vabsq_s16(vy);
|
|
|
|
int16x8_t norm = vaddq_s16(vabsx, vabsy);
|
|
|
|
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
|
|
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
|
|
|
|
vst1q_s32(_norm + j + 4, normh);
|
|
vst1q_s32(_norm + j + 0, norml);
|
|
|
|
vx = vx2;
|
|
vy = vy2;
|
|
}
|
|
int16x8_t vabsx = vabsq_s16(vx);
|
|
int16x8_t vabsy = vabsq_s16(vy);
|
|
|
|
int16x8_t norm = vaddq_s16(vabsx, vabsy);
|
|
|
|
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
|
|
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
|
|
|
|
vst1q_s32(_norm + j + 4, normh);
|
|
vst1q_s32(_norm + j + 0, norml);
|
|
}
|
|
for (; j < colscn; j++)
|
|
_norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j]));
|
|
}
|
|
|
|
template <>
|
|
inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
|
|
{
|
|
ptrdiff_t j = 0;
|
|
if (colscn >= 8)
|
|
{
|
|
int16x8_t vx = vld1q_s16(_dx);
|
|
int16x8_t vy = vld1q_s16(_dy);
|
|
|
|
for (; j <= colscn - 16; j+=8)
|
|
{
|
|
internal::prefetch(_dx);
|
|
internal::prefetch(_dy);
|
|
|
|
int16x8_t vxnext = vld1q_s16(_dx + j + 8);
|
|
int16x8_t vynext = vld1q_s16(_dy + j + 8);
|
|
|
|
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
|
|
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
|
|
|
|
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
|
|
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
|
|
|
|
vst1q_s32(_norm + j + 0, norml);
|
|
vst1q_s32(_norm + j + 4, normh);
|
|
|
|
vx = vxnext;
|
|
vy = vynext;
|
|
}
|
|
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
|
|
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
|
|
|
|
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
|
|
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
|
|
|
|
vst1q_s32(_norm + j + 0, norml);
|
|
vst1q_s32(_norm + j + 4, normh);
|
|
}
|
|
for (; j < colscn; j++)
|
|
_norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j];
|
|
}
|
|
|
|
template <bool L2gradient>
|
|
inline void prepareThresh(f64 low_thresh, f64 high_thresh,
|
|
s32 &low, s32 &high)
|
|
{
|
|
if (low_thresh > high_thresh)
|
|
std::swap(low_thresh, high_thresh);
|
|
#if defined __GNUC__
|
|
low = (s32)low_thresh;
|
|
high = (s32)high_thresh;
|
|
low -= (low > low_thresh);
|
|
high -= (high > high_thresh);
|
|
#else
|
|
low = internal::round(low_thresh);
|
|
high = internal::round(high_thresh);
|
|
f32 ldiff = (f32)(low_thresh - low);
|
|
f32 hdiff = (f32)(high_thresh - high);
|
|
low -= (ldiff < 0);
|
|
high -= (hdiff < 0);
|
|
#endif
|
|
}
|
|
template <>
|
|
inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh,
|
|
s32 &low, s32 &high)
|
|
{
|
|
if (low_thresh > high_thresh)
|
|
std::swap(low_thresh, high_thresh);
|
|
if (low_thresh > 0) low_thresh *= low_thresh;
|
|
if (high_thresh > 0) high_thresh *= high_thresh;
|
|
#if defined __GNUC__
|
|
low = (s32)low_thresh;
|
|
high = (s32)high_thresh;
|
|
low -= (low > low_thresh);
|
|
high -= (high > high_thresh);
|
|
#else
|
|
low = internal::round(low_thresh);
|
|
high = internal::round(high_thresh);
|
|
f32 ldiff = (f32)(low_thresh - low);
|
|
f32 hdiff = (f32)(high_thresh - high);
|
|
low -= (ldiff < 0);
|
|
high -= (hdiff < 0);
|
|
#endif
|
|
}
|
|
|
|
template <bool L2gradient, bool externalSobel>
|
|
struct _normEstimator
|
|
{
|
|
ptrdiff_t magstep;
|
|
ptrdiff_t dxOffset;
|
|
ptrdiff_t dyOffset;
|
|
ptrdiff_t shxOffset;
|
|
ptrdiff_t shyOffset;
|
|
std::vector<u8> buffer;
|
|
const ptrdiff_t offsetk;
|
|
ptrdiff_t borderyt, borderyb;
|
|
RowFilter3x3Canny sobelRow;
|
|
|
|
inline _normEstimator(const Size2D &size, s32, Margin borderMargin,
|
|
ptrdiff_t &mapstep, s32** mag_buf, u8* &map):
|
|
offsetk(1),
|
|
sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left),
|
|
std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right))
|
|
{
|
|
mapstep = size.width + 2;
|
|
magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32));
|
|
dxOffset = mapstep * sizeof(s32)/sizeof(s16);
|
|
dyOffset = dxOffset + size.width * 1;
|
|
shxOffset = dxOffset + size.width * 2;
|
|
shyOffset = dxOffset + size.width * 3;
|
|
buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) );
|
|
mag_buf[0] = (s32*)&buffer[0];
|
|
mag_buf[1] = mag_buf[0] + magstep;
|
|
mag_buf[2] = mag_buf[1] + magstep;
|
|
memset(mag_buf[0], 0, mapstep * sizeof(s32));
|
|
|
|
map = (u8*)(mag_buf[2] + magstep);
|
|
memset(map, 1, mapstep);
|
|
memset(map + mapstep*(size.height + 1), 1, mapstep);
|
|
borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top);
|
|
borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom);
|
|
}
|
|
inline void firstRow(const Size2D &size, s32,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
s16*, ptrdiff_t,
|
|
s16*, ptrdiff_t,
|
|
s32** mag_buf)
|
|
{
|
|
//sobelH row #0
|
|
const u8* _src = internal::getRowPtr(srcBase, srcStride, 0);
|
|
sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width);
|
|
//sobelH row #1
|
|
_src = internal::getRowPtr(srcBase, srcStride, 1);
|
|
sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width);
|
|
|
|
mag_buf[1][0] = mag_buf[1][size.width+1] = 0;
|
|
if (borderyt == 0)
|
|
{
|
|
//sobelH row #-1
|
|
_src = internal::getRowPtr(srcBase, srcStride, -1);
|
|
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
|
|
|
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
|
|
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
|
|
}
|
|
else
|
|
{
|
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
|
|
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
|
|
}
|
|
}
|
|
inline void nextRow(const Size2D &size, s32,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
s16*, ptrdiff_t,
|
|
s16*, ptrdiff_t,
|
|
const ptrdiff_t &mapstep, s32** mag_buf,
|
|
size_t i, const s16* &_x, const s16* &_y)
|
|
{
|
|
mag_buf[2][0] = mag_buf[2][size.width+1] = 0;
|
|
if (i < size.height - borderyb)
|
|
{
|
|
const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1);
|
|
//sobelH row #i+1
|
|
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
|
|
|
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset,
|
|
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
|
|
}
|
|
else if (i < size.height)
|
|
{
|
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
|
|
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
|
|
}
|
|
else
|
|
memset(mag_buf[2], 0, mapstep*sizeof(s32));
|
|
_x = ((s16*)mag_buf[1]) + dxOffset;
|
|
_y = ((s16*)mag_buf[1]) + dyOffset;
|
|
}
|
|
};
|
|
template <bool L2gradient>
|
|
struct _normEstimator<L2gradient, true>
|
|
{
|
|
std::vector<u8> buffer;
|
|
|
|
inline _normEstimator(const Size2D &size, s32 cn, Margin,
|
|
ptrdiff_t &mapstep, s32** mag_buf, u8* &map)
|
|
{
|
|
mapstep = size.width + 2;
|
|
buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) );
|
|
mag_buf[0] = (s32*)&buffer[0];
|
|
mag_buf[1] = mag_buf[0] + mapstep*cn;
|
|
mag_buf[2] = mag_buf[1] + mapstep*cn;
|
|
memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32));
|
|
|
|
map = (u8*)(mag_buf[2] + mapstep*cn);
|
|
memset(map, 1, mapstep);
|
|
memset(map + mapstep*(size.height + 1), 1, mapstep);
|
|
}
|
|
inline void firstRow(const Size2D &size, s32 cn,
|
|
const u8 *, ptrdiff_t,
|
|
s16* dxBase, ptrdiff_t dxStride,
|
|
s16* dyBase, ptrdiff_t dyStride,
|
|
s32** mag_buf)
|
|
{
|
|
s32* _norm = mag_buf[1] + 1;
|
|
|
|
s16* _dx = internal::getRowPtr(dxBase, dxStride, 0);
|
|
s16* _dy = internal::getRowPtr(dyBase, dyStride, 0);
|
|
|
|
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
|
|
|
|
if(cn > 1)
|
|
{
|
|
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
|
|
{
|
|
size_t maxIdx = jn;
|
|
for(s32 k = 1; k < cn; ++k)
|
|
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
|
|
_norm[j] = _norm[maxIdx];
|
|
_dx[j] = _dx[maxIdx];
|
|
_dy[j] = _dy[maxIdx];
|
|
}
|
|
}
|
|
|
|
_norm[-1] = _norm[size.width] = 0;
|
|
}
|
|
inline void nextRow(const Size2D &size, s32 cn,
|
|
const u8 *, ptrdiff_t,
|
|
s16* dxBase, ptrdiff_t dxStride,
|
|
s16* dyBase, ptrdiff_t dyStride,
|
|
const ptrdiff_t &mapstep, s32** mag_buf,
|
|
size_t i, const s16* &_x, const s16* &_y)
|
|
{
|
|
s32* _norm = mag_buf[(i > 0) + 1] + 1;
|
|
if (i < size.height)
|
|
{
|
|
s16* _dx = internal::getRowPtr(dxBase, dxStride, i);
|
|
s16* _dy = internal::getRowPtr(dyBase, dyStride, i);
|
|
|
|
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
|
|
|
|
if(cn > 1)
|
|
{
|
|
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
|
|
{
|
|
size_t maxIdx = jn;
|
|
for(s32 k = 1; k < cn; ++k)
|
|
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
|
|
_norm[j] = _norm[maxIdx];
|
|
_dx[j] = _dx[maxIdx];
|
|
_dy[j] = _dy[maxIdx];
|
|
}
|
|
}
|
|
|
|
_norm[-1] = _norm[size.width] = 0;
|
|
}
|
|
else
|
|
memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32));
|
|
|
|
_x = internal::getRowPtr(dxBase, dxStride, i-1);
|
|
_y = internal::getRowPtr(dyBase, dyStride, i-1);
|
|
}
|
|
};
|
|
|
|
template <bool L2gradient, bool externalSobel>
|
|
inline void Canny3x3(const Size2D &size, s32 cn,
|
|
const u8 * srcBase, ptrdiff_t srcStride,
|
|
u8 * dstBase, ptrdiff_t dstStride,
|
|
s16 * dxBase, ptrdiff_t dxStride,
|
|
s16 * dyBase, ptrdiff_t dyStride,
|
|
f64 low_thresh, f64 high_thresh,
|
|
Margin borderMargin)
|
|
{
|
|
s32 low, high;
|
|
prepareThresh<L2gradient>(low_thresh, high_thresh, low, high);
|
|
|
|
ptrdiff_t mapstep;
|
|
s32* mag_buf[3];
|
|
u8* map;
|
|
_normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map);
|
|
|
|
size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 );
|
|
std::vector<u8*> stack( maxsize );
|
|
u8 **stack_top = &stack[0];
|
|
u8 **stack_bottom = &stack[0];
|
|
|
|
/* sector numbers
|
|
(Top-Left Origin)
|
|
|
|
1 2 3
|
|
* * *
|
|
* * *
|
|
0*******0
|
|
* * *
|
|
* * *
|
|
3 2 1
|
|
*/
|
|
|
|
#define CANNY_PUSH(d) *(d) = u8(2), *stack_top++ = (d)
|
|
#define CANNY_POP(d) (d) = *--stack_top
|
|
|
|
//i == 0
|
|
normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf);
|
|
// calculate magnitude and angle of gradient, perform non-maxima supression.
|
|
// fill the map with one of the following values:
|
|
// 0 - the pixel might belong to an edge
|
|
// 1 - the pixel can not belong to an edge
|
|
// 2 - the pixel does belong to an edge
|
|
for (size_t i = 1; i <= size.height; i++)
|
|
{
|
|
const s16 *_x, *_y;
|
|
normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y);
|
|
|
|
u8* _map = map + mapstep*i + 1;
|
|
_map[-1] = _map[size.width] = 1;
|
|
|
|
s32* _mag = mag_buf[1] + 1; // take the central row
|
|
ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
|
|
ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
|
|
|
|
if ((stack_top - stack_bottom) + size.width > maxsize)
|
|
{
|
|
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
|
|
maxsize = maxsize * 3/2;
|
|
stack.resize(maxsize);
|
|
stack_bottom = &stack[0];
|
|
stack_top = stack_bottom + sz;
|
|
}
|
|
|
|
s32 prev_flag = 0;
|
|
for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++)
|
|
{
|
|
#define CANNY_SHIFT 15
|
|
const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
|
|
|
|
s32 m = _mag[j];
|
|
|
|
if (m > low)
|
|
{
|
|
s32 xs = _x[j];
|
|
s32 ys = _y[j];
|
|
s32 x = abs(xs);
|
|
s32 y = abs(ys) << CANNY_SHIFT;
|
|
|
|
s32 tg22x = x * TG22;
|
|
|
|
if (y < tg22x)
|
|
{
|
|
if (m > _mag[j-1] && m >= _mag[j+1]) goto __push;
|
|
}
|
|
else
|
|
{
|
|
s32 tg67x = tg22x + (x << (CANNY_SHIFT+1));
|
|
if (y > tg67x)
|
|
{
|
|
if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push;
|
|
}
|
|
else
|
|
{
|
|
s32 s = (xs ^ ys) < 0 ? -1 : 1;
|
|
if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push;
|
|
}
|
|
}
|
|
}
|
|
prev_flag = 0;
|
|
_map[j] = u8(1);
|
|
continue;
|
|
__push:
|
|
if (!prev_flag && m > high && _map[j-mapstep] != 2)
|
|
{
|
|
CANNY_PUSH(_map + j);
|
|
prev_flag = 1;
|
|
}
|
|
else
|
|
_map[j] = 0;
|
|
}
|
|
|
|
// scroll the ring buffer
|
|
_mag = mag_buf[0];
|
|
mag_buf[0] = mag_buf[1];
|
|
mag_buf[1] = mag_buf[2];
|
|
mag_buf[2] = _mag;
|
|
}
|
|
|
|
// now track the edges (hysteresis thresholding)
|
|
while (stack_top > stack_bottom)
|
|
{
|
|
u8* m;
|
|
if ((size_t)(stack_top - stack_bottom) + 8u > maxsize)
|
|
{
|
|
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
|
|
maxsize = maxsize * 3/2;
|
|
stack.resize(maxsize);
|
|
stack_bottom = &stack[0];
|
|
stack_top = stack_bottom + sz;
|
|
}
|
|
|
|
CANNY_POP(m);
|
|
|
|
if (!m[-1]) CANNY_PUSH(m - 1);
|
|
if (!m[1]) CANNY_PUSH(m + 1);
|
|
if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
|
|
if (!m[-mapstep]) CANNY_PUSH(m - mapstep);
|
|
if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
|
|
if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1);
|
|
if (!m[mapstep]) CANNY_PUSH(m + mapstep);
|
|
if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1);
|
|
}
|
|
|
|
// the final pass, form the final image
|
|
uint8x16_t v2 = vmovq_n_u8(2);
|
|
const u8* ptrmap = map + mapstep + 1;
|
|
for (size_t i = 0; i < size.height; i++, ptrmap += mapstep)
|
|
{
|
|
u8* _dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
ptrdiff_t j = 0;
|
|
for (; j < (ptrdiff_t)size.width - 16; j += 16)
|
|
{
|
|
internal::prefetch(ptrmap);
|
|
uint8x16_t vmap = vld1q_u8(ptrmap + j);
|
|
uint8x16_t vdst = vceqq_u8(vmap, v2);
|
|
vst1q_u8(_dst+j, vdst);
|
|
}
|
|
for (; j < (ptrdiff_t)size.width; j++)
|
|
_dst[j] = (u8)-(ptrmap[j] >> 1);
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
#endif
|
|
|
|
bool isCanny3x3Supported(const Size2D &size)
|
|
{
|
|
return isSupportedConfiguration() &&
|
|
size.height >= 2 && size.width >= 9;
|
|
}
|
|
|
|
void Canny3x3L1(const Size2D &size,
|
|
const u8 * srcBase, ptrdiff_t srcStride,
|
|
u8 * dstBase, ptrdiff_t dstStride,
|
|
f64 low_thresh, f64 high_thresh,
|
|
Margin borderMargin)
|
|
{
|
|
internal::assertSupportedConfiguration(isCanny3x3Supported(size));
|
|
#ifdef CAROTENE_NEON
|
|
Canny3x3<false, false>(size, 1,
|
|
srcBase, srcStride,
|
|
dstBase, dstStride,
|
|
NULL, 0,
|
|
NULL, 0,
|
|
low_thresh, high_thresh,
|
|
borderMargin);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)low_thresh;
|
|
(void)high_thresh;
|
|
(void)borderMargin;
|
|
#endif
|
|
}
|
|
|
|
void Canny3x3L2(const Size2D &size,
|
|
const u8 * srcBase, ptrdiff_t srcStride,
|
|
u8 * dstBase, ptrdiff_t dstStride,
|
|
f64 low_thresh, f64 high_thresh,
|
|
Margin borderMargin)
|
|
{
|
|
internal::assertSupportedConfiguration(isCanny3x3Supported(size));
|
|
#ifdef CAROTENE_NEON
|
|
Canny3x3<true, false>(size, 1,
|
|
srcBase, srcStride,
|
|
dstBase, dstStride,
|
|
NULL, 0,
|
|
NULL, 0,
|
|
low_thresh, high_thresh,
|
|
borderMargin);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)low_thresh;
|
|
(void)high_thresh;
|
|
(void)borderMargin;
|
|
#endif
|
|
}
|
|
|
|
void Canny3x3L1(const Size2D &size, s32 cn,
|
|
s16 * dxBase, ptrdiff_t dxStride,
|
|
s16 * dyBase, ptrdiff_t dyStride,
|
|
u8 * dstBase, ptrdiff_t dstStride,
|
|
f64 low_thresh, f64 high_thresh)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
Canny3x3<false, true>(size, cn,
|
|
NULL, 0,
|
|
dstBase, dstStride,
|
|
dxBase, dxStride,
|
|
dyBase, dyStride,
|
|
low_thresh, high_thresh,
|
|
Margin());
|
|
#else
|
|
(void)size;
|
|
(void)cn;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)dxBase;
|
|
(void)dxStride;
|
|
(void)dyBase;
|
|
(void)dyStride;
|
|
(void)low_thresh;
|
|
(void)high_thresh;
|
|
#endif
|
|
}
|
|
|
|
void Canny3x3L2(const Size2D &size, s32 cn,
|
|
s16 * dxBase, ptrdiff_t dxStride,
|
|
s16 * dyBase, ptrdiff_t dyStride,
|
|
u8 * dstBase, ptrdiff_t dstStride,
|
|
f64 low_thresh, f64 high_thresh)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
Canny3x3<true, true>(size, cn,
|
|
NULL, 0,
|
|
dstBase, dstStride,
|
|
dxBase, dxStride,
|
|
dyBase, dyStride,
|
|
low_thresh, high_thresh,
|
|
Margin());
|
|
#else
|
|
(void)size;
|
|
(void)cn;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)dxBase;
|
|
(void)dxStride;
|
|
(void)dyBase;
|
|
(void)dyStride;
|
|
(void)low_thresh;
|
|
(void)high_thresh;
|
|
#endif
|
|
}
|
|
|
|
} // namespace CAROTENE_NS
|