mirror of
https://github.com/opencv/opencv.git
synced 2025-08-01 10:26:53 +08:00
341 lines
12 KiB
C++
341 lines
12 KiB
C++
/*
|
|
* By downloading, copying, installing or using the software you agree to this license.
|
|
* If you do not agree to this license, do not download, install,
|
|
* copy or use the software.
|
|
*
|
|
*
|
|
* License Agreement
|
|
* For Open Source Computer Vision Library
|
|
* (3-clause BSD License)
|
|
*
|
|
* Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
|
|
* Third party copyrights are property of their respective owners.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* * Neither the names of the copyright holders nor the names of the contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* This software is provided by the copyright holders and contributors "as is" and
|
|
* any express or implied warranties, including, but not limited to, the implied
|
|
* warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
* In no event shall copyright holders or contributors be liable for any direct,
|
|
* indirect, incidental, special, exemplary, or consequential damages
|
|
* (including, but not limited to, procurement of substitute goods or services;
|
|
* loss of use, data, or profits; or business interruption) however caused
|
|
* and on any theory of liability, whether in contract, strict liability,
|
|
* or tort (including negligence or otherwise) arising in any way out of
|
|
* the use of this software, even if advised of the possibility of such damage.
|
|
*/
|
|
|
|
#include "common.hpp"
|
|
#include "vtransform.hpp"
|
|
|
|
namespace CAROTENE_NS {
|
|
|
|
#ifdef CAROTENE_NEON
|
|
|
|
namespace {
|
|
|
|
inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); }
|
|
inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); }
|
|
inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); }
|
|
|
|
template <typename Op, int elsize> struct vtail
|
|
{
|
|
static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
|
|
u8 * dst, const Op & op,
|
|
size_t &x, size_t width)
|
|
{
|
|
//do nothing since there couldn't be enough data
|
|
(void)src0;
|
|
(void)src1;
|
|
(void)dst;
|
|
(void)op;
|
|
(void)x;
|
|
(void)width;
|
|
}
|
|
};
|
|
template <typename Op> struct vtail<Op, 2>
|
|
{
|
|
static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
|
|
u8 * dst, const Op & op,
|
|
size_t &x, size_t width)
|
|
{
|
|
typedef typename Op::type type;
|
|
typedef typename internal::VecTraits<type>::vec128 vec128;
|
|
typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
|
|
//There no more than 15 elements in the tail, so we could handle 8 element vector only once
|
|
if( x + 8 < width)
|
|
{
|
|
vec128 v_src0, v_src1;
|
|
uvec128 v_dst;
|
|
|
|
v_src0 = internal::vld1q(src0 + x);
|
|
v_src1 = internal::vld1q(src1 + x);
|
|
op(v_src0, v_src1, v_dst);
|
|
internal::vst1(dst + x, internal::vmovn(v_dst));
|
|
x+=8;
|
|
}
|
|
}
|
|
};
|
|
template <typename Op> struct vtail<Op, 1>
|
|
{
|
|
static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
|
|
u8 * dst, const Op & op,
|
|
size_t &x, size_t width)
|
|
{
|
|
typedef typename Op::type type;
|
|
typedef typename internal::VecTraits<type>::vec128 vec128;
|
|
typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
|
|
typedef typename internal::VecTraits<type>::vec64 vec64;
|
|
typedef typename internal::VecTraits<type>::unsign::vec64 uvec64;
|
|
//There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
|
|
if( x + 16 < width)
|
|
{
|
|
vec128 v_src0, v_src1;
|
|
uvec128 v_dst;
|
|
|
|
v_src0 = internal::vld1q(src0 + x);
|
|
v_src1 = internal::vld1q(src1 + x);
|
|
op(v_src0, v_src1, v_dst);
|
|
internal::vst1q(dst + x, v_dst);
|
|
x+=16;
|
|
}
|
|
if( x + 8 < width)
|
|
{
|
|
vec64 v_src0, v_src1;
|
|
uvec64 v_dst;
|
|
|
|
v_src0 = internal::vld1(src0 + x);
|
|
v_src1 = internal::vld1(src1 + x);
|
|
op(v_src0, v_src1, v_dst);
|
|
internal::vst1(dst + x, v_dst);
|
|
x+=8;
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename Op>
|
|
void vcompare(Size2D size,
|
|
const typename Op::type * src0Base, ptrdiff_t src0Stride,
|
|
const typename Op::type * src1Base, ptrdiff_t src1Stride,
|
|
u8 * dstBase, ptrdiff_t dstStride, const Op & op)
|
|
{
|
|
typedef typename Op::type type;
|
|
typedef typename internal::VecTraits<type>::vec128 vec128;
|
|
typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
|
|
|
|
if (src0Stride == src1Stride && src0Stride == dstStride &&
|
|
src0Stride == (ptrdiff_t)(size.width * sizeof(type)))
|
|
{
|
|
size.width *= size.height;
|
|
size.height = 1;
|
|
}
|
|
|
|
const u32 step_base = 32 / sizeof(type);
|
|
size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
|
|
|
|
for (size_t y = 0; y < size.height; ++y)
|
|
{
|
|
const type * src0 = internal::getRowPtr(src0Base, src0Stride, y);
|
|
const type * src1 = internal::getRowPtr(src1Base, src1Stride, y);
|
|
u8 * dst = internal::getRowPtr(dstBase, dstStride, y);
|
|
size_t x = 0;
|
|
|
|
for( ; x < roiw_base; x += step_base )
|
|
{
|
|
internal::prefetch(src0 + x);
|
|
internal::prefetch(src1 + x);
|
|
|
|
vec128 v_src00 = internal::vld1q(src0 + x), v_src01 = internal::vld1q(src0 + x + 16 / sizeof(type));
|
|
vec128 v_src10 = internal::vld1q(src1 + x), v_src11 = internal::vld1q(src1 + x + 16 / sizeof(type));
|
|
uvec128 v_dst0;
|
|
uvec128 v_dst1;
|
|
|
|
op(v_src00, v_src10, v_dst0);
|
|
op(v_src01, v_src11, v_dst1);
|
|
|
|
vnst(dst + x, v_dst0, v_dst1);
|
|
}
|
|
|
|
vtail<Op, sizeof(type)>::compare(src0, src1, dst, op, x, size.width);
|
|
|
|
for (; x < size.width; ++x)
|
|
{
|
|
op(src0 + x, src1 + x, dst + x);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<typename T>
|
|
struct OpCmpEQ
|
|
{
|
|
typedef T type;
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
|
|
{
|
|
v_dst = internal::vceqq(v_src0, v_src1);
|
|
}
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
|
|
{
|
|
v_dst = internal::vceq(v_src0, v_src1);
|
|
}
|
|
|
|
void operator() (const T * src0, const T * src1, u8 * dst) const
|
|
{
|
|
dst[0] = src0[0] == src1[0] ? 255 : 0;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct OpCmpNE
|
|
{
|
|
typedef T type;
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
|
|
{
|
|
v_dst = internal::vmvnq(internal::vceqq(v_src0, v_src1));
|
|
}
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
|
|
{
|
|
v_dst = internal::vmvn(internal::vceq(v_src0, v_src1));
|
|
}
|
|
|
|
void operator() (const T * src0, const T * src1, u8 * dst) const
|
|
{
|
|
dst[0] = src0[0] == src1[0] ? 0 : 255;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct OpCmpGT
|
|
{
|
|
typedef T type;
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
|
|
{
|
|
v_dst = internal::vcgtq(v_src0, v_src1);
|
|
}
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
|
|
{
|
|
v_dst = internal::vcgt(v_src0, v_src1);
|
|
}
|
|
|
|
void operator() (const T * src0, const T * src1, u8 * dst) const
|
|
{
|
|
dst[0] = src0[0] > src1[0] ? 255 : 0;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct OpCmpGE
|
|
{
|
|
typedef T type;
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
|
|
{
|
|
v_dst = internal::vcgeq(v_src0, v_src1);
|
|
}
|
|
|
|
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
|
|
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
|
|
{
|
|
v_dst = internal::vcge(v_src0, v_src1);
|
|
}
|
|
|
|
void operator() (const T * src0, const T * src1, u8 * dst) const
|
|
{
|
|
dst[0] = src0[0] >= src1[0] ? 255 : 0;
|
|
}
|
|
};
|
|
|
|
}
|
|
|
|
#define IMPL_CMPOP(op, type) \
|
|
void cmp##op(const Size2D &size, \
|
|
const type * src0Base, ptrdiff_t src0Stride, \
|
|
const type * src1Base, ptrdiff_t src1Stride, \
|
|
u8 *dstBase, ptrdiff_t dstStride) \
|
|
{ \
|
|
internal::assertSupportedConfiguration(); \
|
|
vcompare(size, \
|
|
src0Base, src0Stride, \
|
|
src1Base, src1Stride, \
|
|
dstBase, dstStride, \
|
|
OpCmp##op<type>()); \
|
|
}
|
|
|
|
#else
|
|
|
|
#define IMPL_CMPOP(op, type) \
|
|
void cmp##op(const Size2D &size, \
|
|
const type * src0Base, ptrdiff_t src0Stride, \
|
|
const type * src1Base, ptrdiff_t src1Stride, \
|
|
u8 *dstBase, ptrdiff_t dstStride) \
|
|
{ \
|
|
internal::assertSupportedConfiguration(); \
|
|
(void)size; \
|
|
(void)src0Base; \
|
|
(void)src0Stride; \
|
|
(void)src1Base; \
|
|
(void)src1Stride; \
|
|
(void)dstBase; \
|
|
(void)dstStride; \
|
|
}
|
|
|
|
#endif
|
|
|
|
IMPL_CMPOP(EQ, u8)
|
|
IMPL_CMPOP(EQ, s8)
|
|
IMPL_CMPOP(EQ, u16)
|
|
IMPL_CMPOP(EQ, s16)
|
|
IMPL_CMPOP(EQ, u32)
|
|
IMPL_CMPOP(EQ, s32)
|
|
IMPL_CMPOP(EQ, f32)
|
|
|
|
IMPL_CMPOP(NE, u8)
|
|
IMPL_CMPOP(NE, s8)
|
|
IMPL_CMPOP(NE, u16)
|
|
IMPL_CMPOP(NE, s16)
|
|
IMPL_CMPOP(NE, u32)
|
|
IMPL_CMPOP(NE, s32)
|
|
IMPL_CMPOP(NE, f32)
|
|
|
|
IMPL_CMPOP(GT, u8)
|
|
IMPL_CMPOP(GT, s8)
|
|
IMPL_CMPOP(GT, u16)
|
|
IMPL_CMPOP(GT, s16)
|
|
IMPL_CMPOP(GT, u32)
|
|
IMPL_CMPOP(GT, s32)
|
|
IMPL_CMPOP(GT, f32)
|
|
|
|
IMPL_CMPOP(GE, u8)
|
|
IMPL_CMPOP(GE, s8)
|
|
IMPL_CMPOP(GE, u16)
|
|
IMPL_CMPOP(GE, s16)
|
|
IMPL_CMPOP(GE, u32)
|
|
IMPL_CMPOP(GE, s32)
|
|
IMPL_CMPOP(GE, f32)
|
|
|
|
} // namespace CAROTENE_NS
|