/* * By downloading, copying, installing or using the software you agree to this license. * If you do not agree to this license, do not download, install, * copy or use the software. * * * License Agreement * For Open Source Computer Vision Library * (3-clause BSD License) * * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved. * Third party copyrights are property of their respective owners. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * * Neither the names of the copyright holders nor the names of the contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided by the copyright holders and contributors "as is" and * any express or implied warranties, including, but not limited to, the implied * warranties of merchantability and fitness for a particular purpose are disclaimed. * In no event shall copyright holders or contributors be liable for any direct, * indirect, incidental, special, exemplary, or consequential damages * (including, but not limited to, procurement of substitute goods or services; * loss of use, data, or profits; or business interruption) however caused * and on any theory of liability, whether in contract, strict liability, * or tort (including negligence or otherwise) arising in any way out of * the use of this software, even if advised of the possibility of such damage. */ #include "common.hpp" #include "vtransform.hpp" namespace CAROTENE_NS { #ifdef CAROTENE_NEON namespace { inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); } inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); } inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); } template struct vtail { static inline void inRange(const T *, const T *, const T *, u8 *, size_t &, size_t) { //do nothing since there couldn't be enough data } }; template struct vtail { static inline void inRange(const T * src, const T * rng1, const T * rng2, u8 * dst, size_t &x, size_t width) { typedef typename internal::VecTraits::vec128 vec128; typedef typename internal::VecTraits::unsign::vec128 uvec128; //There no more than 15 elements in the tail, so we could handle 8 element vector only once if( x + 8 < width) { vec128 vs = internal::vld1q( src + x); vec128 vr1 = internal::vld1q(rng1 + x); vec128 vr2 = internal::vld1q(rng2 + x); uvec128 vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs)); internal::vst1(dst + x, internal::vmovn(vd)); x+=8; } } }; template struct vtail { static inline void inRange(const T * src, const T * rng1, const T * rng2, u8 * dst, size_t &x, size_t width) { typedef typename internal::VecTraits::vec128 vec128; typedef typename internal::VecTraits::unsign::vec128 uvec128; typedef typename internal::VecTraits::vec64 vec64; typedef typename internal::VecTraits::unsign::vec64 uvec64; //There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements if( x + 16 < width) { vec128 vs = internal::vld1q( src + x); vec128 vr1 = internal::vld1q(rng1 + x); vec128 vr2 = internal::vld1q(rng2 + x); uvec128 vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs)); internal::vst1q(dst + x, vd); x+=16; } if( x + 8 < width) { vec64 vs = internal::vld1( src + x); vec64 vr1 = internal::vld1(rng1 + x); vec64 vr2 = internal::vld1(rng2 + x); uvec64 vd = internal::vand(internal::vcge(vs, vr1), internal::vcge(vr2, vs)); internal::vst1(dst + x, vd); x+=8; } } }; template inline void inRangeCheck(const Size2D &_size, const T * srcBase, ptrdiff_t srcStride, const T * rng1Base, ptrdiff_t rng1Stride, const T * rng2Base, ptrdiff_t rng2Stride, u8 * dstBase, ptrdiff_t dstStride) { typedef typename internal::VecTraits::vec128 vec128; typedef typename internal::VecTraits::unsign::vec128 uvec128; Size2D size(_size); if (srcStride == dstStride && srcStride == rng1Stride && srcStride == rng2Stride && srcStride == (ptrdiff_t)(size.width)) { size.width *= size.height; size.height = 1; } const size_t width = size.width & ~( 32/sizeof(T) - 1 ); for(size_t j = 0; j < size.height; ++j) { const T * src = internal::getRowPtr( srcBase, srcStride, j); const T * rng1 = internal::getRowPtr(rng1Base, rng1Stride, j); const T * rng2 = internal::getRowPtr(rng2Base, rng2Stride, j); u8 * dst = internal::getRowPtr( dstBase, dstStride, j); size_t i = 0; for( ; i < width; i += 32/sizeof(T) ) { internal::prefetch(src + i); internal::prefetch(rng1 + i); internal::prefetch(rng2 + i); vec128 vs = internal::vld1q( src + i); vec128 vr1 = internal::vld1q(rng1 + i); vec128 vr2 = internal::vld1q(rng2 + i); uvec128 vd1 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs)); vs = internal::vld1q( src + i + 16/sizeof(T)); vr1 = internal::vld1q(rng1 + i + 16/sizeof(T)); vr2 = internal::vld1q(rng2 + i + 16/sizeof(T)); uvec128 vd2 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs)); vnst(dst + i, vd1, vd2); } vtail::inRange(src, rng1, rng2, dst, i, size.width); for( ; i < size.width; i++ ) dst[i] = (u8)(-(rng1[i] <= src[i] && src[i] <= rng2[i])); } } } #define INRANGEFUNC(T) \ void inRange(const Size2D &_size, \ const T * srcBase, ptrdiff_t srcStride, \ const T * rng1Base, ptrdiff_t rng1Stride, \ const T * rng2Base, ptrdiff_t rng2Stride, \ u8 * dstBase, ptrdiff_t dstStride) \ { \ internal::assertSupportedConfiguration(); \ inRangeCheck(_size, srcBase, srcStride, \ rng1Base, rng1Stride, rng2Base, rng2Stride, \ dstBase, dstStride); \ } #else #define INRANGEFUNC(T) \ void inRange(const Size2D &, \ const T *, ptrdiff_t, \ const T *, ptrdiff_t, \ const T *, ptrdiff_t, \ u8 *, ptrdiff_t) \ { \ internal::assertSupportedConfiguration(); \ } #endif INRANGEFUNC(u8) INRANGEFUNC(s8) INRANGEFUNC(u16) INRANGEFUNC(s16) INRANGEFUNC(s32) INRANGEFUNC(f32) } // namespace CAROTENE_NS