mirror of
https://github.com/opencv/opencv.git
synced 2024-12-14 17:29:17 +08:00
1341 lines
41 KiB
C++
1341 lines
41 KiB
C++
/*
|
|
* By downloading, copying, installing or using the software you agree to this license.
|
|
* If you do not agree to this license, do not download, install,
|
|
* copy or use the software.
|
|
*
|
|
*
|
|
* License Agreement
|
|
* For Open Source Computer Vision Library
|
|
* (3-clause BSD License)
|
|
*
|
|
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
|
|
* Third party copyrights are property of their respective owners.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* * Neither the names of the copyright holders nor the names of the contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* This software is provided by the copyright holders and contributors "as is" and
|
|
* any express or implied warranties, including, but not limited to, the implied
|
|
* warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
* In no event shall copyright holders or contributors be liable for any direct,
|
|
* indirect, incidental, special, exemplary, or consequential damages
|
|
* (including, but not limited to, procurement of substitute goods or services;
|
|
* loss of use, data, or profits; or business interruption) however caused
|
|
* and on any theory of liability, whether in contract, strict liability,
|
|
* or tort (including negligence or otherwise) arising in any way out of
|
|
* the use of this software, even if advised of the possibility of such damage.
|
|
*/
|
|
|
|
#include "common.hpp"
|
|
#include "vtransform.hpp"
|
|
|
|
#include <limits>
|
|
|
|
namespace CAROTENE_NS {
|
|
|
|
#ifdef CAROTENE_NEON
|
|
|
|
namespace {
|
|
|
|
template <typename T>
|
|
void minMaxVals(const Size2D &size,
|
|
const T * srcBase, ptrdiff_t srcStride,
|
|
T * pMinVal, T * pMaxVal)
|
|
{
|
|
using namespace internal;
|
|
|
|
typedef typename VecTraits<T>::vec128 vec128;
|
|
typedef typename VecTraits<T>::vec64 vec64;
|
|
|
|
u32 step_base = 32 / sizeof(T), step_tail = 8 / sizeof(T);
|
|
size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
|
|
size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
|
|
|
|
T maxVal = std::numeric_limits<T>::min();
|
|
T minVal = std::numeric_limits<T>::max();
|
|
vec128 v_min_base = vdupq_n(minVal), v_max_base = vdupq_n(maxVal);
|
|
vec64 v_min_tail = vdup_n(minVal), v_max_tail = vdup_n(maxVal);
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const T * src = getRowPtr(srcBase, srcStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw_base; j += step_base)
|
|
{
|
|
prefetch(src + j);
|
|
vec128 v_src0 = vld1q(src + j), v_src1 = vld1q(src + j + 16 / sizeof(T));
|
|
v_min_base = vminq(v_min_base, v_src0);
|
|
v_max_base = vmaxq(v_max_base, v_src0);
|
|
v_min_base = vminq(v_min_base, v_src1);
|
|
v_max_base = vmaxq(v_max_base, v_src1);
|
|
}
|
|
for (; j < roiw_tail; j += step_tail)
|
|
{
|
|
vec64 v_src0 = vld1(src + j);
|
|
v_min_tail = vmin(v_min_tail, v_src0);
|
|
v_max_tail = vmax(v_max_tail, v_src0);
|
|
}
|
|
|
|
for (; j < size.width; j++)
|
|
{
|
|
T srcval = src[j];
|
|
minVal = std::min(srcval, minVal);
|
|
maxVal = std::max(srcval, maxVal);
|
|
}
|
|
}
|
|
|
|
// collect min & max values
|
|
T ar[16 / sizeof(T)];
|
|
vst1q(ar, vcombine(vmin(v_min_tail, vmin(vget_low(v_min_base), vget_high(v_min_base))),
|
|
vmax(v_max_tail, vmax(vget_low(v_max_base), vget_high(v_max_base)))));
|
|
|
|
for (size_t x = 0; x < 8u / sizeof(T); ++x)
|
|
{
|
|
minVal = std::min(minVal, ar[x]);
|
|
maxVal = std::max(maxVal, ar[x + 8 / sizeof(T)]);
|
|
}
|
|
|
|
if (pMaxVal)
|
|
*pMaxVal = maxVal;
|
|
if (pMinVal)
|
|
*pMinVal = minVal;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
#endif
|
|
|
|
void minMaxVals(const Size2D &size,
|
|
const u8 * srcBase, ptrdiff_t srcStride,
|
|
u8 * pMinVal, u8 * pMaxVal)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minMaxVals<u8>(size,
|
|
srcBase, srcStride,
|
|
pMinVal, pMaxVal);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)pMinVal;
|
|
(void)pMaxVal;
|
|
#endif
|
|
}
|
|
|
|
void minMaxVals(const Size2D &size,
|
|
const s16 * srcBase, ptrdiff_t srcStride,
|
|
s16 * pMinVal, s16 * pMaxVal)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minMaxVals<s16>(size,
|
|
srcBase, srcStride,
|
|
pMinVal, pMaxVal);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)pMinVal;
|
|
(void)pMaxVal;
|
|
#endif
|
|
}
|
|
|
|
void minMaxVals(const Size2D &size,
|
|
const u16 * srcBase, ptrdiff_t srcStride,
|
|
u16 * pMinVal, u16 * pMaxVal)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minMaxVals<u16>(size,
|
|
srcBase, srcStride,
|
|
pMinVal, pMaxVal);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)pMinVal;
|
|
(void)pMaxVal;
|
|
#endif
|
|
}
|
|
|
|
void minMaxVals(const Size2D &size,
|
|
const s32 * srcBase, ptrdiff_t srcStride,
|
|
s32 * pMinVal, s32 * pMaxVal)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minMaxVals<s32>(size,
|
|
srcBase, srcStride,
|
|
pMinVal, pMaxVal);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)pMinVal;
|
|
(void)pMaxVal;
|
|
#endif
|
|
}
|
|
|
|
void minMaxVals(const Size2D &size,
|
|
const u32 * srcBase, ptrdiff_t srcStride,
|
|
u32 * pMinVal, u32 * pMaxVal)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minMaxVals<u32>(size,
|
|
srcBase, srcStride,
|
|
pMinVal, pMaxVal);
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)pMinVal;
|
|
(void)pMaxVal;
|
|
#endif
|
|
}
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const f32 * srcBase, ptrdiff_t srcStride,
|
|
f32 &minVal, size_t &minCol, size_t &minRow,
|
|
f32 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = srcBase[0];
|
|
minCol = 0;
|
|
minRow = 0;
|
|
maxVal = srcBase[0];
|
|
maxCol = 0;
|
|
maxRow = 0;
|
|
for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
|
|
{
|
|
const f32 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
if (size.width >= 16)
|
|
{
|
|
u32 tmp0123[4] = { 0, 1, 2, 3 };
|
|
uint32x4_t c4 = vdupq_n_u32(4);
|
|
|
|
#if SIZE_MAX > UINT32_MAX
|
|
size_t boundAll = size.width - (4 - 1);
|
|
for(size_t b = 0; i < boundAll; b = i)
|
|
{
|
|
size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
|
|
#else
|
|
{
|
|
size_t bound = size.width - (4 - 1);
|
|
#endif
|
|
uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
|
|
float32x4_t n_min = vdupq_n_f32(minVal);
|
|
uint32x4_t n_minIdx = vdupq_n_u32(0xffffFFFC);
|
|
float32x4_t n_max = vdupq_n_f32(maxVal);
|
|
uint32x4_t n_maxIdx = vdupq_n_u32(0xffffFFFC);
|
|
|
|
for(; i < bound; i+=4)
|
|
{
|
|
internal::prefetch(src + i);
|
|
float32x4_t line = vld1q_f32(src + i);
|
|
|
|
uint32x4_t minmask = vcltq_f32(line, n_min);
|
|
uint32x4_t maxmask = vcgtq_f32(line, n_max);
|
|
|
|
n_min = vbslq_f32(minmask, line, n_min);
|
|
n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
|
|
n_max = vbslq_f32(maxmask, line, n_max);
|
|
n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
|
|
|
|
// idx[] +=4
|
|
lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
|
|
}
|
|
|
|
f32 fmin[4], fmax[4];
|
|
u32 fminIdx[4], fmaxIdx[4];
|
|
|
|
vst1q_f32(fmin, n_min);
|
|
vst1q_f32(fmax, n_max);
|
|
|
|
vst1q_u32(fminIdx, n_minIdx);
|
|
vst1q_u32(fmaxIdx, n_maxIdx);
|
|
|
|
size_t minIdx = fminIdx[0];
|
|
size_t maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 4; ++j)
|
|
{
|
|
f32 minval = fmin[j];
|
|
f32 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
if(minIdx < 0xffffFFFC)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
minCol = b + minIdx;
|
|
#else
|
|
minCol = minIdx;
|
|
#endif
|
|
minRow = l;
|
|
}
|
|
if(maxIdx < 0xffffFFFC)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
maxCol = b + maxIdx;
|
|
#else
|
|
maxCol = maxIdx;
|
|
#endif
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
for(; i < size.width; ++i )
|
|
{
|
|
float val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minCol = i;
|
|
minRow = l;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxCol = i;
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const f32 * srcBase, ptrdiff_t srcStride,
|
|
const u8 * maskBase, ptrdiff_t maskStride,
|
|
f32 &minVal, size_t &minCol, size_t &minRow,
|
|
f32 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = std::numeric_limits<f32>::max();
|
|
minCol = size.width;
|
|
minRow = size.height;
|
|
maxVal = -std::numeric_limits<f32>::max();
|
|
maxCol = size.width;
|
|
maxRow = size.height;
|
|
for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
|
|
{
|
|
const f32 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
const u8 * mask = internal::getRowPtr( maskBase, maskStride, l);
|
|
if (size.width >= 16)
|
|
{
|
|
u32 tmp0123[4] = { 0, 1, 2, 3 };
|
|
uint32x4_t uOne = vdupq_n_u32(1);
|
|
uint32x4_t c4 = vdupq_n_u32(4);
|
|
|
|
#if SIZE_MAX > UINT32_MAX
|
|
size_t boundAll = size.width - (4 - 1);
|
|
for(size_t b = 0; i < boundAll; b = i)
|
|
{
|
|
size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
|
|
#else
|
|
{
|
|
size_t bound = size.width - (4 - 1);
|
|
#endif
|
|
uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
|
|
float32x4_t n_min = vdupq_n_f32(minVal);
|
|
uint32x4_t n_minIdx = vdupq_n_u32(0xffffFFFC);
|
|
float32x4_t n_max = vdupq_n_f32(maxVal);
|
|
uint32x4_t n_maxIdx = vdupq_n_u32(0xffffFFFC);
|
|
|
|
for(; i < bound; i+=4)
|
|
{
|
|
internal::prefetch(src + i);
|
|
internal::prefetch(mask + i);
|
|
float32x4_t line = vld1q_f32(src + i);
|
|
uint8x8_t maskLine = vld1_u8(mask + i);
|
|
|
|
uint32x4_t maskLine4 = vmovl_u16(vget_low_u16(vmovl_u8(maskLine)));
|
|
maskLine4 = vcgeq_u32(maskLine4, uOne);
|
|
|
|
uint32x4_t minmask = vcltq_f32(line, n_min);
|
|
uint32x4_t maxmask = vcgtq_f32(line, n_max);
|
|
|
|
minmask = vandq_u32(minmask, maskLine4);
|
|
maxmask = vandq_u32(maxmask, maskLine4);
|
|
|
|
n_min = vbslq_f32(minmask, line, n_min);
|
|
n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
|
|
n_max = vbslq_f32(maxmask, line, n_max);
|
|
n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
|
|
|
|
// idx[] +=4
|
|
lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
|
|
}
|
|
|
|
f32 fmin[4], fmax[4];
|
|
u32 fminIdx[4], fmaxIdx[4];
|
|
|
|
vst1q_f32(fmin, n_min);
|
|
vst1q_f32(fmax, n_max);
|
|
|
|
vst1q_u32(fminIdx, n_minIdx);
|
|
vst1q_u32(fmaxIdx, n_maxIdx);
|
|
|
|
size_t minIdx = fminIdx[0];
|
|
size_t maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 4; ++j)
|
|
{
|
|
f32 minval = fmin[j];
|
|
f32 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
if(minIdx < 0xffffFFFC)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
minCol = b + minIdx;
|
|
#else
|
|
minCol = minIdx;
|
|
#endif
|
|
minRow = l;
|
|
}
|
|
if(maxIdx < 0xffffFFFC)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
maxCol = b + maxIdx;
|
|
#else
|
|
maxCol = maxIdx;
|
|
#endif
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
for(; i < size.width; i++ )
|
|
{
|
|
if (!mask[i])
|
|
continue;
|
|
f32 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minCol = i;
|
|
minRow = l;
|
|
}
|
|
if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxCol = i;
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)maskBase;
|
|
(void)maskStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const s32 * srcBase, ptrdiff_t srcStride,
|
|
s32 &minVal, size_t &minCol, size_t &minRow,
|
|
s32 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = srcBase[0];
|
|
minCol = 0;
|
|
minRow = 0;
|
|
maxVal = srcBase[0];
|
|
maxCol = 0;
|
|
maxRow = 0;
|
|
for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
|
|
{
|
|
const s32 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
if (size.width >= 16)
|
|
{
|
|
u32 tmp0123[4] = { 0, 1, 2, 3 };
|
|
uint32x4_t c4 = vdupq_n_u32(4);
|
|
|
|
#if SIZE_MAX > UINT32_MAX
|
|
size_t boundAll = size.width - (4 - 1);
|
|
for(size_t b = 0; i < boundAll; b = i)
|
|
{
|
|
size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
|
|
#else
|
|
{
|
|
size_t bound = size.width - (4 - 1);
|
|
#endif
|
|
uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
|
|
int32x4_t n_min = vdupq_n_s32(minVal);
|
|
uint32x4_t n_minIdx = vdupq_n_u32(0xffffFFFC);
|
|
int32x4_t n_max = vdupq_n_s32(maxVal);
|
|
uint32x4_t n_maxIdx = vdupq_n_u32(0xffffFFFC);
|
|
|
|
for(; i < bound; i+=4 )
|
|
{
|
|
internal::prefetch(src + i);
|
|
int32x4_t line = vld1q_s32(src + i);
|
|
|
|
uint32x4_t minmask = vcltq_s32(line, n_min);
|
|
uint32x4_t maxmask = vcgtq_s32(line, n_max);
|
|
|
|
n_min = vbslq_s32(minmask, line, n_min);
|
|
n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
|
|
n_max = vbslq_s32(maxmask, line, n_max);
|
|
n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
|
|
|
|
// idx[] +=4
|
|
lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
|
|
}
|
|
|
|
s32 fmin[4], fmax[4];
|
|
u32 fminIdx[4], fmaxIdx[4];
|
|
|
|
vst1q_s32(fmin, n_min);
|
|
vst1q_s32(fmax, n_max);
|
|
|
|
vst1q_u32(fminIdx, n_minIdx);
|
|
vst1q_u32(fmaxIdx, n_maxIdx);
|
|
|
|
size_t minIdx = fminIdx[0];
|
|
size_t maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 4; ++j)
|
|
{
|
|
s32 minval = fmin[j];
|
|
s32 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
if(minIdx < 0xffffFFFC)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
minCol = b + minIdx;
|
|
#else
|
|
minCol = minIdx;
|
|
#endif
|
|
minRow = l;
|
|
}
|
|
if(maxIdx < 0xffffFFFC)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
maxCol = b + maxIdx;
|
|
#else
|
|
maxCol = maxIdx;
|
|
#endif
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
for(; i < size.width; ++i )
|
|
{
|
|
s32 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minCol = i;
|
|
minRow = l;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxCol = i;
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const s16 * srcBase, ptrdiff_t srcStride,
|
|
s16 &minVal, size_t &minCol, size_t &minRow,
|
|
s16 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = srcBase[0];
|
|
minCol = 0;
|
|
minRow = 0;
|
|
maxVal = srcBase[0];
|
|
maxCol = 0;
|
|
maxRow = 0;
|
|
for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
|
|
{
|
|
const s16 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
if (size.width >= 32)
|
|
{
|
|
u32 tmp0123[4] = { 0, 1, 2, 3 };
|
|
uint32x4_t c8 = vdupq_n_u32(8);
|
|
|
|
#if SIZE_MAX > UINT32_MAX
|
|
size_t boundAll = size.width - (8 - 1);
|
|
for(size_t b = 0; i < boundAll; b = i)
|
|
{
|
|
size_t bound = std::min<size_t>(boundAll, b + 0xffffFFF8);
|
|
#else
|
|
{
|
|
size_t bound = size.width - (8 - 1);
|
|
#endif
|
|
uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
|
|
int16x8_t n_min = vdupq_n_s16(minVal);
|
|
uint32x4_t n_minIdxl = vdupq_n_u32(0xffffFFF8);
|
|
uint32x4_t n_minIdxh = vdupq_n_u32(0xffffFFF8);
|
|
int16x8_t n_max = vdupq_n_s16(maxVal);
|
|
uint32x4_t n_maxIdxl = vdupq_n_u32(0xffffFFF8);
|
|
uint32x4_t n_maxIdxh = vdupq_n_u32(0xffffFFF8);
|
|
|
|
for(; i < bound; i+=8 )
|
|
{
|
|
internal::prefetch(src + i);
|
|
int16x8_t line = vld1q_s16(src + i);
|
|
|
|
uint16x8_t minmask = vcltq_s16(line, n_min);
|
|
uint16x8_t maxmask = vcgtq_s16(line, n_max);
|
|
|
|
n_min = vbslq_s16(minmask, line, n_min);
|
|
uint16x4_t minml = vget_low_u16(minmask);
|
|
uint16x4_t minmh = vget_high_u16(minmask);
|
|
uint32x4_t minml2 = vmovl_u16(minml);
|
|
uint32x4_t minmh2 = vmovl_u16(minmh);
|
|
minml2 = vqshlq_n_u32(minml2, 31);
|
|
minmh2 = vqshlq_n_u32(minmh2, 31);
|
|
n_minIdxl = vbslq_u32(minml2, lineIdxOffset, n_minIdxl);
|
|
n_minIdxh = vbslq_u32(minmh2, lineIdxOffset, n_minIdxh);
|
|
|
|
n_max = vbslq_s16(maxmask, line, n_max);
|
|
uint16x4_t maxml = vget_low_u16(maxmask);
|
|
uint16x4_t maxmh = vget_high_u16(maxmask);
|
|
uint32x4_t maxml2 = vmovl_u16(maxml);
|
|
uint32x4_t maxmh2 = vmovl_u16(maxmh);
|
|
maxml2 = vqshlq_n_u32(maxml2, 31);
|
|
maxmh2 = vqshlq_n_u32(maxmh2, 31);
|
|
n_maxIdxl = vbslq_u32(maxml2, lineIdxOffset, n_maxIdxl);
|
|
n_maxIdxh = vbslq_u32(maxmh2, lineIdxOffset, n_maxIdxh);
|
|
|
|
// idx[] +=8
|
|
lineIdxOffset = vaddq_u32(lineIdxOffset, c8);
|
|
}
|
|
|
|
// fix high part of indexes
|
|
uint32x4_t c4 = vdupq_n_u32((int32_t) 4);
|
|
n_minIdxh = vaddq_u32(n_minIdxh, c4);
|
|
n_maxIdxh = vaddq_u32(n_maxIdxh, c4);
|
|
|
|
s16 fmin[8], fmax[8];
|
|
u32 fminIdx[8], fmaxIdx[8];
|
|
|
|
vst1q_s16(fmin, n_min);
|
|
vst1q_s16(fmax, n_max);
|
|
vst1q_u32(fminIdx+0, n_minIdxl);
|
|
vst1q_u32(fmaxIdx+0, n_maxIdxl);
|
|
vst1q_u32(fminIdx+4, n_minIdxh);
|
|
vst1q_u32(fmaxIdx+4, n_maxIdxh);
|
|
|
|
size_t minIdx = fminIdx[0];
|
|
size_t maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 8; ++j)
|
|
{
|
|
s16 minval = fmin[j];
|
|
s16 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
if(minIdx < 0xffffFFF8)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
minCol = b + minIdx;
|
|
#else
|
|
minCol = minIdx;
|
|
#endif
|
|
minRow = l;
|
|
}
|
|
if(maxIdx < 0xffffFFF8)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
maxCol = b + maxIdx;
|
|
#else
|
|
maxCol = maxIdx;
|
|
#endif
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
for(; i < size.width; ++i )
|
|
{
|
|
short val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minCol = i;
|
|
minRow = l;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxCol = i;
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const u16 * srcBase, ptrdiff_t srcStride,
|
|
u16 &minVal, size_t &minCol, size_t &minRow,
|
|
u16 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = srcBase[0];
|
|
minCol = 0;
|
|
minRow = 0;
|
|
maxVal = srcBase[0];
|
|
maxCol = 0;
|
|
maxRow = 0;
|
|
for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
|
|
{
|
|
const u16 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
if (size.width >= 32)
|
|
{
|
|
u32 tmp0123[4] = { 0, 1, 2, 3 };
|
|
uint32x4_t c8 = vdupq_n_u32(8);
|
|
|
|
#if SIZE_MAX > UINT32_MAX
|
|
size_t boundAll = size.width - (8 - 1);
|
|
for(size_t b = 0; i < boundAll; b = i)
|
|
{
|
|
size_t bound = std::min<size_t>(boundAll, b + 0xffffFFF8);
|
|
#else
|
|
{
|
|
size_t bound = size.width - (8 - 1);
|
|
#endif
|
|
uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
|
|
uint16x8_t n_min = vdupq_n_u16(minVal);
|
|
uint32x4_t n_minIdxl = vdupq_n_u32(0xffffFFF8);
|
|
uint32x4_t n_minIdxh = vdupq_n_u32(0xffffFFF8);
|
|
uint16x8_t n_max = vdupq_n_u16(maxVal);
|
|
uint32x4_t n_maxIdxl = vdupq_n_u32(0xffffFFF8);
|
|
uint32x4_t n_maxIdxh = vdupq_n_u32(0xffffFFF8);
|
|
|
|
for(; i < bound; i+=8 )
|
|
{
|
|
internal::prefetch(src + i);
|
|
uint16x8_t line = vld1q_u16(src + i);
|
|
|
|
uint16x8_t minmask = vcltq_u16(line, n_min);
|
|
uint16x8_t maxmask = vcgtq_u16(line, n_max);
|
|
|
|
n_min = vbslq_u16(minmask, line, n_min);
|
|
uint16x4_t minml = vget_low_u16(minmask);
|
|
uint16x4_t minmh = vget_high_u16(minmask);
|
|
uint32x4_t minml2 = vmovl_u16(minml);
|
|
uint32x4_t minmh2 = vmovl_u16(minmh);
|
|
minml2 = vqshlq_n_u32(minml2, 31);
|
|
minmh2 = vqshlq_n_u32(minmh2, 31);
|
|
n_minIdxl = vbslq_u32(minml2, lineIdxOffset, n_minIdxl);
|
|
n_minIdxh = vbslq_u32(minmh2, lineIdxOffset, n_minIdxh);
|
|
|
|
n_max = vbslq_u16(maxmask, line, n_max);
|
|
uint16x4_t maxml = vget_low_u16(maxmask);
|
|
uint16x4_t maxmh = vget_high_u16(maxmask);
|
|
uint32x4_t maxml2 = vmovl_u16(maxml);
|
|
uint32x4_t maxmh2 = vmovl_u16(maxmh);
|
|
maxml2 = vqshlq_n_u32(maxml2, 31);
|
|
maxmh2 = vqshlq_n_u32(maxmh2, 31);
|
|
n_maxIdxl = vbslq_u32(maxml2, lineIdxOffset, n_maxIdxl);
|
|
n_maxIdxh = vbslq_u32(maxmh2, lineIdxOffset, n_maxIdxh);
|
|
|
|
// idx[] +=8
|
|
lineIdxOffset = vaddq_u32(lineIdxOffset, c8);
|
|
}
|
|
|
|
// fix high part of indexes
|
|
uint32x4_t c4 = vdupq_n_u32(4);
|
|
n_minIdxh = vaddq_u32(n_minIdxh, c4);
|
|
n_maxIdxh = vaddq_u32(n_maxIdxh, c4);
|
|
|
|
u16 fmin[8], fmax[8];
|
|
u32 fminIdx[8], fmaxIdx[8];
|
|
|
|
vst1q_u16(fmin, n_min);
|
|
vst1q_u16(fmax, n_max);
|
|
vst1q_u32(fminIdx+0, n_minIdxl);
|
|
vst1q_u32(fmaxIdx+0, n_maxIdxl);
|
|
vst1q_u32(fminIdx+4, n_minIdxh);
|
|
vst1q_u32(fmaxIdx+4, n_maxIdxh);
|
|
|
|
size_t minIdx = fminIdx[0];
|
|
size_t maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 8; ++j)
|
|
{
|
|
u16 minval = fmin[j];
|
|
u16 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
if(minIdx < 0xffffFFF8)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
minCol = b + minIdx;
|
|
#else
|
|
minCol = minIdx;
|
|
#endif
|
|
minRow = l;
|
|
}
|
|
if(maxIdx < 0xffffFFF8)
|
|
{
|
|
#if SIZE_MAX > UINT32_MAX
|
|
maxCol = b + maxIdx;
|
|
#else
|
|
maxCol = maxIdx;
|
|
#endif
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
for(; i < size.width; ++i )
|
|
{
|
|
u16 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minCol = i;
|
|
minRow = l;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxCol = i;
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
#ifdef CAROTENE_NEON
|
|
namespace {
|
|
|
|
void minMaxLocBlock(const u8 * src, u32 len,
|
|
u8 &minVal, u16 &minIdx,
|
|
u8 &maxVal, u16 &maxIdx)
|
|
{
|
|
u16 tmp0123[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
|
|
|
|
uint8x16_t n_min = vdupq_n_u8(src[0]);
|
|
uint16x8_t n_minIdxl = vdupq_n_u16(0);
|
|
uint16x8_t n_minIdxh = vdupq_n_u16(0);
|
|
uint8x16_t n_max = vdupq_n_u8(src[0]);
|
|
uint16x8_t n_maxIdxl = vdupq_n_u16(0);
|
|
uint16x8_t n_maxIdxh = vdupq_n_u16(0);
|
|
uint16x8_t c16 = vdupq_n_u16(16);
|
|
uint16x8_t lineIdxOffset = vld1q_u16(tmp0123);
|
|
|
|
s32 i = 0;
|
|
s32 bound = len - (16 - 1);
|
|
for(; i < bound; i+=16 )
|
|
{
|
|
internal::prefetch(src + i);
|
|
uint8x16_t line = vld1q_u8(src + i);
|
|
|
|
uint8x16_t minmask = vcltq_u8(line, n_min);
|
|
uint8x16_t maxmask = vcgtq_u8(line, n_max);
|
|
|
|
n_min = vbslq_u8(minmask, line, n_min);
|
|
uint8x8_t minml = vget_low_u8(minmask);
|
|
uint8x8_t minmh = vget_high_u8(minmask);
|
|
uint16x8_t minml2 = vmovl_u8(minml);
|
|
uint16x8_t minmh2 = vmovl_u8(minmh);
|
|
minml2 = vqshlq_n_u16(minml2, 15);
|
|
minmh2 = vqshlq_n_u16(minmh2, 15);
|
|
n_minIdxl = vbslq_u16(minml2, lineIdxOffset, n_minIdxl);
|
|
n_minIdxh = vbslq_u16(minmh2, lineIdxOffset, n_minIdxh);
|
|
|
|
n_max = vbslq_u8(maxmask, line, n_max);
|
|
uint8x8_t maxml = vget_low_u8(maxmask);
|
|
uint8x8_t maxmh = vget_high_u8(maxmask);
|
|
uint16x8_t maxml2 = vmovl_u8(maxml);
|
|
uint16x8_t maxmh2 = vmovl_u8(maxmh);
|
|
maxml2 = vqshlq_n_u16(maxml2, 15);
|
|
maxmh2 = vqshlq_n_u16(maxmh2, 15);
|
|
n_maxIdxl = vbslq_u16(maxml2, lineIdxOffset, n_maxIdxl);
|
|
n_maxIdxh = vbslq_u16(maxmh2, lineIdxOffset, n_maxIdxh);
|
|
|
|
// idx[] +=16
|
|
lineIdxOffset = vaddq_u16(lineIdxOffset, c16);
|
|
}
|
|
|
|
// fix high part of indexes
|
|
uint16x8_t c8 = vdupq_n_u16(8);
|
|
n_minIdxh = vaddq_u16(n_minIdxh, c8);
|
|
n_maxIdxh = vaddq_u16(n_maxIdxh, c8);
|
|
|
|
u8 fmin[16], fmax[16];
|
|
u16 fminIdx[16], fmaxIdx[16];
|
|
/*{
|
|
uint8x8_t min_low = vget_low_u8(n_min);
|
|
uint8x8_t min_high = vget_high_u8(n_min);
|
|
uint8x8_t max_low = vget_low_u8(n_max);
|
|
uint8x8_t max_high = vget_high_u8(n_max);
|
|
|
|
uint8x8_t minmask = vclt_u8(min_low, min_high);
|
|
uint8x8_t maxmask = vcgt_u8(max_low, max_high);
|
|
|
|
uint8x8_t min2 = vbsl_u8(minmask, min_low, min_high);
|
|
uint8x8_t max2 = vbsl_u8(maxmask, max_low, max_high);
|
|
|
|
uint16x8_t minidxmask = vmovl_u8(minmask);
|
|
uint16x8_t maxidxmask = vmovl_u8(maxmask);
|
|
minidxmask = vqshlq_n_u16(minidxmask, 15);
|
|
maxidxmask = vqshlq_n_u16(maxidxmask, 15);
|
|
|
|
uint16x8_t n_minIdx = vbslq_u16(minidxmask, n_minIdxl, n_minIdxh);
|
|
uint16x8_t n_maxIdx = vbslq_u16(maxidxmask, n_maxIdxl, n_maxIdxh);
|
|
|
|
vst1_u8((uint8_t*)fmin, min2);
|
|
vst1_u8((uint8_t*)fmax, max2);
|
|
|
|
vst1q_u16((uint16_t*)(fminIdx), n_minIdx);
|
|
vst1q_u16((uint16_t*)(fmaxIdx), n_maxIdx);
|
|
}*/
|
|
|
|
vst1q_u8(fmin, n_min);
|
|
vst1q_u8(fmax, n_max);
|
|
vst1q_u16(fminIdx+0, n_minIdxl);
|
|
vst1q_u16(fmaxIdx+0, n_maxIdxl);
|
|
vst1q_u16(fminIdx+8, n_minIdxh);
|
|
vst1q_u16(fmaxIdx+8, n_maxIdxh);
|
|
|
|
minIdx = fminIdx[0];
|
|
maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 16; ++j)
|
|
{
|
|
u8 minval = fmin[j];
|
|
u8 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
|
|
for(; i < (s32)len; ++i )
|
|
{
|
|
u8 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minIdx = (u16)i;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxIdx = (u16)i;
|
|
}
|
|
}
|
|
}
|
|
|
|
void minMaxLocBlock(const s8 * src, u32 len,
|
|
s8 &minVal, u16 &minIdx,
|
|
s8 &maxVal, u16 &maxIdx)
|
|
{
|
|
u16 tmp0123[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
|
|
|
int8x16_t n_min = vdupq_n_s8(src[0]);
|
|
uint16x8_t n_minIdxl = vdupq_n_u16(0);
|
|
uint16x8_t n_minIdxh = vdupq_n_u16(0);
|
|
int8x16_t n_max = vdupq_n_s8(src[0]);
|
|
uint16x8_t n_maxIdxl = vdupq_n_u16(0);
|
|
uint16x8_t n_maxIdxh = vdupq_n_u16(0);
|
|
uint16x8_t c16 = vdupq_n_u16(16);
|
|
uint16x8_t lineIdxOffset = vld1q_u16(tmp0123);
|
|
|
|
s32 i = 0;
|
|
s32 bound = len - (16 - 1);
|
|
for(; i < bound; i+=16 )
|
|
{
|
|
internal::prefetch(src + i);
|
|
int8x16_t line = vld1q_s8(src + i);
|
|
|
|
uint8x16_t minmask = vcltq_s8(line, n_min);
|
|
uint8x16_t maxmask = vcgtq_s8(line, n_max);
|
|
|
|
n_min = vbslq_s8(minmask, line, n_min);
|
|
uint8x8_t minml = vget_low_u8(minmask);
|
|
uint8x8_t minmh = vget_high_u8(minmask);
|
|
uint16x8_t minml2 = vmovl_u8(minml);
|
|
uint16x8_t minmh2 = vmovl_u8(minmh);
|
|
minml2 = vqshlq_n_u16(minml2, 15);
|
|
minmh2 = vqshlq_n_u16(minmh2, 15);
|
|
n_minIdxl = vbslq_u16(minml2, lineIdxOffset, n_minIdxl);
|
|
n_minIdxh = vbslq_u16(minmh2, lineIdxOffset, n_minIdxh);
|
|
|
|
n_max = vbslq_s8(maxmask, line, n_max);
|
|
uint8x8_t maxml = vget_low_u8(maxmask);
|
|
uint8x8_t maxmh = vget_high_u8(maxmask);
|
|
uint16x8_t maxml2 = vmovl_u8(maxml);
|
|
uint16x8_t maxmh2 = vmovl_u8(maxmh);
|
|
maxml2 = vqshlq_n_u16(maxml2, 15);
|
|
maxmh2 = vqshlq_n_u16(maxmh2, 15);
|
|
n_maxIdxl = vbslq_u16(maxml2, lineIdxOffset, n_maxIdxl);
|
|
n_maxIdxh = vbslq_u16(maxmh2, lineIdxOffset, n_maxIdxh);
|
|
|
|
// idx[] +=16
|
|
lineIdxOffset = vaddq_u16(lineIdxOffset, c16);
|
|
}
|
|
|
|
// fix high part of indexes
|
|
uint16x8_t c8 = vdupq_n_u16(8);
|
|
n_minIdxh = vaddq_u16(n_minIdxh, c8);
|
|
n_maxIdxh = vaddq_u16(n_maxIdxh, c8);
|
|
|
|
s8 fmin[16], fmax[16];
|
|
u16 fminIdx[16], fmaxIdx[16];
|
|
|
|
vst1q_s8(fmin, n_min);
|
|
vst1q_s8(fmax, n_max);
|
|
vst1q_u16(fminIdx+0, n_minIdxl);
|
|
vst1q_u16(fmaxIdx+0, n_maxIdxl);
|
|
vst1q_u16(fminIdx+8, n_minIdxh);
|
|
vst1q_u16(fmaxIdx+8, n_maxIdxh);
|
|
|
|
minIdx = fminIdx[0];
|
|
maxIdx = fmaxIdx[0];
|
|
minVal = fmin[0];
|
|
maxVal = fmax[0];
|
|
|
|
for (s32 j = 1; j < 16; ++j)
|
|
{
|
|
s8 minval = fmin[j];
|
|
s8 maxval = fmax[j];
|
|
if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
|
|
{
|
|
minIdx = fminIdx[j];
|
|
minVal = minval;
|
|
}
|
|
if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
|
|
{
|
|
maxIdx = fmaxIdx[j];
|
|
maxVal = maxval;
|
|
}
|
|
}
|
|
|
|
for(; i < (s32)len; ++i )
|
|
{
|
|
s8 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minIdx = (u16)i;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxIdx = (u16)i;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
#endif // CAROTENE_NEON
|
|
|
|
#define USHORT_BLOCK_MAX_SIZE (1 << 16)
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const u8 * srcBase, ptrdiff_t srcStride,
|
|
u8 &minVal, size_t &minCol, size_t &minRow,
|
|
u8 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = srcBase[0];
|
|
minCol = 0;
|
|
minRow = 0;
|
|
maxVal = srcBase[0];
|
|
maxCol = 0;
|
|
maxRow = 0;
|
|
for(size_t l = 0; l < size.height; ++l)
|
|
{
|
|
const u8 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
if (size.width > 128)
|
|
{
|
|
for(size_t blockStart = 0; blockStart < size.width; blockStart += USHORT_BLOCK_MAX_SIZE)
|
|
{
|
|
u8 locMinVal, locMaxVal;
|
|
u16 locMinIdx, locMaxIdx;
|
|
size_t tail = size.width - blockStart;
|
|
minMaxLocBlock(src + blockStart, tail < USHORT_BLOCK_MAX_SIZE ? tail : USHORT_BLOCK_MAX_SIZE,
|
|
locMinVal, locMinIdx, locMaxVal, locMaxIdx);
|
|
|
|
if (locMinVal == 0 && locMaxVal == 255)
|
|
{
|
|
minCol = blockStart + locMinIdx;
|
|
maxCol = blockStart + locMaxIdx;
|
|
minRow = l;
|
|
maxRow = l;
|
|
minVal = 0;
|
|
maxVal = 255;
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
if (locMinVal < minVal)
|
|
{
|
|
minCol = blockStart + locMinIdx;
|
|
minRow = l;
|
|
minVal = locMinVal;
|
|
}
|
|
if (locMaxVal > maxVal)
|
|
{
|
|
maxCol = blockStart + locMaxIdx;
|
|
maxRow = l;
|
|
maxVal = locMaxVal;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(size_t i = 0; i < size.width; ++i )
|
|
{
|
|
u8 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minCol = i;
|
|
minRow = l;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxCol = i;
|
|
maxRow = l;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
void minMaxLoc(const Size2D &size,
|
|
const s8 * srcBase, ptrdiff_t srcStride,
|
|
s8 &minVal, size_t &minCol, size_t &minRow,
|
|
s8 &maxVal, size_t &maxCol, size_t &maxRow)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
minVal = srcBase[0];
|
|
minCol = 0;
|
|
minRow = 0;
|
|
maxVal = srcBase[0];
|
|
maxCol = 0;
|
|
maxRow = 0;
|
|
for(size_t l = 0; l < size.height; ++l)
|
|
{
|
|
const s8 * src = internal::getRowPtr( srcBase, srcStride, l);
|
|
if (size.width > 128)
|
|
{
|
|
for(size_t blockStart = 0; blockStart < size.width; blockStart += USHORT_BLOCK_MAX_SIZE)
|
|
{
|
|
s8 locMinVal, locMaxVal;
|
|
u16 locMinIdx, locMaxIdx;
|
|
size_t tail = size.width - blockStart;
|
|
minMaxLocBlock(src + blockStart, tail < USHORT_BLOCK_MAX_SIZE ? tail : USHORT_BLOCK_MAX_SIZE,
|
|
locMinVal, locMinIdx, locMaxVal, locMaxIdx);
|
|
|
|
if (locMinVal == -128 && locMaxVal == 127)
|
|
{
|
|
minCol = blockStart + locMinIdx;
|
|
maxCol = blockStart + locMaxIdx;
|
|
minRow = l;
|
|
maxRow = l;
|
|
minVal = -128;
|
|
maxVal = 127;
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
if (locMinVal < minVal)
|
|
{
|
|
minCol = blockStart + locMinIdx;
|
|
minRow = l;
|
|
minVal = locMinVal;
|
|
}
|
|
if (locMaxVal > maxVal)
|
|
{
|
|
maxCol = blockStart + locMaxIdx;
|
|
maxRow = l;
|
|
maxVal = locMaxVal;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(size_t i = 0; i < size.width; ++i )
|
|
{
|
|
s8 val = src[i];
|
|
if( val < minVal )
|
|
{
|
|
minVal = val;
|
|
minRow = l;
|
|
minCol = i;
|
|
}
|
|
else if( val > maxVal )
|
|
{
|
|
maxVal = val;
|
|
maxRow = l;
|
|
maxCol = i;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)minVal;
|
|
(void)minCol;
|
|
(void)minRow;
|
|
(void)maxVal;
|
|
(void)maxCol;
|
|
(void)maxRow;
|
|
#endif
|
|
}
|
|
|
|
} // namespace CAROTENE_NS
|