mirror of
https://github.com/opencv/opencv.git
synced 2025-08-01 10:26:53 +08:00
1628 lines
53 KiB
C++
1628 lines
53 KiB
C++
/*
 * By downloading, copying, installing or using the software you agree to this license.
 * If you do not agree to this license, do not download, install,
 * copy or use the software.
 *
 *
 * License Agreement
 * For Open Source Computer Vision Library
 * (3-clause BSD License)
 *
 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * * Neither the names of the copyright holders nor the names of the contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall copyright holders or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */
|
|
|
|
#include "common.hpp"
|
|
|
|
namespace CAROTENE_NS {
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 threshold, u8 trueValue, u8 falseValue)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t vthreshold = vdupq_n_u8(threshold);
|
|
uint8x8_t vthreshold8 = vdup_n_u8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
if(trueValue == 255 && falseValue == 0)
|
|
{
|
|
for (size_t i = 0; i < size.height; ++i) {
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32) {
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vcgtq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vcgtq_u8(v1, vthreshold);
|
|
vst1q_u8(dst + j, r0);
|
|
vst1q_u8(dst + j + 16, r1);
|
|
}
|
|
for (; j < roiw8; j += 8) {
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vcgt_u8(v0, vthreshold8);
|
|
vst1_u8(dst + j, r0);
|
|
}
|
|
|
|
for (; j < size.width; j++) {
|
|
*(dst + j) = *(src + j) > threshold ? 255 : 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uint8x16_t vtrue_value = vdupq_n_u8(trueValue);
|
|
uint8x8_t vtrue_value8 = vdup_n_u8(trueValue);
|
|
uint8x16_t vfalse_value = vdupq_n_u8(falseValue);
|
|
uint8x8_t vfalse_value8 = vdup_n_u8(falseValue);
|
|
|
|
for (size_t i = 0; i < size.height; ++i) {
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32) {
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vcgtq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vcgtq_u8(v1, vthreshold);
|
|
uint8x16_t r0a = vbslq_u8(r0, vtrue_value, vfalse_value);
|
|
uint8x16_t r1a = vbslq_u8(r1, vtrue_value, vfalse_value);
|
|
vst1q_u8(dst + j, r0a);
|
|
vst1q_u8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8) {
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vcgt_u8(v0, vthreshold8);
|
|
uint8x8_t r0a = vbsl_u8(r0, vtrue_value8, vfalse_value8);
|
|
vst1_u8(dst + j, r0a);
|
|
}
|
|
|
|
for (; j < size.width; j++) {
|
|
*(dst + j) = *(src + j) > threshold ? trueValue : falseValue;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)trueValue;
|
|
(void)falseValue;
|
|
#endif
|
|
}
|
|
|
|
void thresholdRange(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 lowerThreshold, u8 upperThreshold,
|
|
u8 trueValue, u8 falseValue)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t v_lower = vdupq_n_u8(lowerThreshold), v_upper = vdupq_n_u8(upperThreshold);
|
|
uint8x8_t v_lower8 = vdup_n_u8(lowerThreshold), v_upper8 = vdup_n_u8(upperThreshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
if(trueValue == 255 && falseValue == 0)
|
|
{
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v_src0 = vld1q_u8(src + j), v_src1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t v_dst0 = vandq_u8(vcgeq_u8(v_src0, v_lower), vcleq_u8(v_src0, v_upper));
|
|
uint8x16_t v_dst1 = vandq_u8(vcgeq_u8(v_src1, v_lower), vcleq_u8(v_src1, v_upper));
|
|
vst1q_u8(dst + j, v_dst0);
|
|
vst1q_u8(dst + j + 16, v_dst1);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v_src = vld1_u8(src + j);
|
|
uint8x8_t v_dst = vand_u8(vcge_u8(v_src, v_lower8), vcle_u8(v_src, v_upper8));
|
|
vst1_u8(dst + j, v_dst);
|
|
}
|
|
|
|
for (; j < size.width; j++)
|
|
{
|
|
u8 srcVal = src[j];
|
|
dst[j] = lowerThreshold <= srcVal && srcVal <= upperThreshold ? 255 : 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uint8x16_t vtrue_value = vdupq_n_u8(trueValue);
|
|
uint8x8_t vtrue_value8 = vdup_n_u8(trueValue);
|
|
uint8x16_t vfalse_value = vdupq_n_u8(falseValue);
|
|
uint8x8_t vfalse_value8 = vdup_n_u8(falseValue);
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v_src0 = vld1q_u8(src + j), v_src1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t v_dst0 = vandq_u8(vcgeq_u8(v_src0, v_lower), vcleq_u8(v_src0, v_upper));
|
|
uint8x16_t v_dst1 = vandq_u8(vcgeq_u8(v_src1, v_lower), vcleq_u8(v_src1, v_upper));
|
|
v_dst0 = vbslq_u8(v_dst0, vtrue_value, vfalse_value);
|
|
v_dst1 = vbslq_u8(v_dst1, vtrue_value, vfalse_value);
|
|
vst1q_u8(dst + j, v_dst0);
|
|
vst1q_u8(dst + j + 16, v_dst1);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v_src = vld1_u8(src + j);
|
|
uint8x8_t v_dst = vand_u8(vcge_u8(v_src, v_lower8), vcle_u8(v_src, v_upper8));
|
|
v_dst = vbsl_u8(v_dst, vtrue_value8, vfalse_value8);
|
|
vst1_u8(dst + j, v_dst);
|
|
}
|
|
|
|
for (; j < size.width; j++)
|
|
{
|
|
u8 srcVal = src[j];
|
|
dst[j] = lowerThreshold <= srcVal && srcVal <= upperThreshold ? trueValue : falseValue;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)lowerThreshold;
|
|
(void)upperThreshold;
|
|
(void)trueValue;
|
|
(void)falseValue;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 threshold, u8 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t vthreshold = vdupq_n_u8(threshold);
|
|
uint8x16_t vvalue = vdupq_n_u8(value);
|
|
uint8x8_t vthreshold8 = vdup_n_u8(threshold);
|
|
uint8x8_t vvalue8 = vdup_n_u8(value);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vcgtq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vcgtq_u8(v1, vthreshold);
|
|
uint8x16_t r0a = vandq_u8(r0, vvalue);
|
|
uint8x16_t r1a = vandq_u8(r1, vvalue);
|
|
vst1q_u8(dst + j, r0a);
|
|
vst1q_u8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vcgt_u8(v0, vthreshold8);
|
|
uint8x8_t r0a = vand_u8(r0, vvalue8);
|
|
vst1_u8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? value : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinaryInv(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 threshold, u8 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t vthreshold = vdupq_n_u8(threshold);
|
|
uint8x16_t vvalue = vdupq_n_u8(value);
|
|
uint8x8_t vthreshold8 = vdup_n_u8(threshold);
|
|
uint8x8_t vvalue8 = vdup_n_u8(value);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vcleq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vcleq_u8(v1, vthreshold);
|
|
uint8x16_t r0a = vandq_u8(r0, vvalue);
|
|
uint8x16_t r1a = vandq_u8(r1, vvalue);
|
|
vst1q_u8(dst + j, r0a);
|
|
vst1q_u8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vcle_u8(v0, vthreshold8);
|
|
uint8x8_t r0a = vand_u8(r0, vvalue8);
|
|
vst1_u8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : value;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdTruncate(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t vthreshold = vdupq_n_u8(threshold);
|
|
uint8x8_t vthreshold8 = vdup_n_u8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vqsubq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vqsubq_u8(v1, vthreshold);
|
|
uint8x16_t r0a = vqsubq_u8(v0, r0);
|
|
uint8x16_t r1a = vqsubq_u8(v1, r1);
|
|
vst1q_u8(dst + j, r0a);
|
|
vst1q_u8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vqsub_u8(v0, vthreshold8);
|
|
uint8x8_t r0a = vqsub_u8(v0, r0);
|
|
vst1_u8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? threshold : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZero(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t vthreshold = vdupq_n_u8(threshold);
|
|
uint8x8_t vthreshold8 = vdup_n_u8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vcgtq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vcgtq_u8(v1, vthreshold);
|
|
uint8x16_t r0a = vandq_u8(v0, r0);
|
|
uint8x16_t r1a = vandq_u8(v1, r1);
|
|
vst1q_u8(dst + j, r0a);
|
|
vst1q_u8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vcgt_u8(v0, vthreshold8);
|
|
uint8x8_t r0a = vand_u8(v0, r0);
|
|
vst1_u8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? *(src + j) : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZeroInv(const Size2D &size,
|
|
const u8 *srcBase, ptrdiff_t srcStride,
|
|
u8 *dstBase, ptrdiff_t dstStride,
|
|
u8 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint8x16_t vthreshold = vdupq_n_u8(threshold);
|
|
uint8x8_t vthreshold8 = vdup_n_u8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint8x16_t v0 = vld1q_u8(src + j);
|
|
uint8x16_t v1 = vld1q_u8(src + j + 16);
|
|
uint8x16_t r0 = vcgtq_u8(v0, vthreshold);
|
|
uint8x16_t r1 = vcgtq_u8(v1, vthreshold);
|
|
uint8x16_t r0a = vbicq_u8(v0, r0);
|
|
uint8x16_t r1a = vbicq_u8(v1, r1);
|
|
vst1q_u8(dst + j, r0a);
|
|
vst1q_u8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
uint8x8_t v0 = vld1_u8(src + j);
|
|
uint8x8_t r0 = vcgt_u8(v0, vthreshold8);
|
|
uint8x8_t r0a = vbic_u8(v0, r0);
|
|
vst1_u8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const s8 *srcBase, ptrdiff_t srcStride,
|
|
s8 *dstBase, ptrdiff_t dstStride,
|
|
s8 threshold, s8 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int8x16_t vthreshold = vdupq_n_s8(threshold);
|
|
int8x16_t vvalue = vdupq_n_s8(value);
|
|
int8x8_t vthreshold8 = vdup_n_s8(threshold);
|
|
int8x8_t vvalue8 = vdup_n_s8(value);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int8x16_t v0 = vld1q_s8(src + j);
|
|
int8x16_t v1 = vld1q_s8(src + j + 16);
|
|
int8x16_t r0 = vreinterpretq_s8_u8(vcgtq_s8(v0, vthreshold));
|
|
int8x16_t r1 = vreinterpretq_s8_u8(vcgtq_s8(v1, vthreshold));
|
|
int8x16_t r0a = vandq_s8(r0, vvalue);
|
|
int8x16_t r1a = vandq_s8(r1, vvalue);
|
|
vst1q_s8(dst + j, r0a);
|
|
vst1q_s8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
int8x8_t v0 = vld1_s8(src + j);
|
|
int8x8_t r0 = vreinterpret_s8_u8(vcgt_s8(v0, vthreshold8));
|
|
int8x8_t r0a = vand_s8(r0, vvalue8);
|
|
vst1_s8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? value : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinaryInv(const Size2D &size,
|
|
const s8 *srcBase, ptrdiff_t srcStride,
|
|
s8 *dstBase, ptrdiff_t dstStride,
|
|
s8 threshold, s8 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int8x16_t vthreshold = vdupq_n_s8(threshold);
|
|
int8x16_t vvalue = vdupq_n_s8(value);
|
|
int8x8_t vthreshold8 = vdup_n_s8(threshold);
|
|
int8x8_t vvalue8 = vdup_n_s8(value);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int8x16_t v0 = vld1q_s8(src + j);
|
|
int8x16_t v1 = vld1q_s8(src + j + 16);
|
|
int8x16_t r0 = vreinterpretq_s8_u8(vcleq_s8(v0, vthreshold));
|
|
int8x16_t r1 = vreinterpretq_s8_u8(vcleq_s8(v1, vthreshold));
|
|
int8x16_t r0a = vandq_s8(r0, vvalue);
|
|
int8x16_t r1a = vandq_s8(r1, vvalue);
|
|
vst1q_s8(dst + j, r0a);
|
|
vst1q_s8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
int8x8_t v0 = vld1_s8(src + j);
|
|
int8x8_t r0 = vreinterpret_s8_u8(vcle_s8(v0, vthreshold8));
|
|
int8x8_t r0a = vand_s8(r0, vvalue8);
|
|
vst1_s8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : value;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdTruncate(const Size2D &size,
|
|
const s8 *srcBase, ptrdiff_t srcStride,
|
|
s8 *dstBase, ptrdiff_t dstStride,
|
|
s8 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int8x16_t vthreshold = vdupq_n_s8(threshold);
|
|
int8x8_t vthreshold8 = vdup_n_s8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int8x16_t v0 = vld1q_s8(src + j);
|
|
int8x16_t v1 = vld1q_s8(src + j + 16);
|
|
int8x16_t r0 = vqsubq_s8(v0, vthreshold);
|
|
int8x16_t r1 = vqsubq_s8(v1, vthreshold);
|
|
int8x16_t r0a = vqsubq_s8(v0, r0);
|
|
int8x16_t r1a = vqsubq_s8(v1, r1);
|
|
vst1q_s8(dst + j, r0a);
|
|
vst1q_s8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
int8x8_t v0 = vld1_s8(src + j);
|
|
int8x8_t r0 = vqsub_s8(v0, vthreshold8);
|
|
int8x8_t r0a = vqsub_s8(v0, r0);
|
|
vst1_s8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? threshold : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZero(const Size2D &size,
|
|
const s8 *srcBase, ptrdiff_t srcStride,
|
|
s8 *dstBase, ptrdiff_t dstStride,
|
|
s8 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int8x16_t vthreshold = vdupq_n_s8(threshold);
|
|
int8x8_t vthreshold8 = vdup_n_s8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int8x16_t v0 = vld1q_s8(src + j);
|
|
int8x16_t v1 = vld1q_s8(src + j + 16);
|
|
int8x16_t r0 = vreinterpretq_s8_u8(vcgtq_s8(v0, vthreshold));
|
|
int8x16_t r1 = vreinterpretq_s8_u8(vcgtq_s8(v1, vthreshold));
|
|
int8x16_t r0a = vandq_s8(v0, r0);
|
|
int8x16_t r1a = vandq_s8(v1, r1);
|
|
vst1q_s8(dst + j, r0a);
|
|
vst1q_s8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
int8x8_t v0 = vld1_s8(src + j);
|
|
int8x8_t r0 = vreinterpret_s8_u8(vcgt_s8(v0, vthreshold8));
|
|
int8x8_t r0a = vand_s8(v0, r0);
|
|
vst1_s8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? *(src + j) : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZeroInv(const Size2D &size,
|
|
const s8 *srcBase, ptrdiff_t srcStride,
|
|
s8 *dstBase, ptrdiff_t dstStride,
|
|
s8 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int8x16_t vthreshold = vdupq_n_s8(threshold);
|
|
int8x8_t vthreshold8 = vdup_n_s8(threshold);
|
|
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s8* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s8* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw32; j += 32)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int8x16_t v0 = vld1q_s8(src + j);
|
|
int8x16_t v1 = vld1q_s8(src + j + 16);
|
|
int8x16_t r0 = vreinterpretq_s8_u8(vcgtq_s8(v0, vthreshold));
|
|
int8x16_t r1 = vreinterpretq_s8_u8(vcgtq_s8(v1, vthreshold));
|
|
int8x16_t r0a = vbicq_s8(v0, r0);
|
|
int8x16_t r1a = vbicq_s8(v1, r1);
|
|
vst1q_s8(dst + j, r0a);
|
|
vst1q_s8(dst + j + 16, r1a);
|
|
}
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
int8x8_t v0 = vld1_s8(src + j);
|
|
int8x8_t r0 = vreinterpret_s8_u8(vcgt_s8(v0, vthreshold8));
|
|
int8x8_t r0a = vbic_s8(v0, r0);
|
|
vst1_s8(dst + j, r0a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const s16 *srcBase, ptrdiff_t srcStride,
|
|
s16 *dstBase, ptrdiff_t dstStride,
|
|
s16 threshold, s16 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int16x8_t vthreshold16 = vdupq_n_s16(threshold);
|
|
int16x8_t vvalue16 = vdupq_n_s16(value);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int16x8_t v0 = vld1q_s16(src + j);
|
|
int16x8_t v1 = vld1q_s16(src + j + 8);
|
|
uint16x8_t r0 = vcgtq_s16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcgtq_s16(v1, vthreshold16);
|
|
uint16x8_t r0a = vandq_u16(r0, vreinterpretq_u16_s16(vvalue16));
|
|
uint16x8_t r1a = vandq_u16(r1, vreinterpretq_u16_s16(vvalue16));
|
|
vst1q_u16((u16*)dst + j, r0a);
|
|
vst1q_u16((u16*)dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? value : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinaryInv(const Size2D &size,
|
|
const s16 *srcBase, ptrdiff_t srcStride,
|
|
s16 *dstBase, ptrdiff_t dstStride,
|
|
s16 threshold, s16 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int16x8_t vthreshold16 = vdupq_n_s16(threshold);
|
|
int16x8_t vvalue16 = vdupq_n_s16(value);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int16x8_t v0 = vld1q_s16(src + j);
|
|
int16x8_t v1 = vld1q_s16(src + j + 8);
|
|
uint16x8_t r0 = vcleq_s16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcleq_s16(v1, vthreshold16);
|
|
uint16x8_t r0a = vandq_u16(r0, vreinterpretq_u16_s16(vvalue16));
|
|
uint16x8_t r1a = vandq_u16(r1, vreinterpretq_u16_s16(vvalue16));
|
|
vst1q_s16(dst + j, vreinterpretq_s16_u16(r0a));
|
|
vst1q_s16(dst + j + 8, vreinterpretq_s16_u16(r1a));
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : value;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdTruncate(const Size2D &size,
|
|
const s16 *srcBase, ptrdiff_t srcStride,
|
|
s16 *dstBase, ptrdiff_t dstStride,
|
|
s16 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int16x8_t vthreshold16 = vdupq_n_s16(threshold);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int16x8_t v0 = vld1q_s16(src + j);
|
|
int16x8_t v1 = vld1q_s16(src + j + 8);
|
|
int16x8_t r0 = vminq_s16(v0, vthreshold16);
|
|
int16x8_t r1 = vminq_s16(v1, vthreshold16);
|
|
vst1q_s16(dst + j, r0);
|
|
vst1q_s16(dst + j + 8, r1);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? threshold : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZero(const Size2D &size,
|
|
const s16 *srcBase, ptrdiff_t srcStride,
|
|
s16 *dstBase, ptrdiff_t dstStride,
|
|
s16 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int16x8_t vthreshold16 = vdupq_n_s16(threshold);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int16x8_t v0 = vld1q_s16(src + j);
|
|
int16x8_t v1 = vld1q_s16(src + j + 8);
|
|
uint16x8_t r0 = vcgtq_s16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcgtq_s16(v1, vthreshold16);
|
|
uint16x8_t r0a = vandq_u16(vreinterpretq_u16_s16(v0), r0);
|
|
uint16x8_t r1a = vandq_u16(vreinterpretq_u16_s16(v1), r1);
|
|
vst1q_u16((u16*)dst + j, r0a);
|
|
vst1q_u16((u16*)dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? *(src + j) : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZeroInv(const Size2D &size,
|
|
const s16 *srcBase, ptrdiff_t srcStride,
|
|
s16 *dstBase, ptrdiff_t dstStride,
|
|
s16 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int16x8_t vthreshold16 = vdupq_n_s16(threshold);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int16x8_t v0 = vld1q_s16(src + j);
|
|
int16x8_t v1 = vld1q_s16(src + j + 8);
|
|
uint16x8_t r0 = vcgtq_s16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcgtq_s16(v1, vthreshold16);
|
|
uint16x8_t r0a = vbicq_u16(vreinterpretq_u16_s16(v0), r0);
|
|
uint16x8_t r1a = vbicq_u16(vreinterpretq_u16_s16(v1), r1);
|
|
vst1q_u16((u16*)dst + j, r0a);
|
|
vst1q_u16((u16*)dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const u16 *srcBase, ptrdiff_t srcStride,
|
|
u16 *dstBase, ptrdiff_t dstStride,
|
|
u16 threshold, u16 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint16x8_t vthreshold16 = vdupq_n_u16(threshold);
|
|
uint16x8_t vvalue16 = vdupq_n_u16(value);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint16x8_t v0 = vld1q_u16(src + j);
|
|
uint16x8_t v1 = vld1q_u16(src + j + 8);
|
|
uint16x8_t r0 = vcgtq_u16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcgtq_u16(v1, vthreshold16);
|
|
uint16x8_t r0a = vandq_u16(r0, vvalue16);
|
|
uint16x8_t r1a = vandq_u16(r1, vvalue16);
|
|
vst1q_u16(dst + j, r0a);
|
|
vst1q_u16(dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? value : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinaryInv(const Size2D &size,
|
|
const u16 *srcBase, ptrdiff_t srcStride,
|
|
u16 *dstBase, ptrdiff_t dstStride,
|
|
u16 threshold, u16 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint16x8_t vthreshold16 = vdupq_n_u16(threshold);
|
|
uint16x8_t vvalue16 = vdupq_n_u16(value);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint16x8_t v0 = vld1q_u16(src + j);
|
|
uint16x8_t v1 = vld1q_u16(src + j + 8);
|
|
uint16x8_t r0 = vcleq_u16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcleq_u16(v1, vthreshold16);
|
|
uint16x8_t r0a = vandq_u16(r0, vvalue16);
|
|
uint16x8_t r1a = vandq_u16(r1, vvalue16);
|
|
vst1q_u16(dst + j, r0a);
|
|
vst1q_u16(dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : value;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdTruncate(const Size2D &size,
|
|
const u16 *srcBase, ptrdiff_t srcStride,
|
|
u16 *dstBase, ptrdiff_t dstStride,
|
|
u16 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint16x8_t vthreshold16 = vdupq_n_u16(threshold);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint16x8_t v0 = vld1q_u16(src + j);
|
|
uint16x8_t v1 = vld1q_u16(src + j + 8);
|
|
uint16x8_t r0 = vminq_u16(v0, vthreshold16);
|
|
uint16x8_t r1 = vminq_u16(v1, vthreshold16);
|
|
vst1q_u16(dst + j, r0);
|
|
vst1q_u16(dst + j + 8, r1);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? threshold : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZero(const Size2D &size,
|
|
const u16 *srcBase, ptrdiff_t srcStride,
|
|
u16 *dstBase, ptrdiff_t dstStride,
|
|
u16 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint16x8_t vthreshold16 = vdupq_n_u16(threshold);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint16x8_t v0 = vld1q_u16(src + j);
|
|
uint16x8_t v1 = vld1q_u16(src + j + 8);
|
|
uint16x8_t r0 = vcgtq_u16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcgtq_u16(v1, vthreshold16);
|
|
uint16x8_t r0a = vandq_u16(v0, r0);
|
|
uint16x8_t r1a = vandq_u16(v1, r1);
|
|
vst1q_u16(dst + j, r0a);
|
|
vst1q_u16(dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? *(src + j) : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZeroInv(const Size2D &size,
|
|
const u16 *srcBase, ptrdiff_t srcStride,
|
|
u16 *dstBase, ptrdiff_t dstStride,
|
|
u16 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
uint16x8_t vthreshold16 = vdupq_n_u16(threshold);
|
|
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const u16* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
u16* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw16; j += 16)
|
|
{
|
|
internal::prefetch(src + j);
|
|
uint16x8_t v0 = vld1q_u16(src + j);
|
|
uint16x8_t v1 = vld1q_u16(src + j + 8);
|
|
uint16x8_t r0 = vcgtq_u16(v0, vthreshold16);
|
|
uint16x8_t r1 = vcgtq_u16(v1, vthreshold16);
|
|
uint16x8_t r0a = vbicq_u16(v0, r0);
|
|
uint16x8_t r1a = vbicq_u16(v1, r1);
|
|
vst1q_u16(dst + j, r0a);
|
|
vst1q_u16(dst + j + 8, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const s32 *srcBase, ptrdiff_t srcStride,
|
|
s32 *dstBase, ptrdiff_t dstStride,
|
|
s32 threshold, s32 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int32x4_t vthreshold8 = vdupq_n_s32(threshold);
|
|
int32x4_t vvalue8 = vdupq_n_s32(value);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int32x4_t v0 = vld1q_s32(src + j);
|
|
int32x4_t v1 = vld1q_s32(src + j + 4);
|
|
uint32x4_t r0 = vcgtq_s32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcgtq_s32(v1, vthreshold8);
|
|
uint32x4_t r0a = vandq_u32(r0, vreinterpretq_u32_s32(vvalue8));
|
|
uint32x4_t r1a = vandq_u32(r1, vreinterpretq_u32_s32(vvalue8));
|
|
vst1q_u32((u32*)dst + j, r0a);
|
|
vst1q_u32((u32*)dst + j + 4, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? value : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinaryInv(const Size2D &size,
|
|
const s32 *srcBase, ptrdiff_t srcStride,
|
|
s32 *dstBase, ptrdiff_t dstStride,
|
|
s32 threshold, s32 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int32x4_t vthreshold8 = vdupq_n_s32(threshold);
|
|
int32x4_t vvalue8 = vdupq_n_s32(value);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int32x4_t v0 = vld1q_s32(src + j);
|
|
int32x4_t v1 = vld1q_s32(src + j + 4);
|
|
uint32x4_t r0 = vcleq_s32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcleq_s32(v1, vthreshold8);
|
|
uint32x4_t r0a = vandq_u32(r0, vreinterpretq_u32_s32(vvalue8));
|
|
uint32x4_t r1a = vandq_u32(r1, vreinterpretq_u32_s32(vvalue8));
|
|
vst1q_s32(dst + j, vreinterpretq_s32_u32(r0a));
|
|
vst1q_s32(dst + j + 4, vreinterpretq_s32_u32(r1a));
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : value;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdTruncate(const Size2D &size,
|
|
const s32 *srcBase, ptrdiff_t srcStride,
|
|
s32 *dstBase, ptrdiff_t dstStride,
|
|
s32 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int32x4_t vthreshold8 = vdupq_n_s32(threshold);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int32x4_t v0 = vld1q_s32(src + j);
|
|
int32x4_t v1 = vld1q_s32(src + j + 4);
|
|
int32x4_t r0 = vminq_s32(v0, vthreshold8);
|
|
int32x4_t r1 = vminq_s32(v1, vthreshold8);
|
|
vst1q_s32(dst + j, r0);
|
|
vst1q_s32(dst + j + 4, r1);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? threshold : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZero(const Size2D &size,
|
|
const s32 *srcBase, ptrdiff_t srcStride,
|
|
s32 *dstBase, ptrdiff_t dstStride,
|
|
s32 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int32x4_t vthreshold8 = vdupq_n_s32(threshold);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int32x4_t v0 = vld1q_s32(src + j);
|
|
int32x4_t v1 = vld1q_s32(src + j + 4);
|
|
uint32x4_t r0 = vcgtq_s32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcgtq_s32(v1, vthreshold8);
|
|
uint32x4_t r0a = vandq_u32(vreinterpretq_u32_s32(v0), r0);
|
|
uint32x4_t r1a = vandq_u32(vreinterpretq_u32_s32(v1), r1);
|
|
vst1q_u32((u32*)dst + j, r0a);
|
|
vst1q_u32((u32*)dst + j + 4, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? *(src + j) : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZeroInv(const Size2D &size,
|
|
const s32 *srcBase, ptrdiff_t srcStride,
|
|
s32 *dstBase, ptrdiff_t dstStride,
|
|
s32 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
int32x4_t vthreshold8 = vdupq_n_s32(threshold);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const s32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
s32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
int32x4_t v0 = vld1q_s32(src + j);
|
|
int32x4_t v1 = vld1q_s32(src + j + 4);
|
|
uint32x4_t r0 = vcgtq_s32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcgtq_s32(v1, vthreshold8);
|
|
uint32x4_t r0a = vbicq_u32(vreinterpretq_u32_s32(v0), r0);
|
|
uint32x4_t r1a = vbicq_u32(vreinterpretq_u32_s32(v1), r1);
|
|
vst1q_u32((u32*)dst + j, r0a);
|
|
vst1q_u32((u32*)dst + j + 4, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinary(const Size2D &size,
|
|
const f32 *srcBase, ptrdiff_t srcStride,
|
|
f32 *dstBase, ptrdiff_t dstStride,
|
|
f32 threshold, f32 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
float32x4_t vthreshold8 = vdupq_n_f32(threshold);
|
|
float32x4_t vvalue8 = vdupq_n_f32(value);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const f32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
f32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
float32x4_t v0 = vld1q_f32(src + j);
|
|
float32x4_t v1 = vld1q_f32(src + j + 4);
|
|
uint32x4_t r0 = vcgtq_f32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcgtq_f32(v1, vthreshold8);
|
|
uint32x4_t r0a = vandq_u32(r0, vreinterpretq_u32_f32(vvalue8));
|
|
uint32x4_t r1a = vandq_u32(r1, vreinterpretq_u32_f32(vvalue8));
|
|
vst1q_u32((u32*)dst + j, r0a);
|
|
vst1q_u32((u32*)dst + j + 4, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? value : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdBinaryInv(const Size2D &size,
|
|
const f32 *srcBase, ptrdiff_t srcStride,
|
|
f32 *dstBase, ptrdiff_t dstStride,
|
|
f32 threshold, f32 value)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
float32x4_t vthreshold8 = vdupq_n_f32(threshold);
|
|
float32x4_t vvalue8 = vdupq_n_f32(value);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const f32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
f32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
float32x4_t v0 = vld1q_f32(src + j);
|
|
float32x4_t v1 = vld1q_f32(src + j + 4);
|
|
uint32x4_t r0 = vcleq_f32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcleq_f32(v1, vthreshold8);
|
|
uint32x4_t r0a = vandq_u32(r0, vreinterpretq_u32_f32(vvalue8));
|
|
uint32x4_t r1a = vandq_u32(r1, vreinterpretq_u32_f32(vvalue8));
|
|
vst1q_f32(dst + j, vreinterpretq_f32_u32(r0a));
|
|
vst1q_f32(dst + j + 4, vreinterpretq_f32_u32(r1a));
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : value;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
(void)value;
|
|
#endif
|
|
}
|
|
|
|
void thresholdTruncate(const Size2D &size,
|
|
const f32 *srcBase, ptrdiff_t srcStride,
|
|
f32 *dstBase, ptrdiff_t dstStride,
|
|
f32 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
float32x4_t vthreshold8 = vdupq_n_f32(threshold);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const f32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
f32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
float32x4_t v0 = vld1q_f32(src + j);
|
|
float32x4_t v1 = vld1q_f32(src + j + 4);
|
|
float32x4_t r0 = vminq_f32(v0, vthreshold8);
|
|
float32x4_t r1 = vminq_f32(v1, vthreshold8);
|
|
vst1q_f32(dst + j, r0);
|
|
vst1q_f32(dst + j + 4, r1);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? threshold : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZero(const Size2D &size,
|
|
const f32 *srcBase, ptrdiff_t srcStride,
|
|
f32 *dstBase, ptrdiff_t dstStride,
|
|
f32 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
float32x4_t vthreshold8 = vdupq_n_f32(threshold);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const f32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
f32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
float32x4_t v0 = vld1q_f32(src + j);
|
|
float32x4_t v1 = vld1q_f32(src + j + 4);
|
|
uint32x4_t r0 = vcgtq_f32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcgtq_f32(v1, vthreshold8);
|
|
uint32x4_t r0a = vandq_u32(vreinterpretq_u32_f32(v0), r0);
|
|
uint32x4_t r1a = vandq_u32(vreinterpretq_u32_f32(v1), r1);
|
|
vst1q_u32((u32*)dst + j, r0a);
|
|
vst1q_u32((u32*)dst + j + 4, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? *(src + j) : 0;
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
void thresholdToZeroInv(const Size2D &size,
|
|
const f32 *srcBase, ptrdiff_t srcStride,
|
|
f32 *dstBase, ptrdiff_t dstStride,
|
|
f32 threshold)
|
|
{
|
|
internal::assertSupportedConfiguration();
|
|
#ifdef CAROTENE_NEON
|
|
float32x4_t vthreshold8 = vdupq_n_f32(threshold);
|
|
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
|
|
|
|
for (size_t i = 0; i < size.height; ++i)
|
|
{
|
|
const f32* src = internal::getRowPtr(srcBase, srcStride, i);
|
|
f32* dst = internal::getRowPtr(dstBase, dstStride, i);
|
|
size_t j = 0;
|
|
|
|
for (; j < roiw8; j += 8)
|
|
{
|
|
internal::prefetch(src + j);
|
|
float32x4_t v0 = vld1q_f32(src + j);
|
|
float32x4_t v1 = vld1q_f32(src + j + 4);
|
|
uint32x4_t r0 = vcgtq_f32(v0, vthreshold8);
|
|
uint32x4_t r1 = vcgtq_f32(v1, vthreshold8);
|
|
uint32x4_t r0a = vbicq_u32(vreinterpretq_u32_f32(v0), r0);
|
|
uint32x4_t r1a = vbicq_u32(vreinterpretq_u32_f32(v1), r1);
|
|
vst1q_u32((u32*)dst + j, r0a);
|
|
vst1q_u32((u32*)dst + j + 4, r1a);
|
|
}
|
|
for (; j < size.width; j++)
|
|
{
|
|
*(dst + j) = *(src + j) > threshold ? 0 : *(src + j);
|
|
}
|
|
}
|
|
#else
|
|
(void)size;
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)threshold;
|
|
#endif
|
|
}
|
|
|
|
} // namespace CAROTENE_NS
|