mirror of https://github.com/opencv/opencv.git
core: add broadcast (#23965)
* add broadcast_to with tests
* change name
* fix test
* fix implicit type conversion
* replace type of shape with InputArray
* add perf test
* add perf tests which takes care of axis
* v2 from ficus expand
* rename to broadcast
* use randu in place of declare
* doc improvement; smaller scale in perf
* capture get_index by reference
commit a308dfca98 (parent 8a1b998b2b)
@@ -1118,6 +1118,13 @@ CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode);
 */
CV_EXPORTS_W void flipND(InputArray src, OutputArray dst, int axis);

/** @brief Broadcast the given Mat to the given shape.
 * @param src input array
 * @param shape target shape. Should be a list of CV_32S numbers. Note that negative values are not supported.
 * @param dst output array that has the given shape
 */
CV_EXPORTS_W void broadcast(InputArray src, InputArray shape, OutputArray dst);
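For context, a minimal usage sketch of the new function (illustrative only, not part of this diff; the shapes mirror the test added below):

```cpp
#include <opencv2/core.hpp>
#include <iostream>
#include <vector>

int main() {
    // 2x1 single-channel, continuous input
    int sizes[] = {2, 1};
    float data[] = {1.f, 2.f};
    cv::Mat src(2, sizes, CV_32FC1, data);

    // target shape is passed as any InputArray of CV_32S values
    std::vector<int> shape{4, 2, 3};

    cv::Mat dst;
    cv::broadcast(src, shape, dst);

    std::cout << "dst.dims = " << dst.dims << std::endl;                 // 3
    std::cout << "dst.at<float>(3, 1, 2) = "
              << dst.at<float>(3, 1, 2) << std::endl;                    // 2
    return 0;
}
```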

enum RotateFlags {
    ROTATE_90_CLOCKWISE = 0, //!<Rotate 90 degrees clockwise
    ROTATE_180 = 1, //!<Rotate 180 degrees clockwise
@@ -5,8 +5,35 @@ namespace opencv_test
{
using namespace perf;

using BroadcastTest = perf::TestBaseWithParam<std::tuple<std::vector<int>, perf::MatType, std::vector<int>>>;
typedef Size_MatType BinaryOpTest;

PERF_TEST_P_(BroadcastTest, basic)
{
    std::vector<int> shape_src = get<0>(GetParam());
    int dt_type = get<1>(GetParam());
    std::vector<int> shape_dst = get<2>(GetParam());

    cv::Mat src(static_cast<int>(shape_src.size()), shape_src.data(), dt_type);
    cv::Mat dst(static_cast<int>(shape_dst.size()), shape_dst.data(), dt_type);

    cv::randu(src, -1.f, 1.f);

    TEST_CYCLE() cv::broadcast(src, shape_dst, dst);

    SANITY_CHECK_NOTHING();
}

INSTANTIATE_TEST_CASE_P(/*nothing*/ , BroadcastTest,
    testing::Combine(
        testing::Values(std::vector<int>{1, 100, 800},
                        std::vector<int>{10, 1, 800},
                        std::vector<int>{10, 100, 1}),
        testing::Values(CV_32FC1),
        testing::Values(std::vector<int>{10, 100, 800})
    )
);

PERF_TEST_P_(BinaryOpTest, min)
{
    Size sz = get<0>(GetParam());
@@ -7,6 +7,7 @@
#include "opencv2/core/detail/dispatch_helper.impl.hpp"

#include <algorithm> // std::swap_ranges
#include <numeric> // std::accumulate

namespace cv {

@@ -857,6 +858,223 @@ void flipND(InputArray _src, OutputArray _dst, int _axis)
    flipNDImpl(dst.ptr(), dst.size.p, dst.step.p, axis);
}

/*
This function first prepends 1s to each tensor shape so that all shapes have a common number of dimensions (max_ndims), then flattens (merges) the non-broadcast dimensions.
*/
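// Illustrative worked example (not part of the original commit): broadcasting a
// {1, 3} source to a {2, 4, 3} destination, with max_ndims = 3.
//   step 1 pads both shapes to 3 dims and derives dense, per-element steps:
//       src: shape {1, 1, 3}, step {3, 3, 1}
//       dst: shape {2, 4, 3}, step {12, 3, 1}
//   step 2 merges the two leading dims (they are contiguous and consistent):
//       src: shape {1, 1, 3}        dst: shape {1, 8, 3}
//   step 3 zeroes the steps of the size-1 (broadcast) dims:
//       src: step {0, 0, 1}         dst: step {0, 3, 1}
// The copy loop below can then treat the job as 8 destination rows of 3
// elements, all read from the same source row.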
static bool _flatten_for_broadcast(int narrays, int max_ndims, const int* ndims, const int** orig_shape,
                                   int** flatten_shape, size_t** flatten_step) {
    int i, j, k;

    // step 1.
    // * make all inputs and the output max_ndims-dimensional.
    // * compute proper step's
    for (i = max_ndims - 1; i >= 0; i-- ) {
        for (k = 0; k < narrays; k++) {
            j = ndims[k] - (max_ndims - i);
            int sz_i = j >= 0 ? orig_shape[k][j] : 1;
            size_t st_i = i == max_ndims - 1 ? 1 : flatten_step[k][i+1] * flatten_shape[k][i+1];
            flatten_shape[k][i] = sz_i;
            flatten_step[k][i] = st_i;
            if (flatten_shape[k][i] == 0)
                return false;
        }
    }

    // step 2. Let's do the flattening first,
    // since we'd need proper values of steps to check continuity.
    // this loop is probably the most tricky part
    // in the whole implementation of broadcasting.
    j = max_ndims-1;
    for (i = j - 1; i >= 0; i--) {
        bool all_contiguous = true, all_scalars = true, all_consistent = true;
        for(k = 0; k < narrays; k++) {
            size_t st = flatten_step[k][j] * flatten_shape[k][j];
            bool prev_scalar = flatten_shape[k][j] == 1;
            bool scalar = flatten_shape[k][i] == 1;
            all_contiguous = all_contiguous && (st == flatten_step[k][i]);
            all_scalars = all_scalars && scalar;
            all_consistent = all_consistent && (scalar == prev_scalar);
        }
        if (all_contiguous && (all_consistent || all_scalars)) {
            for(k = 0; k < narrays; k++)
                flatten_shape[k][j] *= flatten_shape[k][i];
        } else {
            j--;
            if (i < j) {
                for(k = 0; k < narrays; k++) {
                    flatten_shape[k][j] = flatten_shape[k][i];
                    flatten_step[k][j] = flatten_step[k][i];
                }
            }
        }
    }

    // step 3. Set some step's to 0's.
    for (i = max_ndims-1; i >= j; i--) {
        for (k = 0; k < narrays; k++)
            flatten_step[k][i] = flatten_shape[k][i] == 1 ? 0 : flatten_step[k][i];
    }
    for (; i >= 0; i--) {
        for (k = 0; k < narrays; k++) {
            flatten_step[k][i] = 0;
            flatten_shape[k][i] = 1;
        }
    }
    return true;
}

void broadcast(InputArray _src, InputArray _shape, OutputArray _dst) {
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat();
    CV_CheckTrue(src.isContinuous(), "broadcast: input array must be contiguous");
    CV_CheckChannelsEQ(src.channels(), 1, "broadcast: input array must be single channel");

    Mat shape = _shape.getMat();
    CV_CheckTypeEQ(shape.type(), CV_32S, "broadcast: target shape must be of type int32");
    const auto dims_shape = static_cast<int>(shape.total());
    const auto *ptr_shape = shape.ptr<int>();

    // check valid shape, 1D/0D Mat would fail in the following checks
    const auto dims_src = src.dims;
    CV_CheckLE(dims_src, dims_shape,
               "broadcast: dimension of input array must be less than or equal to dimension of target shape");
    std::vector<int> shape_src{src.size.p, src.size.p + dims_src};
    if (shape_src.size() < static_cast<size_t>(dims_shape)) {
        shape_src.insert(shape_src.begin(), dims_shape - shape_src.size(), 1);
    }
    for (int i = 0; i < static_cast<int>(shape_src.size()); ++i) {
        const auto *shape_target = ptr_shape;
        if (shape_src[i] != 1) {
            CV_CheckEQ(shape_src[i], shape_target[i], "target shape must be equal to input shape or 1");
        }
    }

    // impl
    _dst.create(dims_shape, shape.ptr<int>(), src.type());
    Mat dst = _dst.getMat();
    std::vector<int> is_same_shape(dims_shape, 0);
    for (int i = 0; i < static_cast<int>(shape_src.size()); ++i) {
        if (shape_src[i] == ptr_shape[i]) {
            is_same_shape[i] = 1;
        }
    }
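    // Note (added for clarity, not in the original commit): the product of the
    // flags below is non-zero only when every axis already matches, i.e. no
    // actual broadcasting is needed and a single memcpy suffices.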
    // copy if same shape
    if (std::accumulate(is_same_shape.begin(), is_same_shape.end(), 1, std::multiplies<int>()) != 0) {
        const auto *p_src = src.ptr<const char>();
        auto *p_dst = dst.ptr<char>();
        std::memcpy(p_dst, p_src, dst.total() * dst.elemSize());
        return;
    }
    // other cases
    int max_ndims = std::max(dims_src, dims_shape);
    const int all_ndims[2] = {src.dims, dst.dims};
    const int* orig_shapes[2] = {src.size.p, dst.size.p};
    cv::AutoBuffer<size_t> buff(max_ndims * 4);
    int* flatten_shapes[2] = {(int*)buff.data(), (int*)(buff.data() + max_ndims)};
    size_t* flatten_steps[2] = {(size_t*)(buff.data() + 2 * max_ndims), (size_t*)(buff.data() + 3 * max_ndims)};
    if (_flatten_for_broadcast(2, max_ndims, all_ndims, orig_shapes, flatten_shapes, flatten_steps)) {
        size_t src_dp = flatten_steps[0][max_ndims - 1];
        size_t dst_dp = flatten_steps[1][max_ndims - 1];
        CV_Assert(dst_dp == 1);
        CV_Assert(max_ndims >= 2); // >= 3?
        size_t rowstep_src = flatten_steps[0][max_ndims - 2];
        size_t rowstep_dst = flatten_steps[1][max_ndims - 2];
        const char* ptr_src = src.ptr<const char>();
        char* ptr_dst = dst.ptr<char>();
        size_t esz = src.elemSize();
        int nrows = flatten_shapes[1][max_ndims - 2];
        int ncols = flatten_shapes[1][max_ndims - 1];
        int nplanes = 1;
        CV_Check(esz, esz == 1 || esz == 2 || esz == 4 || esz == 8, "broadcast: not supported data type");

        for (int k = 0; k < max_ndims - 2; k++) {
            nplanes *= flatten_shapes[1][k];
        }
        for (int plane_idx = 0; plane_idx < nplanes; plane_idx++) {
            size_t offset_src = 0, offset_dst = 0;
            size_t idx = (size_t)plane_idx;
            for (int k = max_ndims - 3; k >= 0; k--) {
                size_t prev_idx = idx / flatten_shapes[1][k];
                size_t i_k = (int)(idx - prev_idx * flatten_shapes[1][k]);
                offset_src += i_k * flatten_steps[0][k];
                offset_dst += i_k * flatten_steps[1][k];
                idx = prev_idx;
            }

#define OPENCV_CORE_BROADCAST_LOOP(_Tp) \
            for (int i = 0; i < nrows; i++) { \
                const _Tp *ptr_src_ = (const _Tp*)ptr_src + offset_src + rowstep_src * i; \
                _Tp *ptr_dst_ = (_Tp*)ptr_dst + offset_dst + rowstep_dst * i; \
                if (src_dp == 1) { \
                    for (int j = 0; j < ncols; j++) { \
                        ptr_dst_[j] = ptr_src_[j]; \
                    } \
                } else { \
                    _Tp x = *ptr_src_; \
                    for (int j = 0; j < ncols; j++) { \
                        ptr_dst_[j] = x; \
                    } \
                } \
            }

            if (esz == 1) {
                OPENCV_CORE_BROADCAST_LOOP(int8_t);
            } else if (esz == 2) {
                OPENCV_CORE_BROADCAST_LOOP(int16_t);
            } else if (esz == 4) {
                OPENCV_CORE_BROADCAST_LOOP(int32_t);
            } else if (esz == 8) {
                OPENCV_CORE_BROADCAST_LOOP(int64_t);
            } else {
                CV_Error(cv::Error::StsNotImplemented, "");
            }
#undef OPENCV_CORE_BROADCAST_LOOP
        }
    } else {
        // initial copy (src to dst)
        std::vector<size_t> step_src{src.step.p, src.step.p + dims_src};
        if (step_src.size() < static_cast<size_t>(dims_shape)) {
            step_src.insert(step_src.begin(), dims_shape - step_src.size(), step_src[0]);
        }
        for (size_t i = 0; i < src.total(); ++i) {
            size_t t = i;
            size_t src_offset = 0, dst_offset = 0;
            for (int j = static_cast<int>(shape_src.size() - 1); j >= 0; --j) {
                size_t idx = t / shape_src[j];
                size_t offset = static_cast<size_t>(t - idx * shape_src[j]);
                src_offset += offset * step_src[j];
                dst_offset += offset * dst.step[j];
                t = idx;
            }
            const auto *p_src = src.ptr<const char>();
            auto *p_dst = dst.ptr<char>();
            std::memcpy(p_dst + dst_offset, p_src + src_offset, dst.elemSize());
        }
        // broadcast copy (dst inplace)
        std::vector<int> cumulative_shape(dims_shape, 1);
        int total = static_cast<int>(dst.total());
        for (int i = dims_shape - 1; i >= 0; --i) {
            cumulative_shape[i] = static_cast<int>(total / ptr_shape[i]);
            total = cumulative_shape[i];
        }
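        // Note (added for clarity, not in the original commit): for a target
        // shape of {2, 4, 3}, cumulative_shape becomes {1, 2, 8}, i.e. the
        // number of contiguous blocks to replicate along each axis.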
        for (int i = dims_shape - 1; i >= 0; --i) {
            if (is_same_shape[i] == 1) {
                continue;
            }
            auto step = dst.step[i];
            auto *p_dst = dst.ptr<char>();
            for (int j = 0; j < cumulative_shape[i]; j++) {
                for (int k = 0; k < ptr_shape[i] - 1; k++) {
                    std::memcpy(p_dst + step, p_dst, step);
                    p_dst += step;
                }
                p_dst += step;
            }
        }
    }
}
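As an aside, and not part of this commit, here is a standalone sketch of the shape-compatibility rule that the checks above enforce; the helper name canBroadcast is made up for illustration:

```cpp
#include <vector>

// A source shape, right-aligned against the target and padded with leading 1s,
// is broadcastable when every axis either equals the target axis or equals 1.
static bool canBroadcast(const std::vector<int>& src, const std::vector<int>& dst) {
    if (src.size() > dst.size())
        return false;
    size_t offset = dst.size() - src.size();
    for (size_t i = 0; i < src.size(); ++i) {
        if (src[i] != 1 && src[i] != dst[i + offset])
            return false;
    }
    return true;
}

// canBroadcast({2, 1}, {4, 2, 3}) -> true  (src pads to {1, 2, 1})
// canBroadcast({2, 3}, {4, 2, 4}) -> false (3 vs 4 on the last axis)
```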

void rotate(InputArray _src, OutputArray _dst, int rotateMode)
{
    CV_Assert(_src.dims() <= 2);
@@ -2268,6 +2268,139 @@ INSTANTIATE_TEST_CASE_P(Arithm, FlipND, testing::Combine(
    testing::Values(perf::MatType(CV_8UC1), CV_32FC1)
));

TEST(BroadcastTo, basic) {
    std::vector<int> shape_src{2, 1};
    std::vector<int> data_src{1, 2};
    Mat src(static_cast<int>(shape_src.size()), shape_src.data(), CV_32SC1, data_src.data());

    auto get_index = [](const std::vector<int>& shape, size_t cnt) {
        std::vector<int> index(shape.size());
        size_t t = cnt;
        for (int i = static_cast<int>(shape.size() - 1); i >= 0; --i) {
            size_t idx = t / shape[i];
            index[i] = static_cast<int>(t - idx * shape[i]);
            t = idx;
        }
        return index;
    };
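    // Note (added for clarity, not in the original commit): get_index unravels
    // a flat row-major index, e.g. get_index({4, 2, 3}, 7) returns {1, 0, 1}.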

    auto fn_verify = [&get_index](const Mat& ref, const Mat& res) {
        // check type
        EXPECT_EQ(ref.type(), res.type());
        // check shape
        EXPECT_EQ(ref.dims, res.dims);
        for (int i = 0; i < ref.dims; ++i) {
            EXPECT_EQ(ref.size[i], res.size[i]);
        }
        // check value
        std::vector<int> shape{ref.size.p, ref.size.p + ref.dims};
        for (size_t i = 0; i < ref.total(); ++i) {
            auto index = get_index(shape, i);
            switch (ref.type()) {
                case CV_32SC1: {
                    ASSERT_EQ(ref.at<int>(index.data()), res.at<int>(index.data()));
                } break;
                case CV_8UC1: {
                    ASSERT_EQ(ref.at<uint8_t>(index.data()), res.at<uint8_t>(index.data()));
                } break;
                case CV_32FC1: {
                    ASSERT_EQ(ref.at<float>(index.data()), res.at<float>(index.data()));
                } break;
                default: FAIL() << "Unsupported type: " << ref.type();
            }
        }
    };

    {
        std::vector<int> shape{4, 2, 3};
        std::vector<int> data_ref{
            1, 1, 1, // [0, 0, :]
            2, 2, 2, // [0, 1, :]
            1, 1, 1, // [1, 0, :]
            2, 2, 2, // [1, 1, :]
            1, 1, 1, // [2, 0, :]
            2, 2, 2, // [2, 1, :]
            1, 1, 1, // [3, 0, :]
            2, 2, 2  // [3, 1, :]
        };
        Mat ref(static_cast<int>(shape.size()), shape.data(), src.type(), data_ref.data());
        Mat dst;
        broadcast(src, shape, dst);
        fn_verify(ref, dst);
    }

    {
        Mat _src;
        src.convertTo(_src, CV_8U);
        std::vector<int> shape{4, 2, 3};
        std::vector<uint8_t> data_ref{
            1, 1, 1, // [0, 0, :]
            2, 2, 2, // [0, 1, :]
            1, 1, 1, // [1, 0, :]
            2, 2, 2, // [1, 1, :]
            1, 1, 1, // [2, 0, :]
            2, 2, 2, // [2, 1, :]
            1, 1, 1, // [3, 0, :]
            2, 2, 2  // [3, 1, :]
        };
        Mat ref(static_cast<int>(shape.size()), shape.data(), _src.type(), data_ref.data());
        Mat dst;
        broadcast(_src, shape, dst);
        fn_verify(ref, dst);
    }

    {
        Mat _src;
        src.convertTo(_src, CV_32F);
        std::vector<int> shape{1, 1, 2, 1}; // {2, 1}
        std::vector<float> data_ref{
            1.f, // [0, 0, 0, 0]
            2.f, // [0, 0, 1, 0]
        };
        Mat ref(static_cast<int>(shape.size()), shape.data(), _src.type(), data_ref.data());
        Mat dst;
        broadcast(_src, shape, dst);
        fn_verify(ref, dst);
    }

    {
        std::vector<int> _shape_src{2, 3, 4};
        std::vector<float> _data_src{
            1.f, 2.f, 3.f, 4.f, // [0, 0, :]
            2.f, 3.f, 4.f, 5.f, // [0, 1, :]
            3.f, 4.f, 5.f, 6.f, // [0, 2, :]

            4.f, 5.f, 6.f, 7.f, // [1, 0, :]
            5.f, 6.f, 7.f, 8.f, // [1, 1, :]
            6.f, 7.f, 8.f, 9.f, // [1, 2, :]
        };
        Mat _src(static_cast<int>(_shape_src.size()), _shape_src.data(), CV_32FC1, _data_src.data());

        std::vector<int> shape{2, 1, 2, 3, 4};
        std::vector<float> data_ref{
            1.f, 2.f, 3.f, 4.f, // [0, 0, 0, 0, :]
            2.f, 3.f, 4.f, 5.f, // [0, 0, 0, 1, :]
            3.f, 4.f, 5.f, 6.f, // [0, 0, 0, 2, :]

            4.f, 5.f, 6.f, 7.f, // [0, 0, 1, 0, :]
            5.f, 6.f, 7.f, 8.f, // [0, 0, 1, 1, :]
            6.f, 7.f, 8.f, 9.f, // [0, 0, 1, 2, :]

            1.f, 2.f, 3.f, 4.f, // [1, 0, 0, 0, :]
            2.f, 3.f, 4.f, 5.f, // [1, 0, 0, 1, :]
            3.f, 4.f, 5.f, 6.f, // [1, 0, 0, 2, :]

            4.f, 5.f, 6.f, 7.f, // [1, 0, 1, 0, :]
            5.f, 6.f, 7.f, 8.f, // [1, 0, 1, 1, :]
            6.f, 7.f, 8.f, 9.f, // [1, 0, 1, 2, :]
        };
        Mat ref(static_cast<int>(shape.size()), shape.data(), _src.type(), data_ref.data());
        Mat dst;
        broadcast(_src, shape, dst);
        fn_verify(ref, dst);
    }
}

TEST(Core_minMaxIdx, regression_9207_2)
{
    const int rows = 13;