multi-threaded scatterND and refactor perf
commit 2ed97b9ef3 (parent 2997b4c5fe)
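This commit parallelizes the CPU ScatterND kernel with cv::parallel_for_ and rewrites the ScatterND performance test as a parameterized test over input shape, reduction mode, and backend/target. The kernel uses OpenCV's usual lambda-over-Range pattern: each worker receives a sub-range of slice indices and processes only that chunk. A minimal standalone sketch of the pattern (the buffer and the squaring workload are illustrative, not part of the patch):

    #include <opencv2/core.hpp>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<float> buffer(1 << 20, 2.f);

        // OpenCV splits [0, size) into roughly `nstripes` chunks and hands
        // each worker thread a sub-Range [r.start, r.end).
        double nstripes = buffer.size() / 1024.0;
        cv::parallel_for_(cv::Range(0, (int)buffer.size()), [&](const cv::Range &r) {
            for (int i = r.start; i < r.end; i++)
                buffer[i] *= buffer[i]; // square each element in place
        }, nstripes);

        std::printf("buffer[0] = %.1f\n", buffer[0]); // prints 4.0
        return 0;
    }

As in the patch, nstripes is only a hint for how finely the range is worth splitting; disjoint sub-ranges need no synchronization as long as no two iterations touch the same output.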
@@ -324,103 +324,95 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, Combine(
                           /* withCann= */ false) // only test on CPU
 ));
 
-struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
-{
-    void test_layer(const std::vector<int>& shape, const String reduction = "none")
-    {
-        int backendId = get<0>(GetParam());
-        int targetId = get<1>(GetParam());
-
-        std::vector<int> indices_shape(shape);
-        indices_shape.push_back(int(shape.size()));
-        Mat data(shape, CV_32FC1);
-        Mat indices(indices_shape, CV_32FC1);
-        Mat updates(shape, CV_32FC1);
-
-        Scalar mean = 0.f;
-        Scalar std = 1.f;
-        randn(data, mean, std);
-        randn(updates, mean, std);
-
-        // initialize the indices with index tuples like [0...N, 0...C, 0...H, 0...W]
-        std::vector<int> current_index_tuple(shape.size());
-        int total = data.total();
-        std::vector<int> indices_step;
-        for (int i = 0; i < indices.dims; i++)
-        {
-            int step = indices.step.p[i] / sizeof(float);
-            indices_step.push_back(step);
-        }
-        int t, j, idx, offset_at_idx, offset;
-        for (int i = 0; i < total; i++)
-        {
-            t = i;
-            for (j = shape.size() - 1; j >= 0; j--)
-            {
-                idx = t / shape[j];
-                offset_at_idx = (int)(t - idx * shape[j]);
-                current_index_tuple[j] = offset_at_idx;
-                t = idx;
-            }
-
-            offset = 0;
-            for (j = 0; j < shape.size(); j++)
-                offset += current_index_tuple[j] * indices_step[j];
-
-            for (j = 0; j < shape.size(); j++)
-                indices.at<float>(offset + j) = current_index_tuple[j];
-        }
-
-        Net net;
-        LayerParams lp;
-        lp.type = "ScatterND";
-        lp.name = "testLayer";
-        lp.set("reduction", reduction);
-
-        int id = net.addLayerToPrev(lp.name, lp.type, lp);
-        net.connect(0, 0, id, 0);
-        net.connect(0, 1, id, 1);
-        net.connect(0, 2, id, 2);
-
-        // warmup
-        {
-            std::vector<String> inpNames(3);
-            inpNames[0] = "data";
-            inpNames[1] = "indices";
-            inpNames[2] = "updates";
-            net.setInputsNames(inpNames);
-            net.setInput(data, inpNames[0]);
-            net.setInput(indices, inpNames[1]);
-            net.setInput(updates, inpNames[2]);
-
-            net.setPreferableBackend(backendId);
-            net.setPreferableTarget(targetId);
-            Mat out = net.forward();
-        }
-
-        TEST_CYCLE()
-        {
-            Mat res = net.forward();
-        }
-
-        SANITY_CHECK_NOTHING();
-    }
-
-    int N = 8;
-    int C = 256;
-    int H = 128;
-    int W = 100;
-};
-
-PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND)
-{
-    test_layer({N, C, H ,W});
-}
-
-PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND_add)
-{
-    test_layer({N, C, H , W}, "add");
-}
+using Layer_ScatterND = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
+PERF_TEST_P_(Layer_ScatterND, scatterND) {
+    std::vector<int> shape = get<0>(GetParam());
+    std::string reduction = get<1>(GetParam());
+    int backend_id = get<0>(get<2>(GetParam()));
+    int target_id = get<1>(get<2>(GetParam()));
+
+    std::vector<int> indices_shape(shape);
+    indices_shape.push_back(int(shape.size()));
+    Mat data(shape, CV_32FC1);
+    Mat indices(indices_shape, CV_32FC1);
+    Mat updates(shape, CV_32FC1);
+
+    randn(data, 0.f, 1.f);
+    randn(updates, 0.f, 1.f);
+
+    // initialize the indices with index tuples like [0...N, 0...C, 0...H, 0...W]
+    std::vector<int> current_index_tuple(shape.size());
+    int total = data.total();
+    std::vector<int> indices_step;
+    for (int i = 0; i < indices.dims; i++)
+    {
+        int step = indices.step.p[i] / sizeof(float);
+        indices_step.push_back(step);
+    }
+    int t, j, idx, offset_at_idx, offset;
+    for (int i = 0; i < total; i++)
+    {
+        t = i;
+        for (j = shape.size() - 1; j >= 0; j--)
+        {
+            idx = t / shape[j];
+            offset_at_idx = (int)(t - idx * shape[j]);
+            current_index_tuple[j] = offset_at_idx;
+            t = idx;
+        }
+
+        offset = 0;
+        for (j = 0; j < shape.size(); j++)
+            offset += current_index_tuple[j] * indices_step[j];
+
+        for (j = 0; j < shape.size(); j++)
+            indices.at<float>(offset + j) = current_index_tuple[j];
+    }
+
+    Net net;
+    LayerParams lp;
+    lp.type = "ScatterND";
+    lp.name = "testLayer";
+    lp.set("reduction", reduction);
+
+    int id = net.addLayerToPrev(lp.name, lp.type, lp);
+    net.connect(0, 0, id, 0);
+    net.connect(0, 1, id, 1);
+    net.connect(0, 2, id, 2);
+
+    // warmup
+    {
+        std::vector<String> input_names{"data", "indices", "updates"};
+        net.setInputsNames(input_names);
+        net.setInput(data, input_names[0]);
+        net.setInput(indices, input_names[1]);
+        net.setInput(updates, input_names[2]);
+
+        net.setPreferableBackend(backend_id);
+        net.setPreferableTarget(target_id);
+        Mat out = net.forward();
+    }
+
+    TEST_CYCLE()
+    {
+        Mat res = net.forward();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, Combine(
+    Values(std::vector<int>{2, 128, 64, 50}),
+    Values(std::string("none"), std::string("add")),
+    dnnBackendsAndTargets(/* withInferenceEngine= */ false,
+                          /* withHalide= */ false,
+                          /* withCpuOCV= */ true,
+                          /* withVkCom= */ false,
+                          /* withCUDA= */ false,
+                          /* withNgraph= */ false,
+                          /* withWebnn= */ false,
+                          /* withCann= */ false) // only test on CPU
+));
 
 struct Layer_LayerNorm : public TestBaseWithParam<tuple<Backend, Target> >
 {
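The perf test fills `indices` with identity tuples: the i-th tuple is the (n, c, h, w) coordinate of the i-th element itself, so every update lands on the element it came from. A tiny standalone sketch of the same construction for a 2x3 shape (illustrative only, not taken from the patch):

    #include <cstdio>
    #include <vector>

    int main() {
        // The k-th row of `indices` holds the (row, col) coordinate of the
        // k-th element of a 2x3 tensor, decoded from the flat index by
        // repeated division, least-significant axis first.
        std::vector<int> shape{2, 3};
        int rank = (int)shape.size();
        int total = shape[0] * shape[1];
        std::vector<int> indices(total * rank);
        for (int i = 0; i < total; i++) {
            int t = i;
            for (int j = rank - 1; j >= 0; j--) {
                indices[i * rank + j] = t % shape[j]; // coordinate along axis j
                t /= shape[j];
            }
        }
        for (int i = 0; i < total; i++)
            std::printf("element %d -> (%d, %d)\n", i, indices[i * rank], indices[i * rank + 1]);
        return 0;
    }

With identity indices, reduction "none" simply copies `updates` over `data` and "add" computes an elementwise sum, which keeps the benchmark's workload predictable.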
@@ -795,8 +787,6 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple
 #ifdef HAVE_CUDA
 INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
 #endif
-// INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
-INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
@@ -89,49 +89,59 @@ public:
     // NOTE: This impl does not check whether indices have duplicate entries.
     // The last duplicate entry will overwrite the previous.
     template<typename T, typename Functor>
-    void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
-    {
-        data.copyTo(out);
-
-        const int* shape = data.size.p;
-        const size_t* step = data.step.p;
-
-        const int ind_ndims = indices.dims;
-        const int* ind_shape = indices.size.p;
-        const T* p_indices = indices.ptr<const T>();
-
-        const int upd_ndims = updates.dims;
-        const int* upd_shape = updates.size.p;
-        const T* p_updates = updates.ptr<const T>();
-
-        T* p_out = out.ptr<T>();
-
-        int k = ind_shape[ind_ndims - 1]; // last dim of indices
-        size_t total = (size_t)(indices.total() / k);
+    void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat& output_mat) {
+        input_mat.copyTo(output_mat);
+
+        const auto &input_mat_shape = shape(input_mat);
+        std::vector<size_t> input_mat_step(input_mat_shape.size());
+        for (int i = 0; i < input_mat.dims; i++) {
+            input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
+        }
+
+        const int indices_mat_ndims = indices_mat.dims;
+        const auto &indices_mat_shape = shape(indices_mat);
+
+        const int updates_mat_ndims = updates_mat.dims;
+        const auto &updates_mat_shape = shape(updates_mat);
+
+        int indices_last_dim = indices_mat_shape[indices_mat_ndims - 1]; // last dim of indices
 
         size_t updates_size = 1;
-        for (int i = ind_ndims - 1; i < upd_ndims; i++)
-            updates_size *= upd_shape[i];
+        for (int i = indices_mat_ndims - 1; i < updates_mat_ndims; i++)
+            updates_size *= updates_mat_shape[i];
 
-        size_t inp_start_offset = 0;
-        size_t ind_start_offset = 0;
-        size_t upd_start_offset = 0;
-        for (size_t i = 0; i < total; i++, ind_start_offset += k, upd_start_offset += updates_size)
-        {
-            const T* tmp_p_indices = p_indices + ind_start_offset;
-            inp_start_offset = 0;
-            for (int j = 0; j < k; j++)
-            {
-                CV_Assert(tmp_p_indices[j] < shape[j] && tmp_p_indices[j] > -shape[j]);
-                inp_start_offset += (((int)tmp_p_indices[j] + shape[j]) % shape[j]) * step[j];
+        auto fn = [&](const Range &r) {
+            size_t input_offset = 0,
+                   indices_offset = r.start * indices_last_dim,
+                   updates_offset = r.start * updates_size;
+            for (int i = r.start; i < r.end; i++) {
+                const T* indices = indices_mat.ptr<const T>();
+                const T* updates = updates_mat.ptr<const T>();
+                T* output = output_mat.ptr<T>();
+
+                input_offset = 0;
+                indices += indices_offset;
+                for (int j = 0; j < indices_last_dim; j++) {
+                    int index = static_cast<int>(*(indices + j));
+                    index = (index + input_mat_shape[j]) % input_mat_shape[j];
+                    CV_Assert(index < input_mat_shape[j] && index >= 0);
+                    input_offset += index * input_mat_step[j];
+                }
+
+                updates += updates_offset;
+                output += input_offset;
+                for (int j = 0; j < updates_size; j++) {
+                    output[j] = reduce_operation(output[j], updates[j]);
+                }
+
+                indices_offset += indices_last_dim;
+                updates_offset += updates_size;
             }
-            inp_start_offset /= sizeof(T);
+        };
 
-            const T* tmp_p_updates = p_updates + upd_start_offset;
-            T* tmp_p_out = p_out + inp_start_offset;
-            for (int j = 0; j < updates_size; j++)
-                tmp_p_out[j] = rd(tmp_p_out[j], tmp_p_updates[j]);
-        }
+        size_t total = (size_t)(indices_mat.total() / indices_last_dim);
+        double nstripes = (size_t)total * (indices_last_dim + updates_size) * (1 / 1024.0);
+        parallel_for_(Range(0, total), fn, nstripes);
     }
 
     template<typename... Args>
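Two details of the rewritten kernel: indices are wrapped with (index + shape[j]) % shape[j], so an in-range negative index counts from the end of the axis, and nstripes scales with the per-slice cost (indices_last_dim + updates_size), so slices are grouped into stripes of roughly comparable work. A minimal check of the wrap rule (normalize_index is a hypothetical helper, not in the patch):

    #include <cassert>

    // Wrap an index into [0, dim). Mirrors the kernel's normalization and,
    // like it, only handles indices already in [-dim, dim - 1]; anything
    // outside that band still fails the subsequent bounds assert.
    static int normalize_index(int index, int dim) {
        return (index + dim) % dim;
    }

    int main() {
        assert(normalize_index(-1, 4) == 3); // last element along a dim-4 axis
        assert(normalize_index(0, 4) == 0);
        assert(normalize_index(3, 4) == 3);
        return 0;
    }

The existing NOTE about duplicate index tuples also gains weight here: with duplicates, two stripes may update the same output slice concurrently.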