From 8936d55675d862440f1e9a1d0576778060df1912 Mon Sep 17 00:00:00 2001 From: Anton Potapov Date: Thu, 18 Jul 2019 23:24:56 +0300 Subject: [PATCH] Fluid Internal Parallelism - added ability to use custom implementation of "parallel for" function --- .../opencv2/gapi/fluid/gfluidkernel.hpp | 11 ++ .../gapi/src/backends/fluid/gfluidbackend.cpp | 22 ++- .../gapi/src/backends/fluid/gfluidbackend.hpp | 4 +- .../test/gapi_fluid_parallel_rois_test.cpp | 171 +++++++++++++++--- 4 files changed, 171 insertions(+), 37 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp b/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp index 18c00d9913..79c5c5f347 100644 --- a/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp +++ b/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp @@ -104,12 +104,23 @@ struct GFluidParallelOutputRois std::vector parallel_rois; }; +struct GFluidParallelFor +{ + std::function)> parallel_for; +}; + namespace detail { template<> struct CompileArgTag { static const char* tag() { return "gapi.fluid.outputRois"; } }; + +template<> struct CompileArgTag +{ + static const char* tag() { return "gapi.fluid.parallelFor"; } +}; + template<> struct CompileArgTag { static const char* tag() { return "gapi.fluid.parallelOutputRois"; } diff --git a/modules/gapi/src/backends/fluid/gfluidbackend.cpp b/modules/gapi/src/backends/fluid/gfluidbackend.cpp index 9115824587..49bc5845c3 100644 --- a/modules/gapi/src/backends/fluid/gfluidbackend.cpp +++ b/modules/gapi/src/backends/fluid/gfluidbackend.cpp @@ -94,8 +94,16 @@ namespace auto graph_data = fluidExtractInputDataFromGraph(graph, nodes); const auto parallel_out_rois = cv::gimpl::getCompileArg(args); + const auto gpfor = cv::gimpl::getCompileArg(args); + + auto serial_for = [](std::size_t count, std::function f){ + for (std::size_t i = 0; i < count; ++i){ + f(i); + } + }; + auto pfor = gpfor.has_value() ? 
gpfor.value().parallel_for : serial_for; return parallel_out_rois.has_value() ? - EPtr{new cv::gimpl::GParallelFluidExecutable (graph, graph_data, std::move(parallel_out_rois.value().parallel_rois))} + EPtr{new cv::gimpl::GParallelFluidExecutable (graph, graph_data, std::move(parallel_out_rois.value().parallel_rois), pfor)} : EPtr{new cv::gimpl::GFluidExecutable (graph, graph_data, std::move(rois.rois))} ; } @@ -1325,7 +1333,9 @@ void cv::gimpl::GFluidExecutable::run(std::vector &input_objs, cv::gimpl::GParallelFluidExecutable::GParallelFluidExecutable(const ade::Graph &g, const FluidGraphInputData &graph_data, - const std::vector &parallelOutputRois) + const std::vector &parallelOutputRois, + const decltype(parallel_for) &pfor) +: parallel_for(pfor) { for (auto&& rois : parallelOutputRois){ tiles.emplace_back(new GFluidExecutable(g, graph_data, rois.rois)); @@ -1342,10 +1352,10 @@ void cv::gimpl::GParallelFluidExecutable::reshape(ade::Graph&, const GCompileArg void cv::gimpl::GParallelFluidExecutable::run(std::vector &&input_objs, std::vector &&output_objs) { - for (auto& tile : tiles ){ - GAPI_Assert((bool)tile); - tile->run(input_objs, output_objs); - } + parallel_for(tiles.size(), [&, this](std::size_t index){ + GAPI_Assert((bool)tiles[index]); + tiles[index]->run(input_objs, output_objs); + }); } diff --git a/modules/gapi/src/backends/fluid/gfluidbackend.hpp b/modules/gapi/src/backends/fluid/gfluidbackend.hpp index 4862b14f56..7923f0c003 100644 --- a/modules/gapi/src/backends/fluid/gfluidbackend.hpp +++ b/modules/gapi/src/backends/fluid/gfluidbackend.hpp @@ -166,10 +166,12 @@ class GParallelFluidExecutable final: public GIslandExecutable { GParallelFluidExecutable(const GParallelFluidExecutable&) = delete; // due std::unique_ptr in members list std::vector> tiles; + decltype(GFluidParallelFor::parallel_for) parallel_for; public: GParallelFluidExecutable(const ade::Graph &g, const FluidGraphInputData &graph_data, - const std::vector &parallelOutputRois); + const 
std::vector &parallelOutputRois, + const decltype(parallel_for) &pfor); virtual inline bool canReshape() const override { return false; } diff --git a/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp b/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp index 9a8d07149c..2275dba0da 100644 --- a/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp +++ b/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp @@ -30,13 +30,24 @@ namespace { void adjust_empty_roi(cv::Rect& roi, cv::Size size){ if (roi.empty()) roi = cv::Rect{{0,0}, size}; } + + cv::GCompileArgs combine(cv::GCompileArgs&& lhs, cv::GCompileArgs const& rhs){ + lhs.insert(lhs.end(), rhs.begin(), rhs.end()); + return std::move(lhs); + } } using namespace cv::gapi_test_kernels; //As GTest can not simultaneously parameterize test with both types and values - lets use type-erasure and virtual interfaces //to use different computation pipelines struct ComputationPair { - virtual void run_with_gapi(const cv::Mat& in_mat, cv::GFluidParallelOutputRois const& parallel_rois, cv::Mat& out_mat) = 0; + void run_with_gapi(const cv::Mat& in_mat, cv::GCompileArgs const& compile_args, cv::Mat& out_mat){ + run_with_gapi_impl(in_mat, combine(cv::compile_args(fluidTestPackage), compile_args), out_mat); + } + void run_with_gapi(const cv::Mat& in_mat, cv::GFluidParallelOutputRois const& parallel_rois, cv::Mat& out_mat){ + run_with_gapi_impl(in_mat, cv::compile_args(fluidTestPackage, parallel_rois), out_mat); + } + virtual void run_with_ocv (const cv::Mat& in_mat, const std::vector& rois, cv::Mat& out_mat) = 0; virtual std::string name() const { return {}; } @@ -47,6 +58,9 @@ struct ComputationPair { std::string custom_name = cp->name(); return o << (custom_name.empty() ? 
typeid(cp).name() : custom_name ); } + +private: + virtual void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat) = 0; }; struct Blur3x3CP : ComputationPair{ @@ -54,13 +68,13 @@ struct Blur3x3CP : ComputationPair{ static constexpr int kernelSize = 3; std::string name() const override { return "Blur3x3"; } - void run_with_gapi(const cv::Mat& in_mat, cv::GFluidParallelOutputRois const& parallel_rois, cv::Mat& out_mat_gapi) override { + void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat_gapi) override { cv::GMat in; cv::GMat out = TBlur3x3::on(in, borderType, {}); cv::GComputation c(cv::GIn(in), cv::GOut(out)); // Run G-API - auto cc = c.compile(cv::descr_of(in_mat), cv::compile_args(fluidTestPackage, parallel_rois)); + auto cc = c.compile(cv::descr_of(in_mat), comp_args); cc(cv::gin(in_mat), cv::gout(out_mat_gapi)); } @@ -76,13 +90,13 @@ struct Blur3x3CP : ComputationPair{ struct AddCCP : ComputationPair{ std::string name() const override { return "AddC"; } - void run_with_gapi(const cv::Mat& in_mat, cv::GFluidParallelOutputRois const& parallel_rois, cv::Mat& out_mat_gapi) override { + void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat_gapi) override { cv::GMat in; cv::GMat out = TAddCSimple::on(in, 1); cv::GComputation c(cv::GIn(in), cv::GOut(out)); // Run G-API - auto cc = c.compile(cv::descr_of(in_mat), cv::compile_args(fluidTestPackage, parallel_rois)); + auto cc = c.compile(cv::descr_of(in_mat), comp_args); cc(cv::gin(in_mat), cv::gout(out_mat_gapi)); } @@ -100,7 +114,7 @@ struct SequenceOfBlursCP : ComputationPair{ BorderTypes borderType = _borderType; std::string name() const override { return "SequenceOfBlurs, border type: " + std::to_string(static_cast(borderType)); } - void run_with_gapi(const cv::Mat& in_mat, cv::GFluidParallelOutputRois const& parallel_rois, cv::Mat& out_mat) override { + void 
run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat) override { cv::Scalar borderValue(0); GMat in; @@ -108,10 +122,10 @@ struct SequenceOfBlursCP : ComputationPair{ auto out = TBlur5x5::on(mid, borderType, borderValue); GComputation c(GIn(in), GOut(out)); - auto cc = c.compile(descr_of(in_mat), cv::compile_args(fluidTestPackage, parallel_rois)); + auto cc = c.compile(descr_of(in_mat), comp_args); cc(cv::gin(in_mat), cv::gout(out_mat)); } - void run_with_ocv (const cv::Mat& in_mat, const std::vector& rois, cv::Mat& out_mat) override { + void run_with_ocv(const cv::Mat& in_mat, const std::vector& rois, cv::Mat& out_mat) override { cv::Mat mid_mat_ocv = Mat::zeros(in_mat.size(), in_mat.type()); cv::Point anchor = {-1, -1}; @@ -123,29 +137,33 @@ struct SequenceOfBlursCP : ComputationPair{ } }; -struct TiledComputation : public TestWithParam >> {}; +struct TiledComputation : public TestWithParam , decltype(cv::GFluidParallelFor::parallel_for)>> {}; TEST_P(TiledComputation, Test) { ComputationPair* cp; cv::Size img_sz; std::vector rois ; + decltype(cv::GFluidParallelFor::parallel_for) pfor; auto mat_type = CV_8UC1; - std::tie(cp, img_sz, rois) = GetParam(); + std::tie(cp, img_sz, rois, pfor) = GetParam(); cv::Mat in_mat = randomMat(img_sz, mat_type); cv::Mat out_mat_gapi = cv::Mat::zeros(img_sz, mat_type); cv::Mat out_mat_ocv = cv::Mat::zeros(img_sz, mat_type); - cp->run_with_gapi(in_mat, asGFluidParallelOutputRois(rois), out_mat_gapi); - cp->run_with_ocv (in_mat, rois, out_mat_ocv); + auto comp_args = combine(cv::compile_args(asGFluidParallelOutputRois(rois)), pfor ? 
cv::compile_args(cv::GFluidParallelFor{pfor}) : cv::GCompileArgs{}); + cp->run_with_gapi(in_mat, comp_args, out_mat_gapi); + cp->run_with_ocv (in_mat, rois, out_mat_ocv); EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv)) << "in_mat : \n" << in_mat << std::endl << "diff matrix :\n " << (out_mat_gapi != out_mat_ocv) << std::endl << "out_mat_gapi: \n" << out_mat_gapi << std::endl - << "out_mat_ocv: \n" << out_mat_ocv << std::endl; + << "out_mat_ocv: \n" << out_mat_ocv << std::endl;; } + + namespace { //this is ugly but other variants (like using shared_ptr) are IMHO even more ugly :) template @@ -165,40 +183,133 @@ auto single_arg_computations = [](){ }; +auto tilesets_8x10 = [](){ + return Values(std::vector{cv::Rect{}}, + std::vector{cv::Rect{0,0,8,5}, cv::Rect{0,5,8,5}}, + std::vector{cv::Rect{0,1,8,3}, cv::Rect{0,4,8,3}}, + std::vector{cv::Rect{0,2,8,3}, cv::Rect{0,5,8,2}}, + std::vector{cv::Rect{0,3,8,4}, cv::Rect{0,9,8,1}}); +}; + +auto tilesets_20x15 = [](){ + return Values(std::vector{cv::Rect{}}, + std::vector{cv::Rect{{0,0},cv::Size{20,7}}, + cv::Rect{{0,7},cv::Size{20,8}}}); +}; + +auto tilesets_320x240 = [](){ + return Values(std::vector{cv::Rect{{0,0}, cv::Size{320,120}}, + cv::Rect{{0,120}, cv::Size{320,120}}}, + + std::vector{cv::Rect{{0,0}, cv::Size{320,120}}, + cv::Rect{{0,120}, cv::Size{320,120}}}, + + std::vector{cv::Rect{{0,0}, cv::Size{320,60}}, + cv::Rect{{0,60}, cv::Size{320,60}}, + cv::Rect{{0,120},cv::Size{320,120}}}); +}; + +namespace{ + auto no_custom_pfor = decltype(cv::GFluidParallelFor::parallel_for){}; +} + INSTANTIATE_TEST_CASE_P(FluidTiledSerial8x10, TiledComputation, Combine( single_arg_computations(), Values(cv::Size(8, 10)), - Values(std::vector{cv::Rect{}}, - std::vector{cv::Rect{0,0,8,5}, cv::Rect{0,5,8,5}}, - std::vector{cv::Rect{0,1,8,3}, cv::Rect{0,4,8,3}}, - std::vector{cv::Rect{0,2,8,3}, cv::Rect{0,5,8,2}}, - std::vector{cv::Rect{0,3,8,4}, cv::Rect{0,9,8,1}})) + tilesets_8x10(), + Values(no_custom_pfor)) ); 
INSTANTIATE_TEST_CASE_P(FluidTiledSerial20x15, TiledComputation, Combine( single_arg_computations(), Values(cv::Size(20, 15)), - Values(std::vector{cv::Rect{}}, - std::vector{cv::Rect{{0,0},cv::Size{20,7}}, - cv::Rect{{0,7},cv::Size{20,8}}})) + tilesets_20x15(), + Values(no_custom_pfor)) ); INSTANTIATE_TEST_CASE_P(FluidTiledSerial320x240, TiledComputation, Combine( single_arg_computations(), Values(cv::Size(320, 240)), - Values(std::vector{cv::Rect{{0,0}, cv::Size{320,120}}, - cv::Rect{{0,120}, cv::Size{320,120}}}, - - std::vector{cv::Rect{{0,0}, cv::Size{320,120}}, - cv::Rect{{0,120}, cv::Size{320,120}}}, - - std::vector{cv::Rect{{0,0}, cv::Size{320,60}}, - cv::Rect{{0,60}, cv::Size{320,60}}, - cv::Rect{{0,120},cv::Size{320,120}}})) + tilesets_320x240(), + Values(no_custom_pfor)) ); //FIXME: add multiple outputs tests + +TEST(FluidTiledParallelFor, basic) +{ + cv::Size img_sz{8,20}; + auto mat_type = CV_8UC1; + + cv::GMat in; + cv::GMat out = TAddCSimple::on(in, 1); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + cv::Mat in_mat = randomMat(img_sz, mat_type); + cv::Mat out_mat_gapi = cv::Mat::zeros(img_sz, mat_type); + + auto parallel_rois = asGFluidParallelOutputRois( std::vector{cv::Rect{0,0,8,5}, cv::Rect{0,5,8,5}}); + + std::size_t items_count = 0; + auto pfor = [&items_count](std::size_t count, std::function ){ + items_count = count; + }; + + // Run G-API + auto cc = c.compile(cv::descr_of(in_mat), cv::compile_args(fluidTestPackage, parallel_rois, GFluidParallelFor{pfor})); + cc(cv::gin(in_mat), cv::gout(out_mat_gapi)); + ASSERT_EQ(parallel_rois.parallel_rois.size(), items_count); +} + +namespace { + auto serial_for = [](std::size_t count, std::function f){ + for (std::size_t i = 0; i < count; ++i){ + f(i); + } + }; + + auto cv_parallel_for = [](std::size_t count, std::function f){ + cv::parallel_for_(cv::Range(0, static_cast(count)), [f](const cv::Range& r){ + for (auto i = r.start; i < r.end; ++i){ + f(i); + } }); + }; +} + 
+INSTANTIATE_TEST_CASE_P(FluidTiledParallel8x10, TiledComputation, + Combine( + single_arg_computations(), + Values(cv::Size(8, 10)), + tilesets_8x10(), + Values(serial_for, cv_parallel_for)) +); } // namespace opencv_test + +//define custom printer for "parallel_for" test parameter +namespace std { + void PrintTo(decltype(cv::GFluidParallelFor::parallel_for) const& f, std::ostream* o); +} + +//separate declaration and definition are needed to please the compiler +void std::PrintTo(decltype(cv::GFluidParallelFor::parallel_for) const& f, std::ostream* o){ + if (f) { + using namespace opencv_test; + if (f.target()){ + *o <<"serial_for"; + } + else if (f.target()){ + *o <<"cv_parallel_for"; + } + else { + *o <<"parallel_for of type: " << f.target_type().name(); + } + } + else + { + *o << "default parallel_for"; + } + +}