mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
Merge pull request #17851 from anton-potapov:sole_tbb_executor
* TBB executor for GAPI - the sole executor - unit tests for it - no usage in the GAPI at the momnet * TBB executor for GAPI - introduced new overload of execute to explicitly accept tbb::arena argument - added more basic tests - moved arena creation code into tests - * TBB executor for GAPI - fixed compie errors & warnings * TBB executor for GAPI - split all-in-one execute() function into logicaly independant parts * TBB executor for GAPI - used util::variant in in the tile_node * TBB executor for GAPI - moved copy_through_move to separate header - rearranged details staff in proper namespaces - moved all implementation into detail namespace * TBB executor for GAPI - fixed build error with TBB 4.4. - fixed build warnings * TBB executor for GAPI - aligned strings width - fixed spaces in expressions - fixed english grammar - minor improvements * TBB executor for GAPI - added more comments - minor improvements * TBB executor for GAPI - changed ITT_ prefix for macroses to GAPI_ITT * TBB executor for GAPI - no more "unused" warning for GAPI_DbgAssert - changed local assert macro to man onto GAPI_DbgAssert * TBB executor for GAPI - file renamings - changed local assert macro to man onto GAPI_DbgAsse * TBB executor for GAPI - test file renamed - add more comments * TBB executor for GAPI - minor clenups and cosmetic changes * TBB executor for GAPI - minor clenups and cosmetic changes * TBB executor for GAPI - changed spaces and curly braces alignment * TBB executor for GAPI - minor cleanups * TBB executor for GAPI - minor cleanups
This commit is contained in:
parent
7521f207b1
commit
95ce8f45ea
@ -107,6 +107,7 @@ set(gapi_srcs
|
||||
|
||||
# Executor
|
||||
src/executor/gexecutor.cpp
|
||||
src/executor/gtbbexecutor.cpp
|
||||
src/executor/gstreamingexecutor.cpp
|
||||
src/executor/gasync.cpp
|
||||
|
||||
@ -196,6 +197,10 @@ if(TARGET opencv_test_gapi)
|
||||
target_link_libraries(opencv_test_gapi PRIVATE ade)
|
||||
endif()
|
||||
|
||||
if(HAVE_TBB AND TARGET opencv_test_gapi)
|
||||
ocv_target_link_libraries(opencv_test_gapi PRIVATE tbb)
|
||||
endif()
|
||||
|
||||
if(HAVE_FREETYPE)
|
||||
ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_FREETYPE)
|
||||
if(TARGET opencv_test_gapi)
|
||||
|
@ -2,16 +2,28 @@
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2018 Intel Corporation
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
|
||||
|
||||
#ifndef OPENCV_GAPI_OWN_ASSERT_HPP
|
||||
#define OPENCV_GAPI_OWN_ASSERT_HPP
|
||||
|
||||
#include <opencv2/gapi/util/compiler_hints.hpp>
|
||||
|
||||
#define GAPI_DbgAssertNoOp(expr) { \
|
||||
constexpr bool _assert_tmp = false && (expr); \
|
||||
cv::util::suppress_unused_warning(_assert_tmp); \
|
||||
}
|
||||
|
||||
#if !defined(GAPI_STANDALONE)
|
||||
#include <opencv2/core/base.hpp>
|
||||
#define GAPI_Assert CV_Assert
|
||||
#define GAPI_DbgAssert CV_DbgAssert
|
||||
|
||||
#if defined _DEBUG || defined CV_STATIC_ANALYSIS
|
||||
# define GAPI_DbgAssert CV_DbgAssert
|
||||
#else
|
||||
# define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr)
|
||||
#endif
|
||||
|
||||
#else
|
||||
#include <stdexcept>
|
||||
@ -33,7 +45,7 @@ namespace detail
|
||||
|
||||
|
||||
#ifdef NDEBUG
|
||||
# define GAPI_DbgAssert(expr)
|
||||
# define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr)
|
||||
#else
|
||||
# define GAPI_DbgAssert(expr) GAPI_Assert(expr)
|
||||
#endif
|
||||
|
34
modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp
Normal file
34
modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp
Normal file
@ -0,0 +1,34 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
#ifndef OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP
|
||||
#define OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP
|
||||
|
||||
#include <opencv2/gapi/util/type_traits.hpp> //decay_t
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
//This is a tool to move initialize captures of a lambda in C++11
|
||||
template<typename T>
|
||||
struct copy_through_move_t{
|
||||
T value;
|
||||
const T& get() const {return value;}
|
||||
T& get() {return value;}
|
||||
copy_through_move_t(T&& g) : value(std::move(g)) {}
|
||||
copy_through_move_t(copy_through_move_t&&) = default;
|
||||
copy_through_move_t(copy_through_move_t const& lhs) : copy_through_move_t(std::move(const_cast<copy_through_move_t&>(lhs))) {}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
copy_through_move_t<util::decay_t<T>> copy_through_move(T&& t){
|
||||
return std::forward<T>(t);
|
||||
}
|
||||
} // namespace util
|
||||
} // namespace cv
|
||||
|
||||
#endif /* OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP */
|
59
modules/gapi/src/executor/gapi_itt.hpp
Normal file
59
modules/gapi/src/executor/gapi_itt.hpp
Normal file
@ -0,0 +1,59 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
#ifndef OPENCV_GAPI_GAPI_ITT_HPP
|
||||
#define OPENCV_GAPI_GAPI_ITT_HPP
|
||||
|
||||
//for ITT_NAMED_TRACE_GUARD
|
||||
#include <type_traits>
|
||||
#include <memory>
|
||||
|
||||
// FIXME: It seems that this macro is not propagated here by the OpenCV cmake (as this is not core module).
|
||||
// (Consider using OpenCV's trace.hpp )
|
||||
#ifdef OPENCV_WITH_ITT
|
||||
#include <ittnotify.h>
|
||||
#endif
|
||||
|
||||
#include <opencv2/gapi/util/compiler_hints.hpp>
|
||||
namespace cv {
|
||||
namespace util {
|
||||
template< class T >
|
||||
using remove_reference_t = typename std::remove_reference<T>::type;
|
||||
|
||||
// Home brew ScopeGuard
|
||||
// D will be called automatically with p as argument when ScopeGuard goes out of scope.
|
||||
// call release() on the ScopeGuard object to revoke guard action
|
||||
template<typename T, typename D>
|
||||
auto make_ptr_guard(T* p, D&& d) -> std::unique_ptr<T, util::remove_reference_t<D>> {
|
||||
return {p, std::forward<D>(d)};
|
||||
}
|
||||
} // namespace util
|
||||
|
||||
// FIXME: make it more reusable (and move to other place and other namespace)
|
||||
namespace gimpl { namespace parallel {
|
||||
#ifdef OPENCV_WITH_ITT
|
||||
extern const __itt_domain* gapi_itt_domain;
|
||||
|
||||
namespace {
|
||||
auto make_itt_guard = [](__itt_string_handle* h) {
|
||||
__itt_task_begin(gapi_itt_domain, __itt_null, __itt_null, (h));
|
||||
return util::make_ptr_guard(reinterpret_cast<int*>(1), [](int* ) { __itt_task_end(gapi_itt_domain); });
|
||||
};
|
||||
} // namespace
|
||||
|
||||
#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) auto name = cv::gimpl::parallel::make_itt_guard(h); cv::util::suppress_unused_warning(name)
|
||||
#else
|
||||
struct dumb_guard {void reset(){}};
|
||||
#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) cv::gimpl::parallel::dumb_guard name; cv::util::suppress_unused_warning(name)
|
||||
#endif
|
||||
|
||||
#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) GAPI_ITT_NAMED_TRACE_GUARD(itt_trace_guard_##LINE, h)
|
||||
#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL(LINE, h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h)
|
||||
#define GAPI_ITT_AUTO_TRACE_GUARD(h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL(__LINE__, h)
|
||||
}} //gimpl::parallel
|
||||
} //namespace cv
|
||||
|
||||
#endif /* OPENCV_GAPI_GAPI_ITT_HPP */
|
445
modules/gapi/src/executor/gtbbexecutor.cpp
Normal file
445
modules/gapi/src/executor/gtbbexecutor.cpp
Normal file
@ -0,0 +1,445 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
#include "gtbbexecutor.hpp"
|
||||
|
||||
#if defined(HAVE_TBB)
|
||||
#include "gapi_itt.hpp"
|
||||
|
||||
#include <opencv2/gapi/own/assert.hpp>
|
||||
#include <opencv2/gapi/util/copy_through_move.hpp>
|
||||
#include "logger.hpp" // GAPI_LOG
|
||||
|
||||
#include <tbb/task.h>
|
||||
#include <memory> // unique_ptr
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
|
||||
#include <chrono>
|
||||
|
||||
#define ASSERT(expr) GAPI_DbgAssert(expr)
|
||||
|
||||
#define LOG_INFO(tag, ...) GAPI_LOG_INFO(tag, __VA_ARGS__)
|
||||
#define LOG_WARNING(tag, ...) GAPI_LOG_WARNING(tag, __VA_ARGS__)
|
||||
#define LOG_DEBUG(tag, ...) GAPI_LOG_DEBUG(tag, __VA_ARGS__)
|
||||
|
||||
|
||||
#ifdef OPENCV_WITH_ITT
|
||||
const __itt_domain* cv::gimpl::parallel::gapi_itt_domain = __itt_domain_create("GAPI Context");
|
||||
#endif
|
||||
|
||||
namespace cv { namespace gimpl { namespace parallel {
|
||||
|
||||
namespace detail {
|
||||
// some helper staff to deal with tbb::task related entities
|
||||
namespace tasking {
|
||||
|
||||
enum class use_tbb_scheduler_bypass {
|
||||
NO,
|
||||
YES
|
||||
};
|
||||
|
||||
inline void assert_graph_is_running(tbb::task* root) {
|
||||
// tbb::task::wait_for_all block calling thread until task ref_count is dropped to 1
|
||||
// So if the root task ref_count is greater than 1 graph still has a job to do and
|
||||
// according wait_for_all() has not yet returned
|
||||
ASSERT(root->ref_count() > 1);
|
||||
}
|
||||
|
||||
// made template to break circular dependencies
|
||||
template<typename body_t>
|
||||
struct functor_task : tbb::task {
|
||||
body_t body;
|
||||
|
||||
template<typename arg_t>
|
||||
functor_task(arg_t&& a) : body(std::forward<arg_t>(a)) {}
|
||||
|
||||
tbb::task * execute() override {
|
||||
assert_graph_is_running(parent());
|
||||
|
||||
auto reuse_current_task = body();
|
||||
// if needed, say TBB to execute current task once again
|
||||
return (use_tbb_scheduler_bypass::YES == reuse_current_task) ? (recycle_as_continuation(), this) : nullptr;
|
||||
}
|
||||
~functor_task() {
|
||||
assert_graph_is_running(parent());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename body_t>
|
||||
auto allocate_task(tbb::task* root, body_t const& body) -> functor_task<body_t>* {
|
||||
return new(tbb::task::allocate_additional_child_of(*root)) functor_task<body_t>{body};
|
||||
}
|
||||
|
||||
template<typename body_t>
|
||||
void spawn_no_assert(tbb::task* root, body_t const& body) {
|
||||
tbb::task::spawn(* allocate_task(root, body));
|
||||
}
|
||||
|
||||
#ifdef OPENCV_WITH_ITT
|
||||
namespace {
|
||||
static __itt_string_handle* ittTbbAddReadyBlocksToQueue = __itt_string_handle_create("add ready blocks to queue");
|
||||
static __itt_string_handle* ittTbbSpawnReadyBlocks = __itt_string_handle_create("spawn ready blocks");
|
||||
static __itt_string_handle* ittTbbEnqueueSpawnReadyBlocks = __itt_string_handle_create("enqueueing a spawn of ready blocks");
|
||||
static __itt_string_handle* ittTbbUnlockMasterThread = __itt_string_handle_create("Unlocking master thread");
|
||||
}
|
||||
#endif // OPENCV_WITH_ITT
|
||||
|
||||
|
||||
template<typename body_t>
|
||||
void batch_spawn(size_t count, tbb::task* root, body_t const& body, bool do_assert_graph_is_running = true) {
|
||||
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbSpawnReadyBlocks);
|
||||
if (do_assert_graph_is_running) {
|
||||
assert_graph_is_running(root);
|
||||
}
|
||||
|
||||
for (size_t i=0; i<count; i++) {
|
||||
spawn_no_assert(root, body);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct destroy_tbb_task {
|
||||
void operator()(tbb::task* t) const { if (t) tbb::task::destroy(*t);};
|
||||
};
|
||||
|
||||
using root_t = std::unique_ptr<tbb::task, destroy_tbb_task>;
|
||||
|
||||
root_t inline create_root(tbb::task_group_context& ctx) {
|
||||
root_t root{new (tbb::task::allocate_root(ctx)) tbb::empty_task};
|
||||
root->set_ref_count(1); // required by wait_for_all, as it waits until counter drops to 1
|
||||
return root;
|
||||
}
|
||||
|
||||
std::size_t inline tg_context_traits() {
|
||||
// Specify tbb::task_group_context::concurrent_wait in the traits to ask TBB scheduler not to change
|
||||
// ref_count of the task we wait on (root) when wait is complete.
|
||||
return tbb::task_group_context::default_traits | tbb::task_group_context::concurrent_wait;
|
||||
}
|
||||
|
||||
} // namespace tasking
|
||||
|
||||
namespace async {
|
||||
struct async_tasks_t {
|
||||
std::atomic<size_t> count {0};
|
||||
std::condition_variable cv;
|
||||
std::mutex mtx;
|
||||
};
|
||||
|
||||
enum class wake_tbb_master {
|
||||
NO,
|
||||
YES
|
||||
};
|
||||
|
||||
void inline wake_master(async_tasks_t& async_tasks, wake_tbb_master wake_master) {
|
||||
// TODO: seems that this can be relaxed
|
||||
auto active_async_tasks = --async_tasks.count;
|
||||
|
||||
if ((active_async_tasks == 0) || (wake_master == wake_tbb_master::YES)) {
|
||||
// Was the last async task or asked to wake TBB master up(e.g. there are new TBB tasks to execute)
|
||||
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbUnlockMasterThread);
|
||||
// While decrement of async_tasks_t::count is atomic, it might occur after the waiting
|
||||
// thread has read its value but _before_ it actually starts waiting on the condition variable.
|
||||
// So, lock acquire is needed to guarantee that current condition check (if any) in the waiting thread
|
||||
// (possibly ran in parallel to async_tasks_t::count decrement above) is completed _before_ signal is issued.
|
||||
// Therefore when notify_one is called, waiting thread is either sleeping on the condition variable or
|
||||
// running a new check which is guaranteed to pick the new value and return from wait().
|
||||
|
||||
// There is no need to _hold_ the lock while signaling, only to acquire it.
|
||||
std::unique_lock<std::mutex> {async_tasks.mtx}; // Acquire and release the lock.
|
||||
async_tasks.cv.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
struct master_thread_sleep_lock_t
|
||||
{
|
||||
struct sleep_unlock {
|
||||
void operator()(async_tasks_t* t) const {
|
||||
ASSERT(t);
|
||||
wake_master(*t, wake_tbb_master::NO);
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<async_tasks_t, sleep_unlock> guard;
|
||||
|
||||
master_thread_sleep_lock_t() = default;
|
||||
master_thread_sleep_lock_t(async_tasks_t* async_tasks_ptr) : guard(async_tasks_ptr) {
|
||||
// TODO: seems that this can be relaxed
|
||||
++(guard->count);
|
||||
}
|
||||
|
||||
void unlock(wake_tbb_master wake) {
|
||||
if (auto* p = guard.release()) {
|
||||
wake_master(*p, wake);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
master_thread_sleep_lock_t inline lock_sleep_master(async_tasks_t& async_tasks) {
|
||||
return {&async_tasks};
|
||||
}
|
||||
|
||||
enum class is_tbb_work_present {
|
||||
NO,
|
||||
YES
|
||||
};
|
||||
|
||||
//RAII object to block TBB master thread (one that does wait_for_all())
|
||||
//N.B. :wait_for_all() return control when root ref_count drops to 1,
|
||||
struct root_wait_lock_t {
|
||||
struct root_decrement_ref_count{
|
||||
void operator()(tbb::task* t) const {
|
||||
ASSERT(t);
|
||||
auto result = t->decrement_ref_count();
|
||||
ASSERT(result >= 1);
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<tbb::task, root_decrement_ref_count> guard;
|
||||
|
||||
root_wait_lock_t() = default;
|
||||
root_wait_lock_t(tasking::root_t& root, is_tbb_work_present& previous_state) : guard{root.get()} {
|
||||
// Block the master thread while the *this object is alive.
|
||||
auto new_root_ref_count = root->add_ref_count(1);
|
||||
previous_state = (new_root_ref_count == 2) ? is_tbb_work_present::NO : is_tbb_work_present::YES;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
root_wait_lock_t inline lock_wait_master(tasking::root_t& root, is_tbb_work_present& previous_state) {
|
||||
return root_wait_lock_t{root, previous_state};
|
||||
}
|
||||
|
||||
} // namespace async
|
||||
|
||||
inline tile_node* pop(prio_items_queue_t& q) {
|
||||
tile_node* node = nullptr;
|
||||
bool popped = q.try_pop(node);
|
||||
ASSERT(popped && "queue should be non empty as we push items to it before we spawn");
|
||||
return node;
|
||||
}
|
||||
|
||||
namespace graph {
|
||||
// Returns : number of items actually pushed into the q
|
||||
std::size_t inline push_ready_dependants(prio_items_queue_t& q, tile_node* node) {
|
||||
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbAddReadyBlocksToQueue);
|
||||
std::size_t ready_items = 0;
|
||||
// enable dependent tasks
|
||||
for (auto* dependant : node->dependants) {
|
||||
// fetch_and_sub returns previous value
|
||||
if (1 == dependant->dependency_count.fetch_sub(1)) {
|
||||
// tile node is ready for execution, add it to the queue
|
||||
q.push(dependant);
|
||||
++ready_items;
|
||||
}
|
||||
}
|
||||
return ready_items;
|
||||
}
|
||||
|
||||
struct exec_ctx {
|
||||
tbb::task_arena& arena;
|
||||
prio_items_queue_t& q;
|
||||
tbb::task_group_context tg_ctx;
|
||||
tasking::root_t root;
|
||||
detail::async::async_tasks_t async_tasks;
|
||||
std::atomic<size_t> executed {0};
|
||||
|
||||
exec_ctx(tbb::task_arena& arena_, prio_items_queue_t& q_)
|
||||
: arena(arena_), q(q_),
|
||||
// As the traits is last argument, explicitly specify (default) value for first argument
|
||||
tg_ctx{tbb::task_group_context::bound, tasking::tg_context_traits()},
|
||||
root(tasking::create_root(tg_ctx))
|
||||
{}
|
||||
};
|
||||
|
||||
// At the moment there are no suitable tools to manage TBB priorities on task by task basis.
|
||||
// Instead priority queue is used to respect tile_node priorities.
|
||||
// As well, TBB task is not bound to any particular tile_node until actually executed.
|
||||
|
||||
// Strictly speaking there are two graphs here:
|
||||
// - G-API one, described by the connected tile_node instances.
|
||||
// This graph is :
|
||||
// - Known beforehand, and do not change during the execution (i.e. static)
|
||||
// - Contains both TBB non-TBB parts
|
||||
// - prioritized, (i.e. all nodes has assigned priority of execution)
|
||||
//
|
||||
// - TBB task tree, which is :
|
||||
// - flat (Has only two levels : root and leaves)
|
||||
// - dynamic, i.e. new leaves are added on demand when new tbb tasks are spawned
|
||||
// - describes only TBB/CPU part of the whole graph
|
||||
// - non-prioritized (i.e. all tasks are created equal)
|
||||
|
||||
// Class below represents TBB task payload.
|
||||
//
|
||||
// Each instance basically does the three things :
|
||||
// 1. Gets the tile_node item from the top of the queue
|
||||
// 2. Executes its body
|
||||
// 3. Pushes dependent tile_nodes to the queue once they are ready
|
||||
//
|
||||
struct task_body {
|
||||
exec_ctx& ctx;
|
||||
|
||||
std::size_t push_ready_dependants(tile_node* node) const {
|
||||
return graph::push_ready_dependants(ctx.q, node);
|
||||
}
|
||||
|
||||
void spawn_clones(std::size_t items) const {
|
||||
tasking::batch_spawn(items, ctx.root.get(), *this);
|
||||
}
|
||||
|
||||
task_body(exec_ctx& ctx_) : ctx(ctx_) {}
|
||||
tasking::use_tbb_scheduler_bypass operator()() const {
|
||||
ASSERT(!ctx.q.empty() && "Spawned task with no job to do ? ");
|
||||
|
||||
tile_node* node = detail::pop(ctx.q);
|
||||
|
||||
auto result = tasking::use_tbb_scheduler_bypass::NO;
|
||||
// execute the task
|
||||
|
||||
if (auto p = util::get_if<tile_node::sync_task_body>(&(node->task_body))) {
|
||||
// synchronous task
|
||||
p->body();
|
||||
|
||||
std::size_t ready_items = push_ready_dependants(node);
|
||||
|
||||
if (ready_items > 0) {
|
||||
// spawn one less tasks and say TBB to reuse(recycle) current task
|
||||
spawn_clones(ready_items - 1);
|
||||
result = tasking::use_tbb_scheduler_bypass::YES;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LOG_DEBUG(NULL, "Async task");
|
||||
using namespace detail::async;
|
||||
using util::copy_through_move;
|
||||
|
||||
auto block_master = copy_through_move(lock_sleep_master(ctx.async_tasks));
|
||||
|
||||
auto self_copy = *this;
|
||||
auto callback = [node, block_master, self_copy] () mutable /*due to block_master.get().unlock()*/ {
|
||||
LOG_DEBUG(NULL, "Async task callback is called");
|
||||
// Implicitly unlock master right in the end of callback
|
||||
auto master_sleep_lock = std::move(block_master);
|
||||
std::size_t ready_items = self_copy.push_ready_dependants(node);
|
||||
if (ready_items > 0) {
|
||||
auto master_was_active = is_tbb_work_present::NO;
|
||||
{
|
||||
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbEnqueueSpawnReadyBlocks);
|
||||
// Force master thread (one that does wait_for_all()) to (actively) wait for enqueued tasks
|
||||
// and unlock it right after all dependent tasks are spawned.
|
||||
|
||||
auto root_wait_lock = copy_through_move(lock_wait_master(self_copy.ctx.root, master_was_active));
|
||||
|
||||
// TODO: add test to cover proper holding of root_wait_lock
|
||||
// As the calling thread most likely is not TBB one, instead of spawning TBB tasks directly we
|
||||
// enqueue a task which will spawn them.
|
||||
// For master thread to not leave wait_for_all() prematurely,
|
||||
// hold the root_wait_lock until need tasks are actually spawned.
|
||||
self_copy.ctx.arena.enqueue([ready_items, self_copy, root_wait_lock]() {
|
||||
self_copy.spawn_clones(ready_items);
|
||||
// TODO: why we need this? Either write a descriptive comment or remove it
|
||||
volatile auto unused = root_wait_lock.get().guard.get();
|
||||
util::suppress_unused_warning(unused);
|
||||
});
|
||||
}
|
||||
// Wake master thread (if any) to pick up the enqueued tasks iff:
|
||||
// 1. there is new TBB work to do, and
|
||||
// 2. Master thread was sleeping on condition variable waiting for async tasks to complete
|
||||
// (There was no active work before (i.e. root->ref_count() was == 1))
|
||||
auto wake_master = (master_was_active == is_tbb_work_present::NO) ?
|
||||
wake_tbb_master::YES : wake_tbb_master::NO;
|
||||
master_sleep_lock.get().unlock(wake_master);
|
||||
}
|
||||
};
|
||||
|
||||
auto& body = util::get<tile_node::async_task_body>(node->task_body).body;
|
||||
body(std::move(callback), node->total_order_index);
|
||||
}
|
||||
|
||||
ctx.executed++;
|
||||
// reset dependecy_count to initial state to simplify re-execution of the same graph
|
||||
node->dependency_count = node->dependencies;
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
} // namespace detail
|
||||
}}} // namespace cv::gimpl::parallel
|
||||
|
||||
void cv::gimpl::parallel::execute(prio_items_queue_t& q) {
|
||||
// get the reference to current task_arena (i.e. one we are running in)
|
||||
#if TBB_INTERFACE_VERSION > 9002
|
||||
using attach_t = tbb::task_arena::attach;
|
||||
#else
|
||||
using attach_t = tbb::internal::attach;
|
||||
#endif
|
||||
|
||||
tbb::task_arena arena{attach_t{}};
|
||||
execute(q, arena);
|
||||
}
|
||||
|
||||
void cv::gimpl::parallel::execute(prio_items_queue_t& q, tbb::task_arena& arena) {
|
||||
using namespace detail;
|
||||
graph::exec_ctx ctx{arena, q};
|
||||
|
||||
arena.execute(
|
||||
[&]() {
|
||||
// Passed in queue is assumed to contain starting tasks, i.e. ones with no (or resolved) dependencies
|
||||
auto num_start_tasks = q.size();
|
||||
|
||||
// TODO: use recursive spawning and task soft affinity for faster task distribution
|
||||
// As graph is starting and no task has been spawned yet
|
||||
// assert_graph_is_running(root) will not hold, so spawn without assert
|
||||
tasking::batch_spawn(num_start_tasks, ctx.root.get(), graph::task_body{ctx}, /* assert_graph_is_running*/false);
|
||||
|
||||
using namespace std::chrono;
|
||||
high_resolution_clock timer;
|
||||
|
||||
auto tbb_work_done = [&ctx]() { return 1 == ctx.root->ref_count(); };
|
||||
auto async_work_done = [&ctx]() { return 0 == ctx.async_tasks.count; };
|
||||
do {
|
||||
// First participate in execution of TBB graph till there are no more ready tasks.
|
||||
ctx.root->wait_for_all();
|
||||
|
||||
if (!async_work_done()) { // Wait on the conditional variable iff there is active async work
|
||||
auto start = timer.now();
|
||||
std::unique_lock<std::mutex> lk(ctx.async_tasks.mtx);
|
||||
// Wait (probably by sleeping) until all async tasks are completed or new TBB tasks are created.
|
||||
// FIXME: Use TBB resumable tasks here to avoid blocking TBB thread
|
||||
ctx.async_tasks.cv.wait(lk, [&]{return async_work_done() || !tbb_work_done() ;});
|
||||
|
||||
LOG_INFO(NULL, "Slept for " << duration_cast<milliseconds>(timer.now() - start).count() << " ms \n");
|
||||
}
|
||||
}
|
||||
while(!tbb_work_done() || !async_work_done());
|
||||
|
||||
ASSERT(tbb_work_done() && async_work_done() && "Graph is still running?");
|
||||
}
|
||||
);
|
||||
|
||||
LOG_INFO(NULL, "Done. Executed " << ctx.executed << " tasks");
|
||||
}
|
||||
|
||||
std::ostream& cv::gimpl::parallel::operator<<(std::ostream& o, tile_node const& n) {
|
||||
o << "("
|
||||
<< " at:" << &n << ","
|
||||
<< "indx: " << n.total_order_index << ","
|
||||
<< "deps #:" << n.dependency_count.value << ", "
|
||||
<< "prods:" << n.dependants.size();
|
||||
|
||||
o << "[";
|
||||
for (auto* d: n.dependants) {
|
||||
o << d << ",";
|
||||
}
|
||||
o << "]";
|
||||
|
||||
o << ")";
|
||||
return o;
|
||||
}
|
||||
|
||||
#endif // HAVE_TBB
|
103
modules/gapi/src/executor/gtbbexecutor.hpp
Normal file
103
modules/gapi/src/executor/gtbbexecutor.hpp
Normal file
@ -0,0 +1,103 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
#ifndef OPENCV_GAPI_TBB_EXECUTOR_HPP
|
||||
#define OPENCV_GAPI_TBB_EXECUTOR_HPP
|
||||
|
||||
#if !defined(GAPI_STANDALONE)
|
||||
#include <opencv2/cvconfig.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_TBB)
|
||||
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <iosfwd>
|
||||
|
||||
#include <tbb/concurrent_priority_queue.h>
|
||||
#include <tbb/task_arena.h>
|
||||
|
||||
#include <opencv2/gapi/util/variant.hpp>
|
||||
|
||||
namespace cv { namespace gimpl { namespace parallel {
|
||||
|
||||
// simple wrapper to allow copies of std::atomic
|
||||
template<typename count_t>
|
||||
struct atomic_copyable_wrapper {
|
||||
std::atomic<count_t> value;
|
||||
|
||||
atomic_copyable_wrapper(count_t val) : value(val) {}
|
||||
atomic_copyable_wrapper(atomic_copyable_wrapper const& lhs) : value (lhs.value.load(std::memory_order_relaxed)) {}
|
||||
|
||||
atomic_copyable_wrapper& operator=(count_t val) {
|
||||
value.store(val, std::memory_order_relaxed);
|
||||
return *this;
|
||||
}
|
||||
|
||||
count_t fetch_sub(count_t val) {
|
||||
return value.fetch_sub(val);
|
||||
}
|
||||
|
||||
count_t fetch_add(count_t val) {
|
||||
return value.fetch_add(val);
|
||||
}
|
||||
};
|
||||
|
||||
struct async_tag {};
|
||||
constexpr async_tag async;
|
||||
|
||||
// Class describing a piece of work in the node in the tasks graph.
|
||||
// Most of the fields are set only once during graph compilation and never changes.
|
||||
// (However at the moment they can not be made const due to two phase initialization
|
||||
// of the tile_node objects)
|
||||
// FIXME: refactor the code to make the const?
|
||||
struct tile_node {
|
||||
// place in totally ordered queue of tasks to execute. Inverse to priority, i.e.
|
||||
// lower index means higher priority
|
||||
size_t total_order_index = 0;
|
||||
|
||||
// FIXME: use templates here instead of std::function
|
||||
struct sync_task_body {
|
||||
std::function<void()> body;
|
||||
};
|
||||
struct async_task_body {
|
||||
std::function<void(std::function<void()>&& callback, size_t total_order_index)> body;
|
||||
};
|
||||
|
||||
util::variant<sync_task_body, async_task_body> task_body;
|
||||
|
||||
// number of dependencies according to a dependency graph (i.e. number of "input" edges).
|
||||
size_t dependencies = 0;
|
||||
|
||||
// number of unsatisfied dependencies. When drops to zero task is ready for execution.
|
||||
// Initially equal to "dependencies"
|
||||
atomic_copyable_wrapper<size_t> dependency_count = 0;
|
||||
|
||||
std::vector<tile_node*> dependants;
|
||||
|
||||
tile_node(decltype(sync_task_body::body)&& f) : task_body(sync_task_body{std::move(f)}) {};
|
||||
tile_node(async_tag, decltype(async_task_body::body)&& f) : task_body(async_task_body{std::move(f)}) {};
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& o, tile_node const& n);
|
||||
|
||||
struct tile_node_indirect_priority_comparator {
|
||||
bool operator()(tile_node const * lhs, tile_node const * rhs) const {
|
||||
return lhs->total_order_index > rhs->total_order_index;
|
||||
}
|
||||
};
|
||||
|
||||
using prio_items_queue_t = tbb::concurrent_priority_queue<tile_node*, tile_node_indirect_priority_comparator>;
|
||||
|
||||
void execute(prio_items_queue_t& q);
|
||||
void execute(prio_items_queue_t& q, tbb::task_arena& arena);
|
||||
|
||||
}}} // namespace cv::gimpl::parallel
|
||||
|
||||
#endif // HAVE_TBB
|
||||
|
||||
#endif // OPENCV_GAPI_TBB_EXECUTOR_HPP
|
172
modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp
Normal file
172
modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp
Normal file
@ -0,0 +1,172 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
// Deliberately include .cpp file instead of header as we use non exported function (execute)
|
||||
#include <executor/gtbbexecutor.cpp>
|
||||
|
||||
#if defined(HAVE_TBB)
|
||||
|
||||
#include "../test_precomp.hpp"
|
||||
#include <tbb/task_arena.h>
|
||||
#include <thread>
|
||||
|
||||
namespace {
|
||||
tbb::task_arena create_task_arena(int max_concurrency = tbb::task_arena::automatic /* set to 1 for single thread */) {
|
||||
unsigned int reserved_for_master_threads = 1;
|
||||
if (max_concurrency == 1) {
|
||||
// Leave no room for TBB worker threads, by reserving all to masters.
|
||||
// TBB runtime guarantees that no worker threads will join the arena
|
||||
// if max_concurrency is equal to reserved_for_master_threads
|
||||
// except 1:1 + use of enqueued tasks for safety guarantee.
|
||||
// So deliberately make it 2:2 to force TBB not to create extra thread.
|
||||
//
|
||||
// N.B. one slot will left empty as only one master thread(one that
|
||||
// calls root->wait_for_all()) will join the arena.
|
||||
|
||||
// FIXME: strictly speaking master can take any free slot, not the first one.
|
||||
// However at the moment master seems to pick 0 slot all the time.
|
||||
max_concurrency = 2;
|
||||
reserved_for_master_threads = 2;
|
||||
}
|
||||
return tbb::task_arena{max_concurrency, reserved_for_master_threads};
|
||||
}
|
||||
}
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
TEST(TBBExecutor, Basic) {
|
||||
using namespace cv::gimpl::parallel;
|
||||
bool executed = false;
|
||||
prio_items_queue_t q;
|
||||
tile_node n([&]() {
|
||||
executed = true;
|
||||
});
|
||||
q.push(&n);
|
||||
execute(q);
|
||||
EXPECT_EQ(true, executed);
|
||||
}
|
||||
|
||||
TEST(TBBExecutor, SerialExecution) {
|
||||
using namespace cv::gimpl::parallel;
|
||||
const int n = 10;
|
||||
prio_items_queue_t q;
|
||||
std::vector<tile_node> nodes; nodes.reserve(n+1);
|
||||
std::vector<std::thread::id> thread_id(n);
|
||||
for (int i=0; i <n; i++) {
|
||||
nodes.push_back(tile_node([&, i]() {
|
||||
thread_id[i] = std::this_thread::get_id();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
|
||||
}));
|
||||
q.push(&nodes.back());
|
||||
}
|
||||
|
||||
auto serial_arena = create_task_arena(1);
|
||||
execute(q, serial_arena);
|
||||
auto print_thread_ids = [&] {
|
||||
std::stringstream str;
|
||||
for (auto& i : thread_id) { str << i <<" ";}
|
||||
return str.str();
|
||||
};
|
||||
EXPECT_NE(thread_id[0], std::thread::id{}) << print_thread_ids();
|
||||
EXPECT_EQ(thread_id.size(), static_cast<size_t>(std::count(thread_id.begin(), thread_id.end(), thread_id[0])))
|
||||
<< print_thread_ids();
|
||||
}
|
||||
|
||||
TEST(TBBExecutor, AsyncBasic) {
|
||||
using namespace cv::gimpl::parallel;
|
||||
|
||||
std::atomic<bool> callback_ready {false};
|
||||
std::function<void()> callback;
|
||||
|
||||
std::atomic<bool> callback_called {false};
|
||||
std::atomic<bool> master_is_waiting {true};
|
||||
std::atomic<bool> master_was_blocked_until_callback_called {false};
|
||||
|
||||
auto async_thread = std::thread([&] {
|
||||
bool slept = false;
|
||||
while (!callback_ready) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
slept = true;
|
||||
}
|
||||
if (!slept) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
callback();
|
||||
callback_called = true;
|
||||
master_was_blocked_until_callback_called = (master_is_waiting == true);
|
||||
});
|
||||
|
||||
auto async_task_body = [&](std::function<void()>&& cb, size_t /*total_order_index*/) {
|
||||
callback = std::move(cb);
|
||||
callback_ready = true;
|
||||
};
|
||||
tile_node n(async, std::move(async_task_body));
|
||||
|
||||
prio_items_queue_t q;
|
||||
q.push(&n);
|
||||
execute(q);
|
||||
master_is_waiting = false;
|
||||
|
||||
async_thread.join();
|
||||
|
||||
EXPECT_EQ(true, callback_called);
|
||||
EXPECT_EQ(true, master_was_blocked_until_callback_called);
|
||||
}
|
||||
|
||||
TEST(TBBExecutor, Dependencies) {
|
||||
using namespace cv::gimpl::parallel;
|
||||
const int n = 10;
|
||||
bool serial = true;
|
||||
std::atomic<int> counter {0};
|
||||
prio_items_queue_t q;
|
||||
std::vector<tile_node> nodes; nodes.reserve(n+1);
|
||||
const int invalid_order = -10;
|
||||
std::vector<int> tiles_exec_order(n, invalid_order);
|
||||
|
||||
auto add_dependency_to = [](tile_node& node, tile_node& dependency) {
|
||||
dependency.dependants.push_back(&node);
|
||||
node.dependencies++;
|
||||
node.dependency_count.fetch_add(1);
|
||||
};
|
||||
for (int i=0; i <n; i++) {
|
||||
nodes.push_back(tile_node([&, i]() {
|
||||
tiles_exec_order[i] = counter++;
|
||||
if (!serial) {
|
||||
//sleep gives a better chance for other threads to take part in the execution
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
}));
|
||||
if (i >0) {
|
||||
auto last_node = nodes.end() - 1;
|
||||
add_dependency_to(*last_node, *(last_node -1));
|
||||
}
|
||||
}
|
||||
|
||||
q.push(&nodes.front());
|
||||
|
||||
auto arena = serial ? create_task_arena(1) : create_task_arena();
|
||||
execute(q, arena);
|
||||
auto print_execution_order = [&] {
|
||||
std::stringstream str;
|
||||
for (auto& i : tiles_exec_order) { str << i <<" ";}
|
||||
return str.str();
|
||||
};
|
||||
ASSERT_EQ(0, std::count(tiles_exec_order.begin(), tiles_exec_order.end(), invalid_order))
|
||||
<< "Not all " << n << " task executed ?\n"
|
||||
<<" execution order : " << print_execution_order();
|
||||
|
||||
for (size_t i=0; i <nodes.size(); i++) {
|
||||
auto node_exec_order = tiles_exec_order[i];
|
||||
for (auto* dependee : nodes[i].dependants) {
|
||||
auto index = std::distance(&nodes.front(), dependee);
|
||||
auto dependee_execution_order = tiles_exec_order[index];
|
||||
ASSERT_LT(node_exec_order, dependee_execution_order) << "node number " << index << " is executed earlier than it's dependency " << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace opencv_test
|
||||
#endif //HAVE_TBB
|
Loading…
Reference in New Issue
Block a user