Merge pull request #19917 from AsyaPronina:asyadev/itt_traces_in_gstreamingexecutor

This commit is contained in:
Alexander Alekhin 2021-05-11 14:44:59 +03:00 committed by GitHub
commit df05bc65c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 272 additions and 106 deletions

View File

@ -162,6 +162,9 @@ set(gapi_srcs
# Python bridge
src/backends/ie/bindings_ie.cpp
src/backends/python/gpythonbackend.cpp
# Utils (ITT tracing)
src/utils/itt.cpp
)
ocv_add_dispatched_file(backends/fluid/gfluidimgproc_func SSE4_1 AVX2)
@ -178,13 +181,22 @@ ocv_module_include_directories("${CMAKE_CURRENT_LIST_DIR}/src")
ocv_create_module()
ocv_target_link_libraries(${the_module} PRIVATE ade)
if(OPENCV_GAPI_INF_ENGINE)
ocv_target_link_libraries(${the_module} PRIVATE ${INF_ENGINE_TARGET})
endif()
if(HAVE_TBB)
ocv_target_link_libraries(${the_module} PRIVATE tbb)
endif()
# TODO: Consider support of ITT in G-API standalone mode.
# ITT tracing is wired into the module only when both CV_TRACE and HAVE_ITT are set.
# In that case the module sources are compiled with OPENCV_WITH_ITT=1 (the macro
# checked by src/utils/itt.hpp), and the ITT include directories and libraries are
# added to the module. The definition and the libraries are attached as PRIVATE.
if(CV_TRACE AND HAVE_ITT)
ocv_target_compile_definitions(${the_module} PRIVATE -DOPENCV_WITH_ITT=1)
ocv_module_include_directories(${ITT_INCLUDE_DIRS})
ocv_target_link_libraries(${the_module} PRIVATE ${ITT_LIBRARIES})
endif()
set(__test_extra_deps "")
if(OPENCV_GAPI_INF_ENGINE)
list(APPEND __test_extra_deps ${INF_ENGINE_TARGET})

View File

@ -71,7 +71,7 @@ void GraphMetaExecutable::run(std::vector<InObj> &&input_objs,
cv::util::get<cv::detail::OpaqueRef>(out_arg) = it->second;
}
class GraphMetaBackendImpl final: public cv::gapi::GBackend::Priv {
class GGraphMetaBackendImpl final: public cv::gapi::GBackend::Priv {
virtual void unpackKernel(ade::Graph &,
const ade::NodeHandle &,
const cv::GKernelImpl &) override {
@ -88,7 +88,7 @@ class GraphMetaBackendImpl final: public cv::gapi::GBackend::Priv {
};
cv::gapi::GBackend graph_meta_backend() {
static cv::gapi::GBackend this_backend(std::make_shared<GraphMetaBackendImpl>());
static cv::gapi::GBackend this_backend(std::make_shared<GGraphMetaBackendImpl>());
return this_backend;
}

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
#include "precomp.hpp"
@ -26,6 +26,8 @@
#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
#include "utils/itt.hpp"
// FIXME: Is there a way to take a typed graph (our GModel),
// and create a new typed graph _ATOP_ of that (by extending with a couple of
// new types?).
@ -251,8 +253,13 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
context.m_state = m_nodesToStates.at(op_info.nh);
}
// Now trigger the executable unit
k.m_runF(context);
{
GAPI_ITT_DYNAMIC_LOCAL_HANDLE(op_hndl, op.k.name.c_str());
GAPI_ITT_AUTO_TRACE_GUARD(op_hndl);
// Now trigger the executable unit
k.m_runF(context);
}
//As Kernels are forbidden to allocate memory for (Mat) outputs,
//this code seems redundant, at least for Mats

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
#include "precomp.hpp"
@ -60,6 +60,8 @@ template<typename T> using QueueClass = tbb::concurrent_bounded_queue<T>;
template<typename T> using QueueClass = cv::gapi::own::concurrent_bounded_queue<T>;
#endif // TBB
#include "utils/itt.hpp"
namespace IE = InferenceEngine;
namespace {
@ -757,6 +759,9 @@ static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg
// to post outputs blobs (cv::GMat's).
static void PostOutputs(InferenceEngine::InferRequest &request,
std::shared_ptr<IECallContext> ctx) {
GAPI_ITT_STATIC_LOCAL_HANDLE(ie_cb_post_outputs_hndl, "IE_async_callback_PostOutputs");
GAPI_ITT_AUTO_TRACE_GUARD(ie_cb_post_outputs_hndl);
for (auto i : ade::util::iota(ctx->uu.params.num_out))
{
auto& out_mat = ctx->outMatR(i);

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
#include "precomp.hpp"
@ -10,6 +10,8 @@
#include <sstream>
#include <unordered_set>
#include <unordered_map>
#include <typeinfo> // typeid
#include <cctype> // std::isdigit
#include <ade/util/checked_cast.hpp>
#include <ade/util/zip_range.hpp> // zip_range, indexed
@ -335,6 +337,53 @@ ade::NodeHandle GIslandModel::producerOf(const ConstGraph &g, ade::NodeHandle &d
return ade::NodeHandle();
}
// Builds the island name used for tracing: "<island name>_<backend name>".
//
// island_nh - handle of the island node whose FusedIsland metadata is read.
// g         - island graph which owns island_nh.
//
// The backend name is derived from the RTTI name of the backend implementation
// object; if it can't be extracted, the full RTTI name is used as is.
std::string GIslandModel::traceIslandName(const ade::NodeHandle& island_nh, const Graph& g) {
    auto island_ptr = g.metadata(island_nh).get<FusedIsland>().object;
    std::string island_name = island_ptr->name();

    auto& backend_impl = island_ptr->backend().priv();
    const std::string backend_impl_type_name = typeid(backend_impl).name();

    // NOTE: Most of the existing backend implementation classes are named using the
    // "*G[Name]BackendImpl*" pattern.
    // We try to match against this pattern and retrieve just the [Name] part.
    // If matching isn't successful, the full (possibly mangled) class name is used.
    //
    // To match, the following algorithm is used:
    // 1) Find the "BackendImpl" substring; if it doesn't exist, go to step 5.
    // 2) Let from_pos point to the second character of the string.
    // 3) Starting from from_pos, look for a 'G' character.
    //    If there is none, or it is located at/after the "BackendImpl" position, go to step 5.
    // 4) Check that the character right before the found 'G' is a digit, i.e. that it is
    //    a part of the length prefix of a new word in the mangled name (previous words
    //    may be namespaces).
    //    If so, the match is found: return the text between the found 'G' and "BackendImpl".
    //    If not, set from_pos to the position right after the found 'G' and go to step 3.
    // 5) Matching is not successful, return the full class name.
    const auto to_pos = backend_impl_type_name.find("BackendImpl");
    std::size_t from_pos = 0;
    bool matched = false;

    if (to_pos != std::string::npos) {
        while (!matched) {
            // The search always starts at index >= 1, so [from_pos - 1] below is in range.
            from_pos = backend_impl_type_name.find('G', from_pos + 1);
            if (from_pos == std::string::npos || from_pos >= to_pos) {
                break;
            }
            // std::isdigit() has undefined behavior for negative char values,
            // so the character is converted to unsigned char first.
            const auto prev_char = static_cast<unsigned char>(backend_impl_type_name[from_pos - 1]);
            matched = std::isdigit(prev_char) != 0;
        }
    }

    const std::string backend_name = matched
        ? backend_impl_type_name.substr(from_pos + 1, to_pos - from_pos - 1)
        : backend_impl_type_name;

    return island_name + "_" + backend_name;
}
void GIslandExecutable::run(GIslandExecutable::IInput &in, GIslandExecutable::IOutput &out)
{
// Default implementation: just reuse the existing old-fashioned run

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2019 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
#ifndef OPENCV_GAPI_GISLANDMODEL_HPP
@ -290,7 +290,11 @@ namespace GIslandModel
// from the original model (! don't mix with DataSlot)
// FIXME: GAPI_EXPORTS because of tests only!
ade::NodeHandle GAPI_EXPORTS producerOf(const ConstGraph &g, ade::NodeHandle &data_nh);
// traceIslandName - returns pretty island name for passed island node.
// Function uses RTTI to assembly name.
// In case if name of backend implementation class doesn't fit *G[Name]BackendImpl* pattern,
// raw mangled name of class will be used.
std::string traceIslandName(const ade::NodeHandle& op_nh, const Graph& g);
} // namespace GIslandModel
}} // namespace cv::gimpl

View File

@ -1,59 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2020 Intel Corporation
#ifndef OPENCV_GAPI_GAPI_ITT_HPP
#define OPENCV_GAPI_GAPI_ITT_HPP
//for ITT_NAMED_TRACE_GUARD
#include <type_traits>
#include <memory>
// FIXME: It seems that this macro is not propagated here by the OpenCV cmake (as this is not core module).
// (Consider using OpenCV's trace.hpp )
#ifdef OPENCV_WITH_ITT
#include <ittnotify.h>
#endif
#include <opencv2/gapi/util/compiler_hints.hpp>
namespace cv {
namespace util {
template< class T >
using remove_reference_t = typename std::remove_reference<T>::type;
// Home brew ScopeGuard
// D will be called automatically with p as argument when ScopeGuard goes out of scope.
// call release() on the ScopeGuard object to revoke guard action
template<typename T, typename D>
auto make_ptr_guard(T* p, D&& d) -> std::unique_ptr<T, util::remove_reference_t<D>> {
return {p, std::forward<D>(d)};
}
} // namespace util
// FIXME: make it more reusable (and move to other place and other namespace)
namespace gimpl { namespace parallel {
#ifdef OPENCV_WITH_ITT
extern const __itt_domain* gapi_itt_domain;
namespace {
auto make_itt_guard = [](__itt_string_handle* h) {
__itt_task_begin(gapi_itt_domain, __itt_null, __itt_null, (h));
return util::make_ptr_guard(reinterpret_cast<int*>(1), [](int* ) { __itt_task_end(gapi_itt_domain); });
};
} // namespace
#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) auto name = cv::gimpl::parallel::make_itt_guard(h); cv::util::suppress_unused_warning(name)
#else
struct dumb_guard {void reset(){}};
#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) cv::gimpl::parallel::dumb_guard name; cv::util::suppress_unused_warning(name)
#endif
#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) GAPI_ITT_NAMED_TRACE_GUARD(itt_trace_guard_##LINE, h)
#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL(LINE, h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h)
#define GAPI_ITT_AUTO_TRACE_GUARD(h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL(__LINE__, h)
}} //gimpl::parallel
} //namespace cv
#endif /* OPENCV_GAPI_GAPI_ITT_HPP */

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019-2020 Intel Corporation
// Copyright (C) 2019-2021 Intel Corporation
#include "precomp.hpp"
@ -16,6 +16,8 @@
#include <opencv2/gapi/core.hpp> // GCopy -- FIXME - to be removed!
#endif // GAPI_STANDALONE
#include "utils/itt.hpp"
#include "api/gproto_priv.hpp" // ptr(GRunArgP)
#include "compiler/passes/passes.hpp"
#include "backends/common/gbackend.hpp" // createMat
@ -492,9 +494,15 @@ void emitterActorThread(std::shared_ptr<cv::gimpl::GIslandEmitter> emitter,
return;
}
GAPI_ITT_STATIC_LOCAL_HANDLE(emitter_hndl, "emitter");
GAPI_ITT_STATIC_LOCAL_HANDLE(emitter_pull_hndl, "emitter_pull");
GAPI_ITT_STATIC_LOCAL_HANDLE(emitter_push_hndl, "emitter_push");
// Now start emitting the data from the source to the pipeline.
while (true)
{
GAPI_ITT_AUTO_TRACE_GUARD(emitter_hndl);
Cmd cancel;
if (in_queue.try_pop(cancel))
{
@ -507,8 +515,15 @@ void emitterActorThread(std::shared_ptr<cv::gimpl::GIslandEmitter> emitter,
// Try to obtain next data chunk from the source
cv::GRunArg data;
if (emitter->pull(data))
const bool result = [&](){
GAPI_ITT_AUTO_TRACE_GUARD(emitter_pull_hndl);
return emitter->pull(data);
}();
if (result)
{
GAPI_ITT_AUTO_TRACE_GUARD(emitter_push_hndl);
// On success, broadcast it to our readers
for (auto &&oq : out_queues)
{
@ -539,7 +554,11 @@ void syncActorThread(std::vector<Q*> in_queues,
std::vector<bool> pop_nexts(in_queues.size());
std::vector<Cmd> cmds(in_queues.size());
GAPI_ITT_STATIC_LOCAL_HANDLE(sync_hndl, "sync_actor");
GAPI_ITT_STATIC_LOCAL_HANDLE(sync_pull_1_queue_hndl, "sync_actor_pull_from_1_queue");
GAPI_ITT_STATIC_LOCAL_HANDLE(sync_push_hndl, "sync_actor_push");
while (true) {
GAPI_ITT_AUTO_TRACE_GUARD(sync_hndl);
// pop_nexts indicates which queue still contains earlier timestamps and
// needs to be popped at least one more time.
// For each iteration (frame) we need to pull from each input queue at least once,
@ -562,7 +581,10 @@ void syncActorThread(std::vector<Q*> in_queues,
auto& q = std::get<1>(val);
auto& cmd = std::get<2>(val);
q->pop(cmd);
{
GAPI_ITT_AUTO_TRACE_GUARD(sync_pull_1_queue_hndl);
q->pop(cmd);
}
if (cv::util::holds_alternative<Stop>(cmd)) {
// We got a stop command from one of the input queues.
// Rewind all input queues till Stop command,
@ -603,9 +625,12 @@ void syncActorThread(std::vector<Q*> in_queues,
} while (ade::util::any_of(pop_nexts, [](bool v){ return v; }));
// Finally we got all our inputs synchronized, push them further down the graph
for (auto &&it : ade::util::zip(out_queues, cmds)) {
for (auto &&q : std::get<0>(it)) {
q->push(std::get<1>(it));
{
GAPI_ITT_AUTO_TRACE_GUARD(sync_push_hndl);
for (auto &&it : ade::util::zip(out_queues, cmds)) {
for (auto &&q : std::get<0>(it)) {
q->push(std::get<1>(it));
}
}
}
}
@ -619,7 +644,11 @@ class StreamingInput final: public cv::gimpl::GIslandExecutable::IInput
virtual cv::gimpl::StreamMsg get() override
{
GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::get");
GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl);
cv::GRunArgs isl_input_args;
if (!qr.getInputVector(in_queues, in_constants, isl_input_args))
{
// Stop case
@ -680,6 +709,9 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput
// Prepare this object for posting
virtual cv::GRunArgP get(int idx) override
{
GAPI_ITT_STATIC_LOCAL_HANDLE(outputs_get_hndl, "StreamingOutput::get (alloc)");
GAPI_ITT_AUTO_TRACE_GUARD(outputs_get_hndl);
std::lock_guard<std::mutex> lock{m_mutex};
using MatType = cv::Mat;
@ -756,8 +788,12 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput
m_postIdx[cv::gimpl::proto::ptr(ret_val)] = std::make_pair(idx, iter);
return ret_val;
}
virtual void post(cv::GRunArgP&& argp) override
{
GAPI_ITT_STATIC_LOCAL_HANDLE(outputs_post_hndl, "StreamingOutput::post");
GAPI_ITT_AUTO_TRACE_GUARD(outputs_post_hndl);
std::lock_guard<std::mutex> lock{m_mutex};
// Mark the output ready for posting. If it is the first in the line,
@ -795,6 +831,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput
post_iter = m_postings[out_idx].erase(post_iter);
}
}
virtual void post(cv::gimpl::EndOfStream&&) override
{
std::lock_guard<std::mutex> lock{m_mutex};
@ -859,23 +896,27 @@ public:
// executable for processing.
// - Pushes processing results down to consumers - to the subsequent queues.
// Note: Every data object consumer has its own queue.
void islandActorThread(std::vector<cv::gimpl::RcDesc> in_rcs, // FIXME: this is...
std::vector<cv::gimpl::RcDesc> out_rcs, // FIXME: ...basically just...
cv::GMetaArgs out_metas, // ...
std::shared_ptr<cv::gimpl::GIslandExecutable> island, // FIXME: ...a copy of OpDesc{}.
void islandActorThread(std::vector<cv::gimpl::RcDesc> in_rcs, // FIXME: this is...
std::vector<cv::gimpl::RcDesc> out_rcs, // FIXME: ...basically just...
cv::GMetaArgs out_metas, // ...
std::shared_ptr<cv::gimpl::GIslandExecutable> island_exec, // FIXME: ...a copy of OpDesc{}.
std::vector<Q*> in_queues,
cv::GRunArgs in_constants,
std::vector< std::vector<Q*> > out_queues)
std::vector< std::vector<Q*> > out_queues,
const std::string& island_meta_info)
{
GAPI_Assert(in_queues.size() == in_rcs.size());
GAPI_Assert(out_queues.size() == out_rcs.size());
GAPI_Assert(out_queues.size() == out_metas.size());
QueueReader qr;
StreamingInput input(qr, in_queues, in_constants, in_rcs);
StreamingOutput output(out_metas, out_queues, out_rcs, island);
StreamingOutput output(out_metas, out_queues, out_rcs, island_exec);
GAPI_ITT_DYNAMIC_LOCAL_HANDLE(island_hndl, island_meta_info.c_str());
while (!output.done())
{
island->run(input, output);
GAPI_ITT_AUTO_TRACE_GUARD(island_hndl);
island_exec->run(input, output);
}
}
@ -904,11 +945,21 @@ void collectorThread(std::vector<Q*> in_queues,
flags[idx] = true;
}
GAPI_ITT_STATIC_LOCAL_HANDLE(collector_hndl, "collector");
GAPI_ITT_STATIC_LOCAL_HANDLE(collector_get_results_hndl, "collector_get_results");
GAPI_ITT_STATIC_LOCAL_HANDLE(collector_push_hndl, "collector_push");
QueueReader qr;
while (true)
{
GAPI_ITT_AUTO_TRACE_GUARD(collector_hndl);
cv::GRunArgs this_result(out_size);
const bool ok = qr.getResultsVector(in_queues, in_mapping, out_size, this_result);
const bool ok = [&](){
GAPI_ITT_AUTO_TRACE_GUARD(collector_get_results_hndl);
return qr.getResultsVector(in_queues, in_mapping, out_size, this_result);
}();
if (!ok)
{
if (handle_stop)
@ -918,7 +969,11 @@ void collectorThread(std::vector<Q*> in_queues,
// Terminate the thread anyway
return;
}
out_queue.push(Cmd{Result{std::move(this_result), flags}});
{
GAPI_ITT_AUTO_TRACE_GUARD(collector_push_hndl);
out_queue.push(Cmd{Result{std::move(this_result), flags}});
}
}
}
@ -1379,11 +1434,8 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins)
m_sync->registerVideoEmitters(std::move(video_emitters));
// FIXME: The below code assumes our graph may have only one
// real video source (and so, only one stream which may really end)
// all other inputs are "constant" generators.
// Craft here a completion callback to notify Const emitters that
// a video source is over
// any of the video sources is over
GAPI_Assert(m_const_emitter_queues.size() == m_const_vals.size());
auto real_video_completion_cb = [this]()
{
@ -1431,7 +1483,7 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins)
for (auto &&op : m_ops)
{
// Prepare island thread parameters
auto island = m_gim.metadata(op.nh).get<IslandExec>().object;
auto island_exec = m_gim.metadata(op.nh).get<IslandExec>().object;
// Collect actor's input queues
auto in_queues = input_queues(*m_island_graph, op.nh);
@ -1443,6 +1495,13 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins)
out_queues.push_back(reader_queues(*m_island_graph, out_eh));
}
// Start with empty island meta information
std::string island_meta_info { };
#if defined(OPENCV_WITH_ITT)
// If ITT tracing is enabled, fill the meta information with the built island name
island_meta_info = GIslandModel::traceIslandName(op.nh, m_gim);
#endif // OPENCV_WITH_ITT
// If Island Executable is recompiled, all its stuff including internal kernel states
// are recreated and re-initialized automatically.
// But if not, we should notify Island Executable about new started stream to let it update
@ -1456,10 +1515,11 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins)
op.in_objects,
op.out_objects,
op.out_metas,
island,
island_exec,
in_queues,
op.in_constants,
out_queues);
out_queues,
island_meta_info);
}
// Finally, start collector thread(s).
@ -1536,6 +1596,9 @@ void cv::gimpl::GStreamingExecutor::wait_shutdown()
bool cv::gimpl::GStreamingExecutor::pull(cv::GRunArgsP &&outs)
{
GAPI_ITT_STATIC_LOCAL_HANDLE(pull_hndl, "GStreamingExecutor::pull");
GAPI_ITT_AUTO_TRACE_GUARD(pull_hndl);
// This pull() can only be called when there's no desynchronized
// parts in the graph.
GAPI_Assert(!m_desync &&

View File

@ -2,14 +2,14 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2020 Intel Corporation
// Copyright (C) 2020-2021 Intel Corporation
#include "gtbbexecutor.hpp"
#if defined(HAVE_TBB) && (TBB_INTERFACE_VERSION < 12000)
// TODO: TBB task API has been deprecated and removed in 12000
#include "gapi_itt.hpp"
#include "utils/itt.hpp"
#include <opencv2/gapi/own/assert.hpp>
#include <opencv2/gapi/util/copy_through_move.hpp>
@ -30,10 +30,6 @@
#define LOG_DEBUG(tag, ...) GAPI_LOG_DEBUG(tag, __VA_ARGS__)
#ifdef OPENCV_WITH_ITT
const __itt_domain* cv::gimpl::parallel::gapi_itt_domain = __itt_domain_create("GAPI Context");
#endif
namespace cv { namespace gimpl { namespace parallel {
namespace detail {
@ -82,18 +78,9 @@ void spawn_no_assert(tbb::task* root, body_t const& body) {
tbb::task::spawn(* allocate_task(root, body));
}
#ifdef OPENCV_WITH_ITT
namespace {
static __itt_string_handle* ittTbbAddReadyBlocksToQueue = __itt_string_handle_create("add ready blocks to queue");
static __itt_string_handle* ittTbbSpawnReadyBlocks = __itt_string_handle_create("spawn ready blocks");
static __itt_string_handle* ittTbbEnqueueSpawnReadyBlocks = __itt_string_handle_create("enqueueing a spawn of ready blocks");
static __itt_string_handle* ittTbbUnlockMasterThread = __itt_string_handle_create("Unlocking master thread");
}
#endif // OPENCV_WITH_ITT
template<typename body_t>
void batch_spawn(size_t count, tbb::task* root, body_t const& body, bool do_assert_graph_is_running = true) {
GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbSpawnReadyBlocks, "spawn ready blocks");
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbSpawnReadyBlocks);
if (do_assert_graph_is_running) {
assert_graph_is_running(root);
@ -143,6 +130,7 @@ void inline wake_master(async_tasks_t& async_tasks, wake_tbb_master wake_master)
if ((active_async_tasks == 0) || (wake_master == wake_tbb_master::YES)) {
// Was the last async task or asked to wake TBB master up(e.g. there are new TBB tasks to execute)
GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbUnlockMasterThread, "Unlocking master thread");
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbUnlockMasterThread);
// While decrement of async_tasks_t::count is atomic, it might occur after the waiting
// thread has read its value but _before_ it actually starts waiting on the condition variable.
@ -228,6 +216,7 @@ inline tile_node* pop(prio_items_queue_t& q) {
namespace graph {
// Returns : number of items actually pushed into the q
std::size_t inline push_ready_dependants(prio_items_queue_t& q, tile_node* node) {
GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbAddReadyBlocksToQueue, "add ready blocks to queue");
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbAddReadyBlocksToQueue);
std::size_t ready_items = 0;
// enable dependent tasks
@ -330,6 +319,7 @@ namespace graph {
if (ready_items > 0) {
auto master_was_active = is_tbb_work_present::NO;
{
GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbEnqueueSpawnReadyBlocks, "enqueueing a spawn of ready blocks");
GAPI_ITT_AUTO_TRACE_GUARD(ittTbbEnqueueSpawnReadyBlocks);
// Force master thread (one that does wait_for_all()) to (actively) wait for enqueued tasks
// and unlock it right after all dependent tasks are spawned.

View File

@ -0,0 +1,17 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2021 Intel Corporation
// TODO: Consider using OpenCV's trace.hpp
#if defined(OPENCV_WITH_ITT)
#include <ittnotify.h>
#include <opencv2/gapi/own/exports.hpp>
namespace cv {
namespace gimpl {
// The ITT domain (named "GAPI Context") used for G-API trace tasks.
// utils/itt.hpp declares this variable as extern and passes it to
// __itt_task_begin()/__itt_task_end(); this is its definition.
GAPI_EXPORTS __itt_domain* gapi_itt_domain = __itt_domain_create("GAPI Context");
} // namespace gimpl
} // namespace cv
#endif // OPENCV_WITH_ITT

View File

@ -0,0 +1,78 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2021 Intel Corporation
#ifndef OPENCV_GAPI_ITT_HPP
#define OPENCV_GAPI_ITT_HPP
// for GAPI_ITT_NAMED_TRACE_GUARD
#include <type_traits>
#include <memory>
#include <opencv2/gapi/util/compiler_hints.hpp>
// NOTE: OPENCV_WITH_ITT is only defined if the ITT dependency is built by the OpenCV infrastructure.
// There is no such define in G-API standalone mode.
// TODO: Consider using OpenCV's trace.hpp
#if defined(OPENCV_WITH_ITT)
#include <ittnotify.h>
namespace cv {
namespace util {
    // C++11 stand-in for std::remove_reference_t (which is C++14).
    template< class T >
    using remove_reference_t = typename std::remove_reference<T>::type;

    // Minimal scope guard built on top of std::unique_ptr.
    // The returned object invokes d(p) when it is destroyed (std::unique_ptr only
    // runs its deleter for a non-null pointer).
    // Call release() on the returned object to cancel the pending action.
    template<typename T, typename D>
    auto make_ptr_guard(T* p, D&& d) -> std::unique_ptr<T, util::remove_reference_t<D>> {
        using guard_type = std::unique_ptr<T, util::remove_reference_t<D>>;
        return guard_type{p, std::forward<D>(d)};
    }
} // namespace util
namespace gimpl {
// ITT domain passed to __itt_task_begin()/__itt_task_end() below.
// Only declared here; the definition is not part of this header.
extern __itt_domain* gapi_itt_domain;
namespace {
// make_itt_guard(h) begins an ITT task identified by the string handle h in
// gapi_itt_domain and returns a guard (see util::make_ptr_guard) whose
// destruction ends the task via __itt_task_end().
// The pointer stored in the guard is a dummy non-null value: it is never
// dereferenced, the deleter ignores its argument. It has to be non-null,
// as std::unique_ptr doesn't call the deleter for a null pointer.
// The lambda object lives in an unnamed namespace, so every translation unit
// which includes this header gets its own copy of it.
auto make_itt_guard = [](__itt_string_handle* h) {
__itt_task_begin(gapi_itt_domain, __itt_null, __itt_null, (h));
return util::make_ptr_guard(reinterpret_cast<int*>(1),
[](int* ){ __itt_task_end(gapi_itt_domain); });
};
} // namespace
} // namespace gimpl
} // namespace cv
// ITT-enabled variants of the tracing macros.
//
// Declares a local variable `name` holding the guard returned by make_itt_guard(h):
// the ITT task starts here and ends when `name` goes out of scope.
// NOTE: expands to two statements, so it must not be used as the sole body of
// an unbraced if/for/while.
#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) auto name = cv::gimpl::make_itt_guard(h); \
cv::util::suppress_unused_warning(name)
// Declares a static variable `n` initialized with an ITT string handle created
// from the string h. Being static, it is initialized only once.
#define GAPI_ITT_STATIC_LOCAL_HANDLE_IMPL(n, h) static __itt_string_handle* n = \
__itt_string_handle_create(h)
// Declares a non-static variable `n` initialized with an ITT string handle created
// from the string h; __itt_string_handle_create() is called every time the
// declaration is executed.
#define GAPI_ITT_DYNAMIC_LOCAL_HANDLE_IMPL(n, h) __itt_string_handle* n = \
__itt_string_handle_create(h)
#else // OPENCV_WITH_ITT
namespace cv {
namespace gimpl {
// Stand-in for the ITT trace guard, used when OPENCV_WITH_ITT is not defined.
// It does nothing; reset() is an empty function.
struct dumb_guard { void reset() { } };
} // namespace gimpl
} // namespace cv
// ITT-disabled variants of the tracing macros: no ITT function is called.
//
// Declares a dumb_guard named `name` and marks both `name` and the handle `h`
// as used to avoid unused variable warnings.
// NOTE: expands to several statements, so it must not be used as the sole body of
// an unbraced if/for/while.
#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) cv::gimpl::dumb_guard name; \
cv::util::suppress_unused_warning(name); \
cv::util::suppress_unused_warning(h)
// The "handle" is just a variable `n` (static or automatic, respectively)
// initialized with the passed expression h, so that code referring to `n`
// still compiles without ITT.
#define GAPI_ITT_STATIC_LOCAL_HANDLE_IMPL(n, h) static auto n = h
#define GAPI_ITT_DYNAMIC_LOCAL_HANDLE_IMPL(n, h) auto n = h
#endif // OPENCV_WITH_ITT
// Front-end macros, common for both ITT-enabled and ITT-disabled builds.
//
// GAPI_ITT_AUTO_TRACE_GUARD(h) declares a trace guard for the handle h in a
// variable named itt_trace_guard_<line number>. Two helper levels are used so
// that __LINE__ is expanded to the number before it is pasted into the name.
// NOTE: two guards declared on the same source line in the same scope would
// get the same variable name.
#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) GAPI_ITT_NAMED_TRACE_GUARD( \
itt_trace_guard_##LINE, h)
#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL(LINE, h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h)
#define GAPI_ITT_AUTO_TRACE_GUARD(h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL(__LINE__, h)
// Declare a handle variable `n` for the trace name h using the *_IMPL macro
// selected above by OPENCV_WITH_ITT: STATIC makes `n` a static variable,
// DYNAMIC makes it a regular one.
#define GAPI_ITT_STATIC_LOCAL_HANDLE(n, h) GAPI_ITT_STATIC_LOCAL_HANDLE_IMPL(n, h)
#define GAPI_ITT_DYNAMIC_LOCAL_HANDLE(n, h) GAPI_ITT_DYNAMIC_LOCAL_HANDLE_IMPL(n, h)
#endif // OPENCV_GAPI_ITT_HPP