Merge pull request #22588 from TolyaTalamanov:at/sync-ie-request-pool

G-API: Add synchronous execution for IE backend
This commit is contained in:
Alexander Smorkalov 2022-10-04 11:32:21 +03:00 committed by GitHub
commit bf5d7c0c10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 251 additions and 156 deletions

View File

@ -52,6 +52,8 @@ enum class TraitAs: int
using IEConfig = std::map<std::string, std::string>;
/** @brief Inference execution mode: synchronous (Sync) or asynchronous (Async).
The Params constructors visible in this file initialize the mode with Async.
*/
enum InferMode {Sync, Async};
namespace detail {
struct ParamDesc {
std::string model_path;
@ -89,6 +91,8 @@ struct ParamDesc {
cv::optional<cv::gapi::wip::onevpl::Device> vpl_preproc_device;
cv::optional<cv::gapi::wip::onevpl::Context> vpl_preproc_ctx;
InferMode mode;
using PrecisionT = int;
using PrecisionMapT = std::unordered_map<std::string, PrecisionT>;
// NB: This parameter can contain:
@ -100,7 +104,6 @@ struct ParamDesc {
PrecisionT,
PrecisionMapT>;
PrecisionVariantT output_precision;
};
} // namespace detail
@ -146,6 +149,7 @@ public:
, {}
, {}
, {}
, InferMode::Async
, {} } {
};
@ -171,6 +175,7 @@ public:
, {}
, {}
, {}
, InferMode::Async
, {} } {
};
@ -366,6 +371,22 @@ public:
return *this;
}
/** @brief Specifies which API will be used to run inference.
The function is used to specify the mode for OpenVINO inference.
OpenVINO has two options to run inference:
1. Asynchronous (using StartAsync: https://docs.openvino.ai/latest/classInferenceEngine_1_1InferRequest.html#doxid-class-inference-engine-1-1-infer-request-1a405293e8423d82a5b45f642a3bef0d24)
2. Synchronous (using Infer: https://docs.openvino.ai/latest/classInferenceEngine_1_1InferRequest.html#doxid-class-inference-engine-1-1-infer-request-1a3391ce30894abde730523e9ca9371ce8)
By default asynchronous mode is used.
@note The IE backend throws std::logic_error if InferMode::Sync is combined
with a number of infer requests (nireq) other than 1.
@param mode Inference mode which will be used.
@return reference to this parameter structure.
*/
Params<Net>& cfgInferMode(InferMode mode) {
desc.mode = mode;
return *this;
}
/** @brief Specifies the output precision for model.
The function is used to set an output precision for model.
@ -425,7 +446,7 @@ public:
const std::string &device)
: desc{ model, weights, device, {}, {}, {}, 0u, 0u,
detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u,
{}, {}, {}, {}, {}},
{}, {}, {}, {}, InferMode::Async, {} },
m_tag(tag) {
};
@ -443,7 +464,7 @@ public:
const std::string &device)
: desc{ model, {}, device, {}, {}, {}, 0u, 0u,
detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u,
{}, {}, {}, {}, {}},
{}, {}, {}, {}, InferMode::Async, {} },
m_tag(tag) {
};
@ -516,6 +537,12 @@ public:
return *this;
}
/** @see ie::Params::cfgInferMode */
Params& cfgInferMode(InferMode mode) {
desc.mode = mode;
return *this;
}
/** @see ie::Params::cfgOutputPrecision */
Params& cfgOutputPrecision(detail::ParamDesc::PrecisionT precision) {
desc.output_precision = precision;

View File

@ -175,6 +175,17 @@ static PLMode strToPLMode(const std::string& mode_str) {
}
}
// Converts the textual inference mode (the `infer_mode` command-line value)
// into the corresponding cv::gapi::ie::InferMode.
//
// Accepted values are exactly "async" and "sync" (the comparison is
// case-sensitive). Any other value raises std::logic_error with a message
// listing the supported options.
static cv::gapi::ie::InferMode strToInferMode(const std::string& infer_mode) {
    if (infer_mode == "async") {
        return cv::gapi::ie::InferMode::Async;
    }
    if (infer_mode == "sync") {
        return cv::gapi::ie::InferMode::Sync;
    }
    throw std::logic_error("Unsupported Infer mode: " + infer_mode +
                           "\nPlease choose between: async and sync");
}
template <>
CallParams read<CallParams>(const cv::FileNode& fn) {
auto name =
@ -288,7 +299,8 @@ int main(int argc, char* argv[]) {
"{ drop_frames | false | Drop frames if they come earlier than pipeline is completed. }"
"{ exec_list | | A comma-separated list of pipelines that"
" will be executed. Spaces around commas"
" are prohibited. }";
" are prohibited. }"
"{ infer_mode | async | OpenVINO inference mode (async/sync). }";
cv::CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help")) {
@ -304,6 +316,7 @@ int main(int argc, char* argv[]) {
const auto qc = cmd.get<int>("qc");
const auto app_mode = strToAppMode(cmd.get<std::string>("app_mode"));
const auto exec_str = cmd.get<std::string>("exec_list");
const auto infer_mode = strToInferMode(cmd.get<std::string>("infer_mode"));
const auto drop_frames = cmd.get<bool>("drop_frames");
cv::FileStorage fs;
@ -394,6 +407,7 @@ int main(int argc, char* argv[]) {
<< call_params.name << std::endl << e.what();
throw std::logic_error(ss.str());
}
infer_params.mode = infer_mode;
builder.addInfer(call_params, infer_params);
} else {
throw std::logic_error("Unsupported node type: " + node_type);

View File

@ -258,6 +258,7 @@ struct InferParams {
std::vector<std::string> input_layers;
std::vector<std::string> output_layers;
std::map<std::string, std::string> config;
cv::gapi::ie::InferMode mode;
cv::util::optional<int> out_precision;
};
@ -363,6 +364,7 @@ void PipelineBuilder::addInfer(const CallParams& call_params,
}
pp->pluginConfig(infer_params.config);
pp->cfgInferMode(infer_params.mode);
if (infer_params.out_precision) {
pp->cfgOutputPrecision(infer_params.out_precision.value());
}

View File

@ -392,6 +392,12 @@ struct IEUnit {
params.vpl_preproc_ctx.value());
GAPI_LOG_INFO(nullptr, "VPP preproc created successfuly");
}
if (params.mode == cv::gapi::ie::InferMode::Sync &&
params.nireq != 1u) {
throw std::logic_error(
"Failed: cv::gapi::ie::InferMode::Sync works only with nireq equal to 1.");
}
}
// This method is [supposed to be] called at Island compilation stage
@ -843,38 +849,128 @@ std::vector<InferenceEngine::InferRequest> cv::gimpl::ie::IECompiled::createInfe
return requests;
}
class cv::gimpl::ie::RequestPool {
class IInferExecutor {
public:
using RunF = std::function<void(InferenceEngine::InferRequest&)>;
using CallbackF = std::function<void(InferenceEngine::InferRequest&, InferenceEngine::StatusCode)>;
using Ptr = std::shared_ptr<IInferExecutor>;
using NotifyCallbackF = std::function<void()>;
using SetInputDataF = std::function<void(InferenceEngine::InferRequest&)>;
using ReadOutputDataF = std::function<void(InferenceEngine::InferRequest&, InferenceEngine::StatusCode)>;
// NB: The task is represented by:
// RunF - function which is set blobs and run async inference.
// CallbackF - function which is obtain output blobs and post it to output.
// SetInputDataF - function which sets input data.
// ReadOutputDataF - function which reads output data.
struct Task {
RunF run;
CallbackF callback;
SetInputDataF set_input_data;
ReadOutputDataF read_output_data;
};
explicit RequestPool(std::vector<InferenceEngine::InferRequest>&& requests);
IInferExecutor(IE::InferRequest request, NotifyCallbackF notify)
: m_request(std::move(request)),
m_notify(std::move(notify)) {
};
void execute(Task&& t);
void waitAll();
virtual void execute(const Task& task) = 0;
virtual ~IInferExecutor() = default;
protected:
IE::InferRequest m_request;
NotifyCallbackF m_notify;
};
// Executor which runs inference synchronously.
// Reuses the IInferExecutor constructors and overrides execute().
class SyncInferExecutor : public IInferExecutor {
using IInferExecutor::IInferExecutor;
virtual void execute(const IInferExecutor::Task& task) override;
};
// Runs one inference task synchronously: sets the input data, calls Infer()
// and then reads the output data.
// m_notify is invoked on both the success and the failure path; on failure
// the original exception is rethrown to the caller after the notification.
void SyncInferExecutor::execute(const IInferExecutor::Task& task) {
try {
task.set_input_data(m_request);
m_request.Infer();
// Infer() returned without throwing, so outputs are read with the OK status.
task.read_output_data(m_request, IE::StatusCode::OK);
} catch (...) {
// Notify before rethrowing so the executor is still reported as finished.
m_notify();
throw;
}
// NB: Notify pool that executor has finished.
m_notify();
}
class AsyncInferExecutor : public IInferExecutor {
public:
using IInferExecutor::IInferExecutor;
virtual void execute(const IInferExecutor::Task& task) override;
private:
void callback(Task task,
size_t id,
IE::InferRequest request,
IE::StatusCode code) noexcept;
void setup();
QueueClass<size_t> m_idle_ids;
std::vector<InferenceEngine::InferRequest> m_requests;
};
// Starts one inference task asynchronously.
// Installs a completion callback bound to AsyncInferExecutor::callback
// (std::bind stores `this` and a copy of `task`), sets the input data and
// calls StartAsync().
// If setting the inputs or starting the request throws, the completion
// callback is replaced with an empty one, m_notify is invoked and the
// exception is rethrown.
// NOTE(review): the bound callback keeps a raw `this`; presumably the
// executor outlives the in-flight request - confirm against the pool lifetime.
void AsyncInferExecutor::execute(const IInferExecutor::Task& task) {
using namespace std::placeholders;
using callback_t = std::function<void(IE::InferRequest, IE::StatusCode)>;
m_request.SetCompletionCallback(
static_cast<callback_t>(
std::bind(&AsyncInferExecutor::callback, this, task, _1, _2)));
try {
task.set_input_data(m_request);
m_request.StartAsync();
} catch (...) {
// Drop the callback installed above before reporting the failure.
m_request.SetCompletionCallback([](){});
m_notify();
throw;
}
}
// Completion handler for an asynchronous request:
// 1. Reads the output data using the reported status code.
// 2. Replaces the completion callback with an empty one.
// 3. Invokes m_notify.
// NOTE(review): the function is noexcept, so an exception escaping
// task.read_output_data would end in std::terminate - confirm that the
// output reader never throws.
void AsyncInferExecutor::callback(IInferExecutor::Task task,
IE::InferRequest request,
IE::StatusCode code) noexcept {
task.read_output_data(request, code);
request.SetCompletionCallback([](){});
// NB: Notify pool that executor has finished.
m_notify();
}
class cv::gimpl::ie::RequestPool {
public:
explicit RequestPool(cv::gapi::ie::InferMode mode,
std::vector<InferenceEngine::InferRequest>&& requests);
IInferExecutor::Ptr getIdleRequest();
void waitAll();
private:
void setup();
void release(const size_t id);
QueueClass<size_t> m_idle_ids;
std::vector<IInferExecutor::Ptr> m_requests;
};
// Returns the executor with index `id` to the pool by pushing the index
// back to the queue of idle ids.
void cv::gimpl::ie::RequestPool::release(const size_t id) {
m_idle_ids.push(id);
}
// RequestPool implementation //////////////////////////////////////////////
cv::gimpl::ie::RequestPool::RequestPool(std::vector<InferenceEngine::InferRequest>&& requests)
: m_requests(std::move(requests)) {
// Builds the pool: every incoming InferRequest is moved into an executor
// that matches `mode` (AsyncInferExecutor or SyncInferExecutor). Each executor
// receives a notification callback bound to RequestPool::release with the
// executor's own index. Any other mode value triggers GAPI_Assert.
// setup() is called once all executors have been stored.
cv::gimpl::ie::RequestPool::RequestPool(cv::gapi::ie::InferMode mode,
                                        std::vector<InferenceEngine::InferRequest>&& requests) {
    for (size_t idx = 0; idx < requests.size(); ++idx) {
        IInferExecutor::Ptr executor = nullptr;
        if (mode == cv::gapi::ie::InferMode::Async) {
            executor = std::make_shared<AsyncInferExecutor>(
                std::move(requests[idx]),
                std::bind(&RequestPool::release, this, idx));
        } else if (mode == cv::gapi::ie::InferMode::Sync) {
            executor = std::make_shared<SyncInferExecutor>(
                std::move(requests[idx]),
                std::bind(&RequestPool::release, this, idx));
        } else {
            GAPI_Assert(false && "Unsupported cv::gapi::ie::InferMode");
        }
        m_requests.emplace_back(std::move(executor));
    }
    setup();
}
@ -884,40 +980,10 @@ void cv::gimpl::ie::RequestPool::setup() {
}
}
void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) {
IInferExecutor::Ptr cv::gimpl::ie::RequestPool::getIdleRequest() {
size_t id = 0u;
m_idle_ids.pop(id);
auto& request = m_requests[id];
using namespace std::placeholders;
using callback_t = std::function<void(IE::InferRequest, IE::StatusCode)>;
request.SetCompletionCallback(
static_cast<callback_t>(
std::bind(&cv::gimpl::ie::RequestPool::callback, this,
t, id, _1, _2)));
// NB: InferRequest is already marked as busy
// in case of exception need to return it back to the idle.
try {
t.run(request);
} catch (...) {
request.SetCompletionCallback([](){});
m_idle_ids.push(id);
throw;
}
}
void cv::gimpl::ie::RequestPool::callback(cv::gimpl::ie::RequestPool::Task task,
size_t id,
IE::InferRequest request,
IE::StatusCode code) noexcept {
// NB: Inference is over.
// 1. Run callback
// 2. Destroy callback to free resources.
// 3. Mark InferRequest as idle.
task.callback(request, code);
request.SetCompletionCallback([](){});
m_idle_ids.push(id);
return m_requests[id];
}
// NB: Not thread-safe.
@ -944,7 +1010,7 @@ cv::gimpl::ie::GIEExecutable::GIEExecutable(const ade::Graph &g,
if (this_nh == nullptr) {
this_nh = nh;
this_iec = iem.metadata(this_nh).get<IEUnit>().compile();
m_reqPool.reset(new RequestPool(this_iec.createInferRequests()));
m_reqPool.reset(new RequestPool(this_iec.params.mode, this_iec.createInferRequests()));
}
else
util::throw_error(std::logic_error("Multi-node inference is not supported!"));
@ -1356,8 +1422,8 @@ struct Infer: public cv::detail::KernelTag {
static void run(std::shared_ptr<IECallContext> ctx,
cv::gimpl::ie::RequestPool &reqPool) {
using namespace std::placeholders;
reqPool.execute(
cv::gimpl::ie::RequestPool::Task {
reqPool.getIdleRequest()->execute(
IInferExecutor::Task {
[ctx](InferenceEngine::InferRequest &req) {
// non-generic version for now:
// - assumes all inputs/outputs are always Mats
@ -1375,9 +1441,6 @@ struct Infer: public cv::detail::KernelTag {
cv::util::optional<cv::Rect>{});
setBlob(req, layer_name, this_blob, *ctx);
}
// FIXME: Should it be done by kernel ?
// What about to do that in RequestPool ?
req.StartAsync();
},
std::bind(PostOutputs, _1, _2, ctx)
}
@ -1470,8 +1533,8 @@ struct InferROI: public cv::detail::KernelTag {
static void run(std::shared_ptr<IECallContext> ctx,
cv::gimpl::ie::RequestPool &reqPool) {
using namespace std::placeholders;
reqPool.execute(
cv::gimpl::ie::RequestPool::Task {
reqPool.getIdleRequest()->execute(
IInferExecutor::Task {
[ctx](InferenceEngine::InferRequest &req) {
GAPI_Assert(ctx->uu.params.num_in == 1);
auto&& this_roi = ctx->inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
@ -1496,9 +1559,6 @@ struct InferROI: public cv::detail::KernelTag {
*(ctx->uu.params.input_names.begin()),
this_blob, *ctx);
}
// FIXME: Should it be done by kernel ?
// What about to do that in RequestPool ?
req.StartAsync();
},
std::bind(PostOutputs, _1, _2, ctx)
}
@ -1613,11 +1673,10 @@ struct InferList: public cv::detail::KernelTag {
for (auto&& it : ade::util::indexed(in_roi_vec)) {
auto pos = ade::util::index(it);
const auto& rc = ade::util::value(it);
reqPool.execute(
cv::gimpl::ie::RequestPool::Task {
reqPool.getIdleRequest()->execute(
IInferExecutor::Task {
[ctx, rc, this_blob](InferenceEngine::InferRequest &req) {
setROIBlob(req, ctx->uu.params.input_names[0u], this_blob, rc, *ctx);
req.StartAsync();
},
std::bind(callback, std::placeholders::_1, std::placeholders::_2, pos)
}
@ -1770,8 +1829,8 @@ struct InferList2: public cv::detail::KernelTag {
PostOutputsList callback(list_size, ctx, std::move(cached_dims));
for (const auto &list_idx : ade::util::iota(list_size)) {
reqPool.execute(
cv::gimpl::ie::RequestPool::Task {
reqPool.getIdleRequest()->execute(
IInferExecutor::Task {
[ctx, list_idx, list_size, blob_0](InferenceEngine::InferRequest &req) {
for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) {
const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u);
@ -1791,7 +1850,6 @@ struct InferList2: public cv::detail::KernelTag {
"Only Rect and Mat types are supported for infer list 2!");
}
}
req.StartAsync();
},
std::bind(callback, std::placeholders::_1, std::placeholders::_2, list_idx)
} // task

View File

@ -2956,109 +2956,103 @@ TEST(TestAgeGender, ThrowBlobAndInputPrecisionMismatchStreaming)
}
}
TEST(TestAgeGenderIE, ChangeOutputPrecision)
{
struct AgeGenderInferTest: public ::testing::Test {
cv::Mat m_in_mat;
cv::Mat m_gapi_age;
cv::Mat m_gapi_gender;
cv::gimpl::ie::wrap::Plugin m_plugin;
IE::CNNNetwork m_net;
cv::gapi::ie::detail::ParamDesc m_params;
using AGInfo = std::tuple<cv::GMat, cv::GMat>;
G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
void SetUp() {
initDLDTDataPath();
m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
m_params.device_id = "CPU";
cv::gapi::ie::detail::ParamDesc params;
params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
params.device_id = "CPU";
m_plugin = cv::gimpl::ie::wrap::getPlugin(m_params);
m_net = cv::gimpl::ie::wrap::readNetwork(m_params);
setNetParameters(m_net);
cv::Mat in_mat(cv::Size(320, 240), CV_8UC3);
cv::randu(in_mat, 0, 255);
m_in_mat = cv::Mat(cv::Size(320, 240), CV_8UC3);
cv::randu(m_in_mat, 0, 255);
}
cv::Mat gapi_age, gapi_gender;
cv::GComputation buildGraph() {
cv::GMat in, age, gender;
std::tie(age, gender) = cv::gapi::infer<AgeGender>(in);
return cv::GComputation(cv::GIn(in), cv::GOut(age, gender));
}
// Load & run IE network
void validate() {
IE::Blob::Ptr ie_age, ie_gender;
{
auto plugin = cv::gimpl::ie::wrap::getPlugin(params);
auto net = cv::gimpl::ie::wrap::readNetwork(params);
setNetParameters(net);
for (auto it : net.getOutputsInfo()) {
it.second->setPrecision(IE::Precision::U8);
}
auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params);
auto this_network = cv::gimpl::ie::wrap::loadNetwork(m_plugin, m_net, m_params);
auto infer_request = this_network.CreateInferRequest();
infer_request.SetBlob("data", cv::gapi::ie::util::to_ie(in_mat));
infer_request.SetBlob("data", cv::gapi::ie::util::to_ie(m_in_mat));
infer_request.Infer();
ie_age = infer_request.GetBlob("age_conv3");
ie_gender = infer_request.GetBlob("prob");
}
// Validate with IE itself (avoid DNN module dependency here)
normAssert(cv::gapi::ie::util::to_ocv(ie_age), m_gapi_age, "Test age output" );
normAssert(cv::gapi::ie::util::to_ocv(ie_gender), m_gapi_gender, "Test gender output");
}
};
// Configure & run G-API
using AGInfo = std::tuple<cv::GMat, cv::GMat>;
G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
cv::GMat in;
cv::GMat age, gender;
std::tie(age, gender) = cv::gapi::infer<AgeGender>(in);
cv::GComputation comp(cv::GIn(in), cv::GOut(age, gender));
TEST_F(AgeGenderInferTest, SyncExecution) {
auto pp = cv::gapi::ie::Params<AgeGender> {
params.model_path, params.weights_path, params.device_id
m_params.model_path, m_params.weights_path, m_params.device_id
}.cfgOutputLayers({ "age_conv3", "prob" })
.cfgInferMode(cv::gapi::ie::InferMode::Sync);
buildGraph().apply(cv::gin(m_in_mat), cv::gout(m_gapi_age, m_gapi_gender),
cv::compile_args(cv::gapi::networks(pp)));
validate();
}
// Sync inference mode combined with more than one infer request (4 here)
// is expected to be rejected: applying the graph must throw.
TEST_F(AgeGenderInferTest, ThrowSyncWithNireqNotEqualToOne) {
auto pp = cv::gapi::ie::Params<AgeGender> {
m_params.model_path, m_params.weights_path, m_params.device_id
}.cfgOutputLayers({ "age_conv3", "prob" })
.cfgInferMode(cv::gapi::ie::InferMode::Sync)
.cfgNumRequests(4u);
EXPECT_ANY_THROW(buildGraph().apply(cv::gin(m_in_mat), cv::gout(m_gapi_age, m_gapi_gender),
cv::compile_args(cv::gapi::networks(pp))));
}
TEST_F(AgeGenderInferTest, ChangeOutputPrecision) {
auto pp = cv::gapi::ie::Params<AgeGender> {
m_params.model_path, m_params.weights_path, m_params.device_id
}.cfgOutputLayers({ "age_conv3", "prob" })
.cfgOutputPrecision(CV_8U);
comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
for (auto it : m_net.getOutputsInfo()) {
it.second->setPrecision(IE::Precision::U8);
}
buildGraph().apply(cv::gin(m_in_mat), cv::gout(m_gapi_age, m_gapi_gender),
cv::compile_args(cv::gapi::networks(pp)));
// Validate with IE itself (avoid DNN module dependency here)
normAssert(cv::gapi::ie::util::to_ocv(ie_age), gapi_age, "Test age output" );
normAssert(cv::gapi::ie::util::to_ocv(ie_gender), gapi_gender, "Test gender output");
validate();
}
TEST(TestAgeGenderIE, ChangeSpecificOutputPrecison)
{
initDLDTDataPath();
cv::gapi::ie::detail::ParamDesc params;
params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
params.device_id = "CPU";
cv::Mat in_mat(cv::Size(320, 240), CV_8UC3);
cv::randu(in_mat, 0, 255);
cv::Mat gapi_age, gapi_gender;
// Load & run IE network
IE::Blob::Ptr ie_age, ie_gender;
{
auto plugin = cv::gimpl::ie::wrap::getPlugin(params);
auto net = cv::gimpl::ie::wrap::readNetwork(params);
setNetParameters(net);
// NB: Specify precision only for "prob" output.
net.getOutputsInfo().at("prob")->setPrecision(IE::Precision::U8);
auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params);
auto infer_request = this_network.CreateInferRequest();
infer_request.SetBlob("data", cv::gapi::ie::util::to_ie(in_mat));
infer_request.Infer();
ie_age = infer_request.GetBlob("age_conv3");
ie_gender = infer_request.GetBlob("prob");
}
// Configure & run G-API
using AGInfo = std::tuple<cv::GMat, cv::GMat>;
G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
cv::GMat in;
cv::GMat age, gender;
std::tie(age, gender) = cv::gapi::infer<AgeGender>(in);
cv::GComputation comp(cv::GIn(in), cv::GOut(age, gender));
TEST_F(AgeGenderInferTest, ChangeSpecificOutputPrecison) {
auto pp = cv::gapi::ie::Params<AgeGender> {
params.model_path, params.weights_path, params.device_id
m_params.model_path, m_params.weights_path, m_params.device_id
}.cfgOutputLayers({ "age_conv3", "prob" })
.cfgOutputPrecision({{"prob", CV_8U}});
comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
cv::compile_args(cv::gapi::networks(pp)));
// Validate with IE itself (avoid DNN module dependency here)
normAssert(cv::gapi::ie::util::to_ocv(ie_age), gapi_age, "Test age output" );
normAssert(cv::gapi::ie::util::to_ocv(ie_gender), gapi_gender, "Test gender output");
m_net.getOutputsInfo().at("prob")->setPrecision(IE::Precision::U8);
buildGraph().apply(cv::gin(m_in_mat), cv::gout(m_gapi_age, m_gapi_gender),
cv::compile_args(cv::gapi::networks(pp)));
validate();
}
} // namespace opencv_test