Merge pull request #19709 from TolyaTalamanov:at/async-inferlist-infer2
G-API: Implement async version for InferList & Infer2

* Implement async version for InferList & Infer2
* Fix warning
* Fix bug with ROI ordering
* Post input meta instead of empty meta
* Address review comments
parent af63fffb63
commit dc31e20925
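The core of the change is a fan-out/fan-in pattern: InferList and Infer2 no longer run one blocking Infer() loop inside a single request, but submit one asynchronous request per list element and aggregate results in completion callbacks. The standalone sketch below (plain standard C++ threads; PostList, the squared-integer "inference", and all other names are invented stand-ins, not OpenCV or Inference Engine APIs) models what the new PostOutputsList class in this diff does: the output vectors are sized up front, each callback fills only its own slot (so results keep list order no matter which request finishes first), and a shared atomic counter lets the last callback post the aggregated outputs.

#include <atomic>
#include <cstdio>
#include <memory>
#include <thread>
#include <vector>

// Copies of PostList share one Priv block, so every per-request
// callback bumps the same atomic counter -- the same shape as
// PostOutputsList in the diff below.
class PostList {
public:
    PostList(size_t size, std::shared_ptr<std::vector<int>> out)
        : m_priv(new Priv()) {
        m_priv->size = size;
        m_priv->out  = std::move(out);
    }

    // Called once per finished "request"; pos pins the result to its slot.
    void operator()(int result, size_t pos) const {
        (*m_priv->out)[pos] = result;              // fill this slot only
        if (++m_priv->finished == m_priv->size) {  // the last callback "posts"
            std::printf("all %zu results ready, posting outputs\n", m_priv->size);
        }
    }

private:
    struct Priv {
        size_t size = 0u;
        std::atomic<size_t> finished{0u};
        std::shared_ptr<std::vector<int>> out;
    };
    std::shared_ptr<Priv> m_priv;
};

int main() {
    const std::vector<int> rois = {1, 2, 3, 4};    // stand-ins for cv::Rect ROIs
    auto out = std::make_shared<std::vector<int>>(rois.size());
    PostList callback(rois.size(), out);           // size-then-fill, as the diff does

    std::vector<std::thread> workers;              // one "InferRequest" per ROI
    for (size_t pos = 0; pos < rois.size(); ++pos) {
        workers.emplace_back([&rois, callback, pos] {
            callback(rois[pos] * rois[pos], pos);  // "inference" + completion callback
        });
    }
    for (auto &w : workers) w.join();              // cf. RequestPool::waitAndShutdown()

    for (size_t pos = 0; pos < out->size(); ++pos) {
        std::printf("roi %zu -> %d\n", pos, (*out)[pos]);
    }
}

Binding one functor to many requests with a per-request position (std::bind(callback, _1, pos) in the diff) is what replaces the old push_back-in-order loop and fixes the ROI-ordering bug mentioned in the commit message.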
@@ -18,6 +18,7 @@
 #include <functional>
 #include <unordered_set>
+#include <atomic>
 
 #include <ade/util/algorithm.hpp>
 
@@ -289,7 +290,7 @@ public:
     }
 
     template<typename T>
-    std::vector<T>& outVecR(std::size_t output) {
+    std::vector<T>& outVecR(std::size_t output) {
         return outVecRef(output).wref<T>();
     }
 
@@ -298,8 +299,9 @@ public:
     const cv::Mat& inMat (std::size_t input) const;
     const cv::MediaFrame& inFrame(std::size_t input) const;
 
-    cv::Mat& outMatR(std::size_t idx);
-    cv::GRunArgP output (std::size_t idx);
+    const cv::GRunArg& input (std::size_t idx) const;
+    cv::GRunArgP output (std::size_t idx);
+    cv::Mat& outMatR(std::size_t idx);
 
     const IEUnit &uu;
     cv::gimpl::GIslandExecutable::IOutput &out;
@@ -385,6 +387,10 @@ cv::GRunArgP IECallContext::output(std::size_t idx) {
     return m_output_objs[idx].second;
 };
 
+const cv::GRunArg& IECallContext::input(std::size_t idx) const {
+    return m_input_objs[idx].second;
+}
+
 cv::detail::VectorRef& IECallContext::outVecRef(std::size_t idx) {
     return cv::util::get<cv::detail::VectorRef>(m_results.at(idx));
 }
@@ -424,7 +430,6 @@ cv::GArg IECallContext::packArg(const cv::GArg &arg) {
     }
 }
 
-
 struct IECallable {
     static const char *name() { return "IERequestCallable"; }
     using Run = std::function<void(std::shared_ptr<IECallContext>, cv::gimpl::ie::RequestPool&)>;
@@ -513,7 +518,7 @@ public:
 
     explicit RequestPool(std::vector<InferenceEngine::InferRequest>&& requests);
 
-    void execute(Task&& t, bool async = true);
+    void execute(Task&& t);
     void waitAndShutdown();
 
 private:
@@ -531,23 +536,12 @@ cv::gimpl::ie::RequestPool::RequestPool(std::vector<InferenceEngine::InferRequest>&& requests)
     }
 }
 
-void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t, bool async) {
+void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) {
     size_t id = 0u;
     m_idle_ids.pop(id);
 
     auto& request = m_requests[id];
 
-    // FIXME: This WA should be removed after supporting async mode for InferList and Infer2.
-    // InferList and Infer2 work synchronously without calling callback,
-    // therefore don't release InferRequest idle id.
-    if (!async) {
-        // NB: Synchronous execution.
-        t.run(request);
-        // NB: Explicitly call callback to release id.
-        callback(t, request, id);
-        return;
-    }
-
     request.SetCompletionCallback(
             std::bind(&cv::gimpl::ie::RequestPool::callback, this, t, std::ref(request), id));
     t.run(request);
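For reference, here is why the async flag could be dropped from execute(): every Task now completes through callback(), which is also what returns the request id to the idle queue, so there is no longer a synchronous path that must release the id by hand. A toy model of that id-recycling contract (standard C++ only; IdQueue is an invented mutex-based stand-in for the pool's internal queue type):

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>

// Minimal blocking queue of idle request ids.
class IdQueue {
public:
    void push(size_t id) {
        { std::lock_guard<std::mutex> lk(m_m); m_q.push(id); }
        m_cv.notify_one();
    }
    void pop(size_t &id) {  // blocks until some request is free
        std::unique_lock<std::mutex> lk(m_m);
        m_cv.wait(lk, [this] { return !m_q.empty(); });
        id = m_q.front();
        m_q.pop();
    }
private:
    std::mutex m_m;
    std::condition_variable m_cv;
    std::queue<size_t> m_q;
};

int main() {
    IdQueue idle;
    for (size_t i = 0; i < 4; ++i) idle.push(i);  // a pool of four requests

    size_t id = 0u;
    idle.pop(id);           // execute(): claim an idle request...
    std::printf("request %zu in flight\n", id);
    idle.push(id);          // ...callback(): hand it back on completion
}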
@@ -638,7 +632,7 @@ void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in,
     // (1) Collect island inputs/outputs
     input_objs.reserve(in_desc.size());
     for (auto &&it: ade::util::zip(ade::util::toRange(in_desc),
-                                   ade::util::toRange(in_vector)))
+                                   ade::util::toRange(in_vector)))
     {
         input_objs.emplace_back(std::get<0>(it), std::get<1>(it));
     }
@@ -752,12 +746,65 @@ static void PostOutputs(InferenceEngine::InferRequest &request,
         IE::Blob::Ptr this_blob = request.GetBlob(ctx->uu.params.output_names[i]);
         copyFromIE(this_blob, out_mat);
         auto output = ctx->output(i);
-        ctx->out.meta(output, cv::GRunArg::Meta{});
+        ctx->out.meta(output, ctx->input(0).meta);
         ctx->out.post(std::move(output));
     }
 }
 
+class PostOutputsList {
+public:
+    PostOutputsList(size_t size,
+                    std::shared_ptr<IECallContext> ctx,
+                    std::vector<std::vector<int>>&& cached_dims);
+
+    void operator()(InferenceEngine::InferRequest &request, size_t pos) const;
+
+private:
+    struct Priv {
+        size_t size;
+        std::atomic<size_t> finished{0u};
+        std::shared_ptr<IECallContext> ctx;
+        std::vector<std::vector<int>> cached_dims;
+    };
+    std::shared_ptr<Priv> m_priv;
+};
+
+PostOutputsList::PostOutputsList(size_t size,
+                                 std::shared_ptr<IECallContext> ctx,
+                                 std::vector<std::vector<int>>&& cached_dims)
+    : m_priv(new Priv()) {
+    m_priv->size = size;
+    m_priv->ctx = ctx;
+    m_priv->cached_dims = std::move(cached_dims);
+}
+
+void PostOutputsList::operator()(InferenceEngine::InferRequest &req, size_t pos) const {
+    auto&& ctx         = m_priv->ctx;
+    auto&& cached_dims = m_priv->cached_dims;
+    auto&& finished    = m_priv->finished;
+    auto&& size        = m_priv->size;
+    for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+        std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
+
+        IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
+        GAPI_Assert(out_blob);
+
+        // FIXME: Avoid data copy. Not sure if it is possible though
+        out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
+        copyFromIE(out_blob, out_vec[pos]);
+    }
+    ++finished;
+
+    if (finished == size) {
+        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+            auto output = ctx->output(i);
+            ctx->out.meta(output, ctx->input(0).meta);
+            ctx->out.post(std::move(output));
+        }
+    }
+}
+
 struct Infer: public cv::detail::KernelTag {
     using API = cv::GInferBase;
     static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); }
@@ -977,65 +1024,44 @@ struct InferList: public cv::detail::KernelTag {
     static void run(std::shared_ptr<IECallContext> ctx,
                     cv::gimpl::ie::RequestPool &reqPool) {
-
-        using namespace std::placeholders;
-        reqPool.execute(
-                cv::gimpl::ie::RequestPool::Task {
-                    [ctx](InferenceEngine::InferRequest &req) {
-                        // non-generic version for now:
-                        // - assumes zero input is always ROI list
-                        // - assumes all inputs/outputs are always Mats
-                        const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
-                        // NB: In case there is no input data need to post output anyway
-                        if (in_roi_vec.empty()) {
-                            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                                auto output = ctx->output(i);
-                                ctx->out.meta(output, cv::GRunArg::Meta{});
-                                ctx->out.post(std::move(output));
-                            }
-                            return;
-                        }
-
-                        IE::Blob::Ptr this_blob = extractBlob(*ctx, 1);
-
-                        // FIXME: This could be done ONCE at graph compile stage!
-                        std::vector<std::vector<int>> cached_dims(ctx->uu.params.num_out);
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
-                            cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
-                            // FIXME: Isn't this should be done automatically
-                            // by some resetInternalData(), etc? (Probably at the GExecutor level)
-                            ctx->outVecR<cv::Mat>(i).clear();
-                        }
-
-                        for (auto&& rc : in_roi_vec) {
-                            IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc));
-                            req.SetBlob(ctx->uu.params.input_names[0u], roi_blob);
-
-                            req.Infer();
-
-                            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                                std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
-
-                                IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
-                                GAPI_Assert(out_blob);
-
-                                cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
-                                // FIXME: Avoid data copy. Not sure if it is possible though
-                                copyFromIE(out_blob, out_mat);
-                                out_vec.push_back(std::move(out_mat));
-                            }
-                        }
-
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            auto output = ctx->output(i);
-                            ctx->out.meta(output, cv::GRunArg::Meta{});
-                            ctx->out.post(std::move(output));
-                        }
-                    },
-                    [](InferenceEngine::InferRequest &) { /* do nothing */ }
-                },
-                false /* not async */
-        );
+        const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
+        // NB: In case there is no input data need to post output anyway
+        if (in_roi_vec.empty()) {
+            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+                auto output = ctx->output(i);
+                ctx->out.meta(output, ctx->input(0).meta);
+                ctx->out.post(std::move(output));
+            }
+            return;
+        }
+
+        IE::Blob::Ptr this_blob = extractBlob(*ctx, 1);
+        std::vector<std::vector<int>> cached_dims(ctx->uu.params.num_out);
+        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+            const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
+            cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
+            // FIXME: Isn't this should be done automatically
+            // by some resetInternalData(), etc? (Probably at the GExecutor level)
+            auto& out_vec = ctx->outVecR<cv::Mat>(i);
+            out_vec.clear();
+            out_vec.resize(in_roi_vec.size());
+        }
+
+        PostOutputsList callback(in_roi_vec.size(), ctx, std::move(cached_dims));
+        for (auto&& it : ade::util::indexed(in_roi_vec)) {
+            auto pos = ade::util::index(it);
+            const auto& rc = ade::util::value(it);
+            reqPool.execute(
+                cv::gimpl::ie::RequestPool::Task {
+                    [ctx, rc, this_blob](InferenceEngine::InferRequest &req) {
+                        IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc));
+                        req.SetBlob(ctx->uu.params.input_names[0u], roi_blob);
+                        req.StartAsync();
+                    },
+                    std::bind(callback, std::placeholders::_1, pos)
+                }
+            );
+        }
     }
 };
 
@@ -1136,86 +1162,59 @@ struct InferList2: public cv::detail::KernelTag {
 
     static void run(std::shared_ptr<IECallContext> ctx,
                     cv::gimpl::ie::RequestPool &reqPool) {
-        reqPool.execute(
-                cv::gimpl::ie::RequestPool::Task {
-                    [ctx](InferenceEngine::InferRequest &req) {
-                        GAPI_Assert(ctx->inArgs().size() > 1u
-                                && "This operation must have at least two arguments");
-
-                        IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0);
-
-                        // Take the next argument, which must be vector (of any kind).
-                        // Use it only to obtain the ROI list size (sizes of all other
-                        // vectors must be equal to this one)
-                        const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size();
-                        if (list_size == 0u) {
-                            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                                auto output = ctx->output(i);
-                                ctx->out.meta(output, cv::GRunArg::Meta{});
-                                ctx->out.post(std::move(output));
-                            }
-                            return;
-                        }
-
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            ctx->outVecR<cv::Mat>(i).resize(list_size);
-                        }
-
-                        // FIXME: This could be done ONCE at graph compile stage!
-                        std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out);
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
-                            cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
-                            // FIXME: Isn't this should be done automatically
-                            // by some resetInternalData(), etc? (Probably at the GExecutor level)
-                            ctx->outVecR<cv::Mat>(i).clear();
-                        }
-
-                        for (const auto &list_idx : ade::util::iota(list_size)) {
-                            for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) {
-                                const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u);
-                                GAPI_Assert(this_vec.size() == list_size);
-                                IE::Blob::Ptr this_blob;
-                                if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) {
-                                    const auto &vec = this_vec.rref<cv::Rect>();
-                                    this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx]));
-                                } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) {
-                                    const auto &vec = this_vec.rref<cv::Mat>();
-                                    const auto &mat = vec[list_idx];
-                                    this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR);
-                                } else {
-                                    GAPI_Assert(false &&
-                                            "Only Rect and Mat types are supported for infer list 2!");
-                                }
-
-                                req.SetBlob(ctx->uu.params.input_names[in_idx], this_blob);
-                            }
-
-                            req.Infer();
-
-                            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                                std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
-
-                                IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
-                                GAPI_Assert(out_blob);
-
-                                cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
-                                // FIXME: Avoid data copy. Not sure if it is possible though
-                                copyFromIE(out_blob, out_mat);
-                                out_vec.push_back(std::move(out_mat));
-                            }
-                        }
-
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            auto output = ctx->output(i);
-                            ctx->out.meta(output, cv::GRunArg::Meta{});
-                            ctx->out.post(std::move(output));
-                        }
-                    },
-                    [](InferenceEngine::InferRequest &) { /* do nothing */ }
-                },
-                false /* not async */
-        );
+        GAPI_Assert(ctx->inArgs().size() > 1u
+                    && "This operation must have at least two arguments");
+        IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0);
+        const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size();
+        if (list_size == 0u) {
+            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+                auto output = ctx->output(i);
+                ctx->out.meta(output, ctx->input(0).meta);
+                ctx->out.post(std::move(output));
+            }
+            return;
+        }
+        // FIXME: This could be done ONCE at graph compile stage!
+        std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out);
+        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+            const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
+            cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
+            // FIXME: Isn't this should be done automatically
+            // by some resetInternalData(), etc? (Probably at the GExecutor level)
+            auto& out_vec = ctx->outVecR<cv::Mat>(i);
+            out_vec.clear();
+            out_vec.resize(list_size);
+        }
+
+        PostOutputsList callback(list_size, ctx, std::move(cached_dims));
+        for (const auto &list_idx : ade::util::iota(list_size)) {
+            reqPool.execute(
+                cv::gimpl::ie::RequestPool::Task {
+                    [ctx, list_idx, list_size, blob_0](InferenceEngine::InferRequest &req) {
+                        for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) {
+                            const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u);
+                            GAPI_Assert(this_vec.size() == list_size);
+                            IE::Blob::Ptr this_blob;
+                            if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) {
+                                const auto &vec = this_vec.rref<cv::Rect>();
+                                this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx]));
+                            } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) {
+                                const auto &vec = this_vec.rref<cv::Mat>();
+                                const auto &mat = vec[list_idx];
+                                this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR);
+                            } else {
+                                GAPI_Assert(false &&
+                                        "Only Rect and Mat types are supported for infer list 2!");
+                            }
+                            req.SetBlob(ctx->uu.params.input_names[in_idx], this_blob);
+                        }
+                        req.StartAsync();
+                    },
+                    std::bind(callback, std::placeholders::_1, list_idx)
+                } // task
+            );
+        } // for
     }
 };
@@ -1255,7 +1255,6 @@ TEST(InferList, TestStreamingInfer)
                      }.cfgOutputLayers({ "age_conv3", "prob" })
                       .cfgNumRequests(4u);
 
-
     std::size_t num_frames = 0u;
    std::size_t max_frames = 10u;
 
@@ -1308,7 +1307,6 @@ TEST(InferList, TestStreamingInfer)
         ++num_frames;
         cap >> in_mat;
     }
     pipeline.stop();
 }
-
 TEST(Infer2, TestStreamingInfer)
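From the user's side nothing changes syntactically; a list-inference pipeline like the one in this test simply stops serializing ROIs on a single request. A rough sketch of such a pipeline (the AgeGender tag, model files, and layer names are hypothetical; the cfgOutputLayers/cfgNumRequests calls mirror the test above, and compile/apply boilerplate is omitted):

#include <tuple>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/infer/ie.hpp>

G_API_NET(AgeGender, <std::tuple<cv::GMat, cv::GMat>(cv::GMat)>, "age-gender");

int main() {
    cv::GMat in;
    cv::GArray<cv::Rect> rois;               // e.g. face detections
    cv::GArray<cv::GMat> ages, genders;
    // One logical call; the IE backend now fans it out into per-ROI
    // asynchronous InferRequests.
    std::tie(ages, genders) = cv::gapi::infer<AgeGender>(rois, in);
    cv::GComputation graph(cv::GIn(in, rois), cv::GOut(ages, genders));

    auto net = cv::gapi::ie::Params<AgeGender>{
        "age-gender.xml", "age-gender.bin", "CPU"  // hypothetical model files
    }.cfgOutputLayers({"age_conv3", "prob"})
     .cfgNumRequests(4u);                    // up to four requests in flight

    (void)graph; (void)net;                  // compileStreaming()/apply() omitted here
    return 0;
}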