// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // // Copyright (C) 2020 Intel Corporation #ifndef OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP #define OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP #include "gapi_tests_common.hpp" #include "../../include/opencv2/gapi/infer/parsers.hpp" namespace opencv_test { class ParserSSDTest { public: cv::Mat generateSSDoutput(const cv::Size& in_sz) { constexpr int maxN = 200; constexpr int objSize = 7; std::vector dims{ 1, 1, maxN, objSize }; cv::Mat mat(dims, CV_32FC1); auto data = mat.ptr(); for (int i = 0; i < maxN; ++i) { float* it = data + i * objSize; auto ssdIt = generateItem(i, in_sz); it[0] = ssdIt.image_id; it[1] = ssdIt.label; it[2] = ssdIt.confidence; it[3] = ssdIt.rc_left; it[4] = ssdIt.rc_top; it[5] = ssdIt.rc_right; it[6] = ssdIt.rc_bottom; } return mat; } void parseSSDref(const cv::Mat& in_ssd_result, const cv::Size& in_size, const float confidence_threshold, const bool alignment_to_square, const bool filter_out_of_bounds, std::vector& out_boxes) { out_boxes.clear(); const auto &in_ssd_dims = in_ssd_result.size; CV_Assert(in_ssd_dims.dims() == 4u); const int MAX_PROPOSALS = in_ssd_dims[2]; const int OBJECT_SIZE = in_ssd_dims[3]; CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size const float *data = in_ssd_result.ptr(); cv::Rect surface({0,0}, in_size), rc; float image_id, confidence; int label; for (int i = 0; i < MAX_PROPOSALS; ++i) { std::tie(rc, image_id, confidence, label) = extract(data + i*OBJECT_SIZE, in_size); if (image_id < 0.f) { break; // marks end-of-detections } if (confidence < confidence_threshold) { continue; // skip objects with low confidence } if (alignment_to_square) { adjustBoundingBox(rc); } const auto clipped_rc = rc & surface; if (filter_out_of_bounds) { if (clipped_rc.area() != rc.area()) { continue; } } out_boxes.emplace_back(clipped_rc); } } void parseSSDBLref(const cv::Mat& in_ssd_result, const cv::Size& in_size, const float confidence_threshold, const int filter_label, std::vector& out_boxes, std::vector& out_labels) { out_boxes.clear(); out_labels.clear(); const auto &in_ssd_dims = in_ssd_result.size; CV_Assert(in_ssd_dims.dims() == 4u); const int MAX_PROPOSALS = in_ssd_dims[2]; const int OBJECT_SIZE = in_ssd_dims[3]; CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size cv::Rect surface({0,0}, in_size), rc; float image_id, confidence; int label; const float *data = in_ssd_result.ptr(); for (int i = 0; i < MAX_PROPOSALS; i++) { std::tie(rc, image_id, confidence, label) = extract(data + i*OBJECT_SIZE, in_size); if (image_id < 0.f) { break; // marks end-of-detections } if (confidence < confidence_threshold || (filter_label != -1 && label != filter_label)) { continue; // filter out object classes if filter is specified } out_boxes.emplace_back(rc & surface); out_labels.emplace_back(label); } } private: void adjustBoundingBox(cv::Rect& boundingBox) { auto w = boundingBox.width; auto h = boundingBox.height; boundingBox.x -= static_cast(0.067 * w); boundingBox.y -= static_cast(0.028 * h); boundingBox.width += static_cast(0.15 * w); boundingBox.height += static_cast(0.13 * h); if (boundingBox.width < boundingBox.height) { auto dx = (boundingBox.height - boundingBox.width); boundingBox.x -= dx / 2; boundingBox.width += dx; } else { auto dy = (boundingBox.width - boundingBox.height); boundingBox.y -= dy / 2; boundingBox.height += dy; } } std::tuple extract(const float* it, const cv::Size& in_size) { float image_id = it[0]; int label = static_cast(it[1]); float confidence = it[2]; float rc_left = it[3]; float rc_top = it[4]; float rc_right = it[5]; float rc_bottom = it[6]; cv::Rect rc; // map relative coordinates to the original image scale rc.x = static_cast(rc_left * in_size.width); rc.y = static_cast(rc_top * in_size.height); rc.width = static_cast(rc_right * in_size.width) - rc.x; rc.height = static_cast(rc_bottom * in_size.height) - rc.y; return std::make_tuple(rc, image_id, confidence, label); } int randInRange(const int start, const int end) { GAPI_Assert(start <= end); return start + std::rand() % (end - start + 1); } cv::Rect generateBox(const cv::Size& in_sz) { // Generated rectangle can reside outside of the initial image by border pixels constexpr int border = 10; constexpr int minW = 16; constexpr int minH = 16; cv::Rect box; box.width = randInRange(minW, in_sz.width + 2*border); box.height = randInRange(minH, in_sz.height + 2*border); box.x = randInRange(-border, in_sz.width + border - box.width); box.y = randInRange(-border, in_sz.height + border - box.height); return box; } struct SSDitem { float image_id = 0.0f; float label = 0.0f; float confidence = 0.0f; float rc_left = 0.0f; float rc_top = 0.0f; float rc_right = 0.0f; float rc_bottom = 0.0f; }; SSDitem generateItem(const int i, const cv::Size& in_sz) { const auto normalize = [](int v, int range) { return static_cast(v) / range; }; SSDitem it; it.image_id = static_cast(i); it.label = static_cast(randInRange(0, 9)); it.confidence = static_cast(std::rand()) / RAND_MAX; auto box = generateBox(in_sz); it.rc_left = normalize(box.x, in_sz.width); it.rc_right = normalize(box.x + box.width, in_sz.width); it.rc_top = normalize(box.y, in_sz.height); it.rc_bottom = normalize(box.y + box.height, in_sz.height); return it; } }; class ParserYoloTest { public: cv::Mat generateYoloOutput(const int num_classes, std::pair dims_config = {false, 4}) { bool one_dim = false; int num_dims = 0; std::tie(one_dim, num_dims) = dims_config; GAPI_Assert(num_dims <= 4); GAPI_Assert((!one_dim && num_dims >= 3) || ( one_dim && num_dims >= 1)); std::vector dims(num_dims, 1); if (one_dim) { dims.back() = (num_classes+5)*5*13*13; } else { dims.back() = (num_classes+5)*5; dims[num_dims-2] = 13; dims[num_dims-3] = 13; } cv::Mat mat(dims, CV_32FC1); auto data = mat.ptr(); const size_t range = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); for (size_t i = 0; i < range; ++i) { data[i] = static_cast(std::rand()) / RAND_MAX; } return mat; } void parseYoloRef(const cv::Mat& in_yolo_result, const cv::Size& in_size, const float confidence_threshold, const float nms_threshold, const int num_classes, const std::vector& anchors, std::vector& out_boxes, std::vector& out_labels) { YoloParams params; constexpr auto side_square = 13 * 13; this->m_out = in_yolo_result.ptr(); this->m_side = 13; this->m_lcoords = params.coords; this->m_lclasses = num_classes; std::vector detections; for (int i = 0; i < side_square; ++i) { for (int b = 0; b < params.num; ++b) { float scale = this->scale(i, b); if (scale < confidence_threshold) { continue; } double x = this->x(i, b); double y = this->y(i, b); double height = this->height(i, b, anchors[2 * b + 1]); double width = this->width(i, b, anchors[2 * b]); for (int label = 0; label < num_classes; ++label) { float prob = scale * classConf(i,b,label); if (prob < confidence_threshold) { continue; } auto box = toBox(x, y, height, width, in_size); detections.emplace_back(Detection(box, prob, label)); } } } std::stable_sort(std::begin(detections), std::end(detections), [](const Detection& a, const Detection& b) { return a.conf > b.conf; }); if (nms_threshold < 1.0f) { for (const auto& d : detections) { if (std::end(out_boxes) == std::find_if(std::begin(out_boxes), std::end(out_boxes), [&d, nms_threshold](const cv::Rect& r) { float rectOverlap = 1.f - static_cast(jaccardDistance(r, d.rect)); return rectOverlap > nms_threshold; })) { out_boxes. emplace_back(d.rect); out_labels.emplace_back(d.label); } } } else { for (const auto& d: detections) { out_boxes. emplace_back(d.rect); out_labels.emplace_back(d.label); } } } private: struct Detection { Detection(const cv::Rect& in_rect, const float in_conf, const int in_label) : rect(in_rect), conf(in_conf), label(in_label) {} cv::Rect rect; float conf = 0.0f; int label = 0; }; struct YoloParams { int num = 5; int coords = 4; }; float scale(const int i, const int b) { int obj_index = index(i, b, m_lcoords); return m_out[obj_index]; } double x(const int i, const int b) { int box_index = index(i, b, 0); int col = i % m_side; return (col + m_out[box_index]) / m_side; } double y(const int i, const int b) { int box_index = index(i, b, 0); int row = i / m_side; return (row + m_out[box_index + m_side * m_side]) / m_side; } double width(const int i, const int b, const float anchor) { int box_index = index(i, b, 0); return std::exp(m_out[box_index + 2 * m_side * m_side]) * anchor / m_side; } double height(const int i, const int b, const float anchor) { int box_index = index(i, b, 0); return std::exp(m_out[box_index + 3 * m_side * m_side]) * anchor / m_side; } float classConf(const int i, const int b, const int label) { int class_index = index(i, b, m_lcoords + 1 + label); return m_out[class_index]; } cv::Rect toBox(const double x, const double y, const double h, const double w, const cv::Size& in_sz) { auto h_scale = in_sz.height; auto w_scale = in_sz.width; cv::Rect r; r.x = static_cast((x - w / 2) * w_scale); r.y = static_cast((y - h / 2) * h_scale); r.width = static_cast(w * w_scale); r.height = static_cast(h * h_scale); return r; } int index(const int i, const int b, const int entry) { return b * m_side * m_side * (m_lcoords + m_lclasses + 1) + entry * m_side * m_side + i; } const float* m_out = nullptr; int m_side = 0, m_lcoords = 0, m_lclasses = 0; }; } // namespace opencv_test #endif // OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP