opencv/modules/objdetect/src/barcode_detector/bardetect.cpp
Maksim Shabunin 463cd09811
Merge pull request #23666 from mshabunin:barcode-move
Moved barcode from opencv_contrib #23666

Merge with https://github.com/opencv/opencv_contrib/pull/3497

##### TODO
- [x] Documentation (bib)
- [x] Tutorial (references)
- [x] Sample app (refactored)
- [x] Java (test passes)
- [x] Python (test passes)
- [x] Build without DNN
2023-06-14 22:21:38 +03:00

511 lines
18 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
#include "../precomp.hpp"
#include "bardetect.hpp"
namespace cv {
namespace barcode {
// Single-precision pi and pi/2, derived from OpenCV's double-precision CV_PI.
static constexpr float PI{static_cast<float>(CV_PI)};
static constexpr float HALF_PI{static_cast<float>(CV_PI * 0.5)};
// CALCULATE_SUM(ptr, result): reads the four corner entries of a rectangular window from
// the integral image starting at `ptr` and stores
// bottom_right - bottom_left - top_right + top_left in `result`.
// The expansion uses names that must already exist at the expansion site:
//   left_col, right_col, top_row, bottom_row - window corner coordinates,
//   integral_cols                            - row stride of the integral image, in elements,
//   top_left, top_right, bottom_left, bottom_right - scratch variables receiving float values.
// The expansion already ends with ';', so call sites invoke it without a trailing semicolon.
// NOTE(review): addressing rows as `row * integral_cols` presumes the integral buffer is
// continuous (no row padding) - confirm for every Mat passed in.
#define CALCULATE_SUM(ptr, result) \
top_left = static_cast<float>(*((ptr) + left_col + integral_cols * top_row));\
top_right = static_cast<float>(*((ptr) + integral_cols * top_row + right_col));\
bottom_right = static_cast<float>(*((ptr) + right_col + bottom_row * integral_cols));\
bottom_left = static_cast<float>(*((ptr) + bottom_row * integral_cols + left_col));\
(result) = (bottom_right - bottom_left - top_right + top_left);
// Returns true when `coord` addresses an element inside a grid of size `limit`,
// i.e. 0 <= x < limit.width and 0 <= y < limit.height.
// The comparison is done on signed values: the previous unsigned comparison against
// `limit.width - 1` wrapped around for an empty size (width or height of 0) and
// accepted every non-negative coordinate.
inline bool Detect::isValidCoord(const Point &coord, const Size &limit)
{
    return coord.x >= 0 && coord.y >= 0 &&
           coord.x < limit.width && coord.y < limit.height;
}
//==============================================================================
// NMSBoxes copied from modules/dnn/src/nms.inl.hpp
// TODO: move NMSBoxes outside the dnn module to allow other modules use it
namespace
{
// Strict "greater than" ordering on the score (first member) of two (score, payload) pairs;
// used as the comparator for sorting candidates by descending score.
template <typename T>
static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
                                        const std::pair<float, T>& pair2)
{
    const float lhs_score = pair1.first;
    const float rhs_score = pair2.first;
    return rhs_score < lhs_score;
}
// Collects (score, index) pairs for every score strictly greater than `threshold`,
// orders them by descending score (stable, so equal scores keep their input order)
// and, when `top_k` is positive, keeps only the first `top_k` pairs.
// `score_index_vec` is expected to be empty on entry (checked in debug builds only).
inline void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, const int top_k,
                             std::vector<std::pair<float, int> >& score_index_vec)
{
    CV_DbgAssert(score_index_vec.empty());

    // Pair every score above the threshold with its position in `scores`.
    size_t position = 0;
    for (const float score : scores)
    {
        if (score > threshold)
        {
            score_index_vec.emplace_back(score, (int)position);
        }
        ++position;
    }

    // Highest score first.
    std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
                     SortScorePairDescend<int>);

    // Drop everything past the top_k best candidates, if a limit was requested.
    if (top_k > 0 && top_k < (int)score_index_vec.size())
    {
        score_index_vec.erase(score_index_vec.begin() + top_k, score_index_vec.end());
    }
}
// Greedy non-maximum suppression.
// Candidates are taken from GetMaxScoreIndex (scores above `score_threshold`, best first,
// optionally limited to `top_k`). A candidate is kept when its overlap with every box kept
// so far is <= the current threshold; the indices of the kept boxes are written to `indices`.
// After each kept box the threshold is multiplied by `eta` while eta < 1 and the threshold
// is still above 0.5. Processing stops once `limit` boxes have been kept.
template <typename BoxType>
inline void NMSFast_(const std::vector<BoxType>& bboxes,
                     const std::vector<float>& scores, const float score_threshold,
                     const float nms_threshold, const float eta, const int top_k,
                     std::vector<int>& indices,
                     float (*computeOverlap)(const BoxType&, const BoxType&),
                     size_t limit = std::numeric_limits<int>::max())
{
    CV_Assert(bboxes.size() == scores.size());

    // Candidate list, ordered by descending score.
    std::vector<std::pair<float, int> > candidates;
    GetMaxScoreIndex(scores, score_threshold, top_k, candidates);

    float adaptive_threshold = nms_threshold;
    indices.clear();
    for (const std::pair<float, int>& candidate : candidates)
    {
        const int idx = candidate.second;

        // A candidate is suppressed by the first kept box it overlaps too much.
        bool suppressed = false;
        for (const int kept_idx : indices)
        {
            const float overlap = computeOverlap(bboxes[idx], bboxes[kept_idx]);
            if (!(overlap <= adaptive_threshold))
            {
                suppressed = true;
                break;
            }
        }
        if (suppressed)
        {
            continue;
        }

        indices.push_back(idx);
        if (indices.size() >= limit)
        {
            break;
        }
        if (eta < 1 && adaptive_threshold > 0.5)
        {
            adaptive_threshold *= eta;
        }
    }
}
static inline float rotatedRectIOU(const RotatedRect& a, const RotatedRect& b)
{
std::vector<Point2f> inter;
int res = rotatedRectangleIntersection(a, b, inter);
if (inter.empty() || res == INTERSECT_NONE)
return 0.0f;
if (res == INTERSECT_FULL)
return 1.0f;
float interArea = (float)contourArea(inter);
return interArea / (a.size.area() + b.size.area() - interArea);
}
static void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
const float score_threshold, const float nms_threshold,
std::vector<int>& indices, const float eta = 1.f, const int top_k = 0)
{
CV_Assert_N(bboxes.size() == scores.size(), score_threshold >= 0,
nms_threshold >= 0, eta > 0);
NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rotatedRectIOU);
}
} // namespace <anonymous>::
//==============================================================================
void Detect::init(const Mat &src)
{
const double min_side = std::min(src.size().width, src.size().height);
if (min_side > 512.0)
{
purpose = SHRINKING;
coeff_expansion = min_side / 512.0;
width = cvRound(src.size().width / coeff_expansion);
height = cvRound(src.size().height / coeff_expansion);
Size new_size(width, height);
resize(src, resized_barcode, new_size, 0, 0, INTER_AREA);
}
// else if (min_side < 512.0)
// {
// purpose = ZOOMING;
// coeff_expansion = 512.0 / min_side;
// width = cvRound(src.size().width * coeff_expansion);
// height = cvRound(src.size().height * coeff_expansion);
// Size new_size(width, height);
// resize(src, resized_barcode, new_size, 0, 0, INTER_CUBIC);
// }
else
{
purpose = UNCHANGED;
coeff_expansion = 1.0;
width = src.size().width;
height = src.size().height;
resized_barcode = src.clone();
}
// median blur: sometimes it reduces the noise, but also reduces the recall
// medianBlur(resized_barcode, resized_barcode, 3);
}
// Collects candidate barcode boxes at several block sizes.
// Clears localization_bbox / bbox_scores, builds the integral images once (preprocess),
// then for every scale runs calCoherence -> barcodeErode -> regionGrowing with a block
// size equal to that fraction of the shorter image side (at least 1 pixel).
void Detect::localization()
{
    localization_bbox.clear();
    bbox_scores.clear();

    // Integral images are shared by all scales.
    preprocess();

    // Block sizes as fractions of the shorter image side (empirical setting).
    static constexpr float SCALE_LIST[] = {0.01f, 0.03f, 0.06f, 0.08f};
    const float shorter_side = static_cast<float>(std::min(width, height));

    for (const float scale : SCALE_LIST)
    {
        const int rounded = cvRound(shorter_side * scale);
        // A zero-sized block would divide by zero further down.
        const int window_size = (rounded == 0) ? 1 : rounded;

        calCoherence(window_size);
        barcodeErode();
        regionGrowing(window_size);
    }
}
bool Detect::computeTransformationPoints()
{
bbox_indices.clear();
transformation_points.clear();
transformation_points.reserve(bbox_indices.size());
RotatedRect rect;
Point2f temp[4];
const float THRESHOLD_SCORE = float(width * height) / 300.f;
NMSBoxes(localization_bbox, bbox_scores, THRESHOLD_SCORE, 0.1f, bbox_indices);
for (const auto &bbox_index : bbox_indices)
{
rect = localization_bbox[bbox_index];
if (purpose == ZOOMING)
{
rect.center /= coeff_expansion;
rect.size.height /= static_cast<float>(coeff_expansion);
rect.size.width /= static_cast<float>(coeff_expansion);
}
else if (purpose == SHRINKING)
{
rect.center *= coeff_expansion;
rect.size.height *= static_cast<float>(coeff_expansion);
rect.size.width *= static_cast<float>(coeff_expansion);
}
rect.points(temp);
transformation_points.emplace_back(vector<Point2f>{temp[0], temp[1], temp[2], temp[3]});
}
return !transformation_points.empty();
}
void Detect::preprocess()
{
Mat scharr_x, scharr_y, temp;
static constexpr double THRESHOLD_MAGNITUDE = 64.;
Scharr(resized_barcode, scharr_x, CV_32F, 1, 0);
Scharr(resized_barcode, scharr_y, CV_32F, 0, 1);
// calculate magnitude of gradient and truncate
magnitude(scharr_x, scharr_y, temp);
threshold(temp, temp, THRESHOLD_MAGNITUDE, 1, THRESH_BINARY);
temp.convertTo(gradient_magnitude, CV_8U);
integral(gradient_magnitude, integral_edges, CV_32F);
for (int y = 0; y < height; y++)
{
auto *const x_row = scharr_x.ptr<float_t>(y);
auto *const y_row = scharr_y.ptr<float_t>(y);
auto *const magnitude_row = gradient_magnitude.ptr<uint8_t>(y);
for (int pos = 0; pos < width; pos++)
{
if (magnitude_row[pos] == 0)
{
x_row[pos] = 0;
y_row[pos] = 0;
continue;
}
if (x_row[pos] < 0)
{
x_row[pos] *= -1;
y_row[pos] *= -1;
}
}
}
integral(scharr_x, temp, integral_x_sq, CV_32F, CV_32F);
integral(scharr_y, temp, integral_y_sq, CV_32F, CV_32F);
integral(scharr_x.mul(scharr_y), integral_xy, temp, CV_32F, CV_32F);
}
// Writes: coherence, orientation, edge_nums.
// Reads: width, height, integral_edges, integral_x_sq, integral_y_sq, integral_xy.
void Detect::calCoherence(int window_size)
{
static constexpr float THRESHOLD_COHERENCE = 0.9f;
int right_col, left_col, top_row, bottom_row;
float xy, x_sq, y_sq, d, rect_area;
const float THRESHOLD_AREA = float(window_size * window_size) * 0.42f;
Size new_size(width / window_size, height / window_size);
coherence = Mat(new_size, CV_8U), orientation = Mat(new_size, CV_32F), edge_nums = Mat(new_size, CV_32F);
float top_left, top_right, bottom_left, bottom_right;
int integral_cols = width + 1;
const auto *edges_ptr = integral_edges.ptr<float_t>(), *x_sq_ptr = integral_x_sq.ptr<float_t>(), *y_sq_ptr = integral_y_sq.ptr<float_t>(), *xy_ptr = integral_xy.ptr<float_t>();
for (int y = 0; y < new_size.height; y++)
{
auto *coherence_row = coherence.ptr<uint8_t>(y);
auto *orientation_row = orientation.ptr<float_t>(y);
auto *edge_nums_row = edge_nums.ptr<float_t>(y);
if (y * window_size >= height)
{
continue;
}
top_row = y * window_size;
bottom_row = min(height, (y + 1) * window_size);
for (int pos = 0; pos < new_size.width; pos++)
{
// then calculate the column locations of the rectangle and set them to -1
// if they are outside the matrix bounds
if (pos * window_size >= width)
{
continue;
}
left_col = pos * window_size;
right_col = min(width, (pos + 1) * window_size);
//we had an integral image to count non-zero elements
CALCULATE_SUM(edges_ptr, rect_area)
if (rect_area < THRESHOLD_AREA)
{
// smooth region
coherence_row[pos] = 0;
continue;
}
CALCULATE_SUM(x_sq_ptr, x_sq)
CALCULATE_SUM(y_sq_ptr, y_sq)
CALCULATE_SUM(xy_ptr, xy)
// get the values of the rectangle corners from the integral image - 0 if outside bounds
d = sqrt((x_sq - y_sq) * (x_sq - y_sq) + 4 * xy * xy) / (x_sq + y_sq);
if (d > THRESHOLD_COHERENCE)
{
coherence_row[pos] = 255;
orientation_row[pos] = atan2(x_sq - y_sq, 2 * xy) / 2.0f;
edge_nums_row[pos] = rect_area;
}
else
{
coherence_row[pos] = 0;
}
}
}
}
// Appends to: localization_bbox, bbox_scores.
// Modifies: coherence (every visited block is cleared).
// Reads: coherence, orientation, edge_nums.
void Detect::regionGrowing(int window_size)
{
static constexpr float LOCAL_THRESHOLD_COHERENCE = 0.95f, THRESHOLD_RADIAN =
PI / 30, LOCAL_RATIO = 0.5f, EXPANSION_FACTOR = 1.2f;
static constexpr uint THRESHOLD_BLOCK_NUM = 35;
Point pt_to_grow, pt; //point to grow
float src_value;
float cur_value;
float edge_num;
float rect_orientation;
float sin_sum, cos_sum;
uint counter;
//grow direction
static constexpr int DIR[8][2] = {{-1, -1},
{0, -1},
{1, -1},
{1, 0},
{1, 1},
{0, 1},
{-1, 1},
{-1, 0}};
vector<Point2f> growingPoints, growingImgPoints;
for (int y = 0; y < coherence.rows; y++)
{
auto *coherence_row = coherence.ptr<uint8_t>(y);
for (int x = 0; x < coherence.cols; x++)
{
if (coherence_row[x] == 0)
{
continue;
}
// flag
coherence_row[x] = 0;
growingPoints.clear();
growingImgPoints.clear();
pt = Point(x, y);
cur_value = orientation.at<float_t>(pt);
sin_sum = sin(2 * cur_value);
cos_sum = cos(2 * cur_value);
counter = 1;
edge_num = edge_nums.at<float_t>(pt);
growingPoints.push_back(pt);
growingImgPoints.push_back(Point(pt));
while (!growingPoints.empty())
{
pt = growingPoints.back();
growingPoints.pop_back();
src_value = orientation.at<float_t>(pt);
//growing in eight directions
for (auto i : DIR)
{
pt_to_grow = Point(pt.x + i[0], pt.y + i[1]);
//check if out of boundary
if (!isValidCoord(pt_to_grow, coherence.size()))
{
continue;
}
if (coherence.at<uint8_t>(pt_to_grow) == 0)
{
continue;
}
cur_value = orientation.at<float_t>(pt_to_grow);
if (abs(cur_value - src_value) < THRESHOLD_RADIAN ||
abs(cur_value - src_value) > PI - THRESHOLD_RADIAN)
{
coherence.at<uint8_t>(pt_to_grow) = 0;
sin_sum += sin(2 * cur_value);
cos_sum += cos(2 * cur_value);
counter += 1;
edge_num += edge_nums.at<float_t>(pt_to_grow);
growingPoints.push_back(pt_to_grow); //push next point to grow back to stack
growingImgPoints.push_back(pt_to_grow);
}
}
}
//minimum block num
if (counter < THRESHOLD_BLOCK_NUM)
{
continue;
}
float local_coherence = (sin_sum * sin_sum + cos_sum * cos_sum) / static_cast<float>(counter * counter);
// minimum local gradient orientation_arg coherence_arg
if (local_coherence < LOCAL_THRESHOLD_COHERENCE)
{
continue;
}
RotatedRect minRect = minAreaRect(growingImgPoints);
if (edge_num < minRect.size.area() * float(window_size * window_size) * LOCAL_RATIO ||
static_cast<float>(counter) < minRect.size.area() * LOCAL_RATIO)
{
continue;
}
const float local_orientation = atan2(cos_sum, sin_sum) / 2.0f;
// only orientation_arg is approximately equal to the rectangle orientation_arg
rect_orientation = (minRect.angle) * PI / 180.f;
if (minRect.size.width < minRect.size.height)
{
rect_orientation += (rect_orientation <= 0.f ? HALF_PI : -HALF_PI);
std::swap(minRect.size.width, minRect.size.height);
}
if (abs(local_orientation - rect_orientation) > THRESHOLD_RADIAN &&
abs(local_orientation - rect_orientation) < PI - THRESHOLD_RADIAN)
{
continue;
}
minRect.angle = local_orientation * 180.f / PI;
minRect.size.width *= static_cast<float>(window_size) * EXPANSION_FACTOR;
minRect.size.height *= static_cast<float>(window_size);
minRect.center.x = (minRect.center.x + 0.5f) * static_cast<float>(window_size);
minRect.center.y = (minRect.center.y + 0.5f) * static_cast<float>(window_size);
localization_bbox.push_back(minRect);
bbox_scores.push_back(edge_num);
}
}
}
// Returns the four 3x3 kernels used by Detect::barcodeErode. Each kernel marks one pair of
// opposite neighbours of the centre cell: the two diagonals, the horizontal pair and the
// vertical pair. The array is built once, on first use.
inline const std::array<Mat, 4> &getStructuringElement()
{
    static const std::array<Mat, 4> kernels{
            // top-left / bottom-right
            Mat_<uint8_t>{{3, 3},
                          {255, 0, 0,
                           0, 0, 0,
                           0, 0, 255}},
            // top-right / bottom-left
            Mat_<uint8_t>{{3, 3},
                          {0, 0, 255,
                           0, 0, 0,
                           255, 0, 0}},
            // left / right
            Mat_<uint8_t>{{3, 3},
                          {0, 0, 0,
                           255, 0, 255,
                           0, 0, 0}},
            // top / bottom
            Mat_<uint8_t>{{3, 3},
                          {0, 255, 0,
                           0, 0, 0,
                           0, 255, 0}}};
    return kernels;
}
// Modifies coherence in place.
void Detect::barcodeErode()
{
static const std::array<Mat, 4> &structuringElement = getStructuringElement();
Mat m0, m1, m2, m3;
dilate(coherence, m0, structuringElement[0]);
dilate(coherence, m1, structuringElement[1]);
dilate(coherence, m2, structuringElement[2]);
dilate(coherence, m3, structuringElement[3]);
int sum;
for (int y = 0; y < coherence.rows; y++)
{
auto coherence_row = coherence.ptr<uint8_t>(y);
auto m0_row = m0.ptr<uint8_t>(y);
auto m1_row = m1.ptr<uint8_t>(y);
auto m2_row = m2.ptr<uint8_t>(y);
auto m3_row = m3.ptr<uint8_t>(y);
for (int pos = 0; pos < coherence.cols; pos++)
{
if (coherence_row[pos] != 0)
{
sum = m0_row[pos] + m1_row[pos] + m2_row[pos] + m3_row[pos];
//more than 2 group
coherence_row[pos] = sum > 600 ? 255 : 0;
}
}
}
}
}
}