Merge pull request #26834 from chacha21:findContours_speedup

Find contours speedup #26834

It is an attempt, as suggested by #26775, to restore the speed lost when migrating the `findContours()` implementation from C to C++.

The patch adds an "Arena" (a pool) of pre-allocated memory so that contour points (and TreeNodes) can be picked from the Arena.
The code of `findContours()` is mostly unchanged, the arena usage being implicit through a utility class Arena::Item that provides C++ overloaded operators and construct/destruct logic.

As mentioned in #26775, the contour points are allocated and released in order, and can be represented by ranges of indices in their arena. No subset of a range is ever released on its own, so no hole can appear inside a range; that is why the internal representation as a range of indices makes sense.

The TreeNodes use another Arena class that does not follow that range logic.

Currently, there is a significant improvement of the run-time on the test mentioned in #26775, but it is still far from the `findContours_legacy()` performance.


- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [X] The PR is proposed to the proper branch
- [X] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Pierre Chatelier 2025-03-12 16:00:01 +01:00 committed by GitHub
parent 0db6a496ba
commit d83df66ff0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 271 additions and 51 deletions

View File

@ -31,7 +31,7 @@ static const Point chainCodeDeltas[8] =
// Restores all the digital curve points from the chain code.
// Removes the points (from the resultant polygon)
// that have zero 1-curvature
static vector<ApproxItem> pass_0(const vector<schar>& chain, Point pt, bool isApprox, bool isFull)
static vector<ApproxItem> pass_0(const ContourCodesStorage& chain, Point pt, bool isApprox, bool isFull)
{
vector<ApproxItem> res;
const size_t len = chain.size();
@ -52,17 +52,14 @@ static vector<ApproxItem> pass_0(const vector<schar>& chain, Point pt, bool isAp
return res;
}
static vector<Point> gatherPoints(const vector<ApproxItem>& ares)
static void gatherPoints(const vector<ApproxItem>& ares, ContourPointsStorage& output)
{
vector<Point> res;
res.reserve(ares.size() / 2);
output.clear();
for (const ApproxItem& item : ares)
{
if (item.removed)
continue;
res.push_back(item.pt);
if (!item.removed)
output.push_back(item.pt);
}
return res;
}
static size_t calc_support(const vector<ApproxItem>& ares, size_t i)
@ -273,11 +270,14 @@ static void pass_cleanup(vector<ApproxItem>& ares, size_t start_idx)
} // namespace
vector<Point> cv::approximateChainTC89(vector<schar> chain, const Point& origin, const int method)
void cv::approximateChainTC89(const ContourCodesStorage& chain, const Point& origin, const int method,
ContourPointsStorage& output)
{
if (chain.size() == 0)
{
return vector<Point>({origin});
output.clear();
output.push_back(origin);
return;
}
const bool isApprox = method == CHAIN_APPROX_TC89_L1 || method == CHAIN_APPROX_TC89_KCOS;
@ -349,5 +349,5 @@ vector<Point> cv::approximateChainTC89(vector<schar> chain, const Point& origin,
}
}
return gatherPoints(ares);
gatherPoints(ares, output);
}

View File

@ -0,0 +1,122 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#ifndef OPENCV_CONTOURS_BLOCKSTORAGE_HPP
#define OPENCV_CONTOURS_BLOCKSTORAGE_HPP
#include "precomp.hpp"
#include <algorithm>
#include <array>
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>
namespace cv {
// Append-only element container that stores its elements in fixed-size blocks.
//
// Elements are addressed by a single running index; index i lives in block
// i / BLOCK_SIZE_ELEM at offset i % BLOCK_SIZE_ELEM. The first blocks are
// embedded in the object ("static" blocks); further blocks are allocated on
// the heap on demand. Growing never relocates existing elements.
//
// BLOCK_SIZE_ELEM - number of elements in a block
// STATIC_CAPACITY_BYTES - static memory in bytes for preallocated blocks
template <typename T, size_t BLOCK_SIZE_ELEM = 1024, size_t STATIC_CAPACITY_BYTES = 4096>
class BlockStorage {
    static_assert(BLOCK_SIZE_ELEM > 0, "BlockStorage: a block must hold at least one element");

public:
    using value_type = T;
    struct block_type { value_type data[BLOCK_SIZE_ELEM]; };

    // Creates an empty storage. When there are no static blocks, one heap
    // block is allocated up front.
    BlockStorage()
    {
        if (staticBlocksCount == 0)
            dynamicBlocks.push_back(std::unique_ptr<block_type>(new block_type));
    }

    BlockStorage(const BlockStorage&) = delete;
    BlockStorage& operator=(const BlockStorage&) = delete;

    // Takes over the content of `other` and leaves `other` empty but usable.
    BlockStorage(BlockStorage&& other) noexcept
        : staticBlocks(std::move(other.staticBlocks)),
          dynamicBlocks(std::move(other.dynamicBlocks)),
          sz(other.sz)
    {
        other.dynamicBlocks.clear();
        other.sz = 0;
    }

    // Releases the blocks currently held, takes over the content of `other`
    // and leaves `other` empty but usable.
    // Written by hand: staticBlocksCount is a const member, so a defaulted
    // move assignment would be defined as deleted.
    BlockStorage& operator=(BlockStorage&& other) noexcept
    {
        if (this != &other)
        {
            staticBlocks = std::move(other.staticBlocks);
            dynamicBlocks = std::move(other.dynamicBlocks); // frees the previous heap blocks
            sz = other.sz;
            other.dynamicBlocks.clear();
            other.sz = 0;
        }
        return *this;
    }

    // Heap blocks are owned through unique_ptr and released automatically.
    ~BlockStorage() = default;

    // Drops all elements. Heap blocks are released, except for one that is
    // kept when the storage has no static blocks.
    void clear(void)
    {
        const size_t minDynamicBlocks = (staticBlocksCount == 0) ? 1 : 0;
        if (dynamicBlocks.size() > minDynamicBlocks)
            dynamicBlocks.resize(minDynamicBlocks);
        sz = 0;
    }

    // Appends a copy of `value`, allocating a new heap block when the
    // current blocks are full.
    void push_back(const value_type& value)
    {
        const size_t blockIndex = sz / BLOCK_SIZE_ELEM;
        if (blockIndex >= staticBlocksCount + dynamicBlocks.size())
            dynamicBlocks.push_back(std::unique_ptr<block_type>(new block_type));
        blockAt(blockIndex).data[sz % BLOCK_SIZE_ELEM] = value;
        ++sz;
    }

    // Number of elements pushed since construction or the last clear().
    size_t size() const { return sz; }

    // Element access by running index. No bounds checking is performed.
    const value_type& at(size_t index) const
    {
        return blockAt(index / BLOCK_SIZE_ELEM).data[index % BLOCK_SIZE_ELEM];
    }
    value_type& at(size_t index)
    {
        return blockAt(index / BLOCK_SIZE_ELEM).data[index % BLOCK_SIZE_ELEM];
    }
    const value_type& operator[](size_t index) const { return at(index); }
    value_type& operator[](size_t index) { return at(index); }

public:
    // Walks the index range [first, last) as a sequence of contiguous chunks,
    // one chunk per block touched by the range. Typical use:
    //   for (auto it = s.getRangeIterator(a, b); !it.done(); ++it)
    //       { auto chunk = *it; /* chunk.first: pointer, chunk.second: length */ }
    class RangeIterator
    {
    public:
        RangeIterator(const BlockStorage* _owner, size_t _first, size_t _last)
            : owner(_owner), remaining(_last - _first),
              blockIndex(_first / BLOCK_SIZE_ELEM), offset(_first % BLOCK_SIZE_ELEM)
        {
        }

        // True once every element of the range has been visited.
        bool done(void) const { return !remaining; }

        // Current chunk as (pointer to first element, number of elements).
        // Must not be called when done() is true.
        std::pair<const value_type*, size_t> operator*(void) const { return get(); }
        std::pair<const value_type*, size_t> get(void) const
        {
            const value_type* rangeStart = owner->blockAt(blockIndex).data + offset;
            return std::make_pair(rangeStart, chunkLength());
        }

        // Advances to the beginning of the next block.
        RangeIterator& operator++()
        {
            // Only the length is needed here, so the block itself is not touched.
            remaining -= chunkLength();
            offset = 0;
            ++blockIndex;
            return *this;
        }

    private:
        // Number of range elements located in the current block.
        size_t chunkLength() const
        {
            return std::min(remaining, BLOCK_SIZE_ELEM - offset);
        }

        const BlockStorage* owner = nullptr;
        size_t remaining = 0;   // elements of the range not yet visited
        size_t blockIndex = 0;  // block holding the current chunk
        size_t offset = 0;      // position of the current chunk inside that block
    };

    // Returns an iterator over the chunks covering indices [first, last).
    RangeIterator getRangeIterator(size_t first, size_t last) const
    {
        return RangeIterator(this, first, last);
    }

private:
    // Maps a block index to the static or heap block that backs it.
    const block_type& blockAt(size_t blockIndex) const
    {
        return (blockIndex < staticBlocksCount) ? staticBlocks[blockIndex] :
               *dynamicBlocks[blockIndex - staticBlocksCount];
    }
    block_type& blockAt(size_t blockIndex)
    {
        return (blockIndex < staticBlocksCount) ? staticBlocks[blockIndex] :
               *dynamicBlocks[blockIndex - staticBlocksCount];
    }

private:
    std::array<block_type, STATIC_CAPACITY_BYTES/(BLOCK_SIZE_ELEM*sizeof(value_type))> staticBlocks;
    // Number of blocks embedded in the object (may be zero).
    const size_t staticBlocksCount = STATIC_CAPACITY_BYTES/(BLOCK_SIZE_ELEM*sizeof(value_type));
    std::vector<std::unique_ptr<block_type>> dynamicBlocks;
    size_t sz = 0;
};
} // namespace cv
#endif // OPENCV_CONTOURS_BLOCKSTORAGE_HPP

View File

@ -22,12 +22,11 @@ void cv::contourTreeToResults(CTree& tree,
return;
}
// mapping for indexes (original -> resulting)
map<int, int> index_mapping;
index_mapping[-1] = -1;
index_mapping[0] = -1;
CV_Assert(tree.size() < (size_t)numeric_limits<int>::max());
// mapping for indexes (original -> resulting)
// -1 - based indexing
vector<int> index_mapping(tree.size() + 1, -1);
const int total = (int)tree.size() - 1;
_contours.create(total, 1, 0, -1, true);
{
@ -39,7 +38,7 @@ void cv::contourTreeToResults(CTree& tree,
CV_Assert(elem.self() != -1);
if (elem.self() == 0)
continue;
index_mapping[elem.self()] = i;
index_mapping.at(elem.self() + 1) = i;
CV_Assert(elem.body.size() < (size_t)numeric_limits<int>::max());
const int sz = (int)elem.body.size();
_contours.create(sz, 1, res_type, i, true);
@ -65,10 +64,10 @@ void cv::contourTreeToResults(CTree& tree,
if (elem.self() == 0)
continue;
Vec4i& h_vec = h_mat.at<Vec4i>(i);
h_vec = Vec4i(index_mapping.at(elem.next),
index_mapping.at(elem.prev),
index_mapping.at(elem.first_child),
index_mapping.at(elem.parent));
h_vec = Vec4i(index_mapping.at(elem.next + 1),
index_mapping.at(elem.prev + 1),
index_mapping.at(elem.first_child + 1),
index_mapping.at(elem.parent + 1));
++i;
}
}

View File

@ -6,7 +6,8 @@
#define OPENCV_CONTOURS_COMMON_HPP
#include "precomp.hpp"
#include <stack>
#include "contours_blockstorage.hpp"
namespace cv {
@ -45,11 +46,15 @@ public:
T body;
public:
TreeNode(int self) :
self_(self), parent(-1), first_child(-1), prev(-1), next(-1), ctable_next(-1)
TreeNode(int self, T&& body_) :
self_(self), parent(-1), first_child(-1), prev(-1), next(-1), ctable_next(-1), body(std::move(body_))
{
CV_Assert(self >= 0);
}
TreeNode(const TreeNode&) = delete;
TreeNode(TreeNode&&) noexcept = default;
TreeNode& operator=(const TreeNode&) = delete;
TreeNode& operator=(TreeNode&&) noexcept = default;
int self() const
{
return self_;
@ -59,15 +64,22 @@ public:
template <typename T>
class Tree
{
public:
Tree() {}
Tree(const Tree&) = delete;
Tree(Tree&&) = delete;
Tree& operator=(const Tree&) = delete;
Tree& operator=(Tree&&) = delete;
~Tree() = default;
private:
std::vector<TreeNode<T>> nodes;
public:
TreeNode<T>& newElem()
TreeNode<T>& newElem(T && body_)
{
const size_t idx = nodes.size();
CV_DbgAssert(idx < (size_t)std::numeric_limits<int>::max());
nodes.push_back(TreeNode<T>((int)idx));
nodes.emplace_back(std::move(TreeNode<T>((int)idx, std::move(body_))));
return nodes[idx];
}
TreeNode<T>& elem(int idx)
@ -101,7 +113,7 @@ public:
child.parent = prev_item.parent;
if (prev_item.next != -1)
{
nodes[prev_item.next].prev = idx;
((TreeNode<T>&)nodes[prev_item.next]).prev = idx;
child.next = prev_item.next;
}
child.prev = prev;
@ -159,23 +171,80 @@ public:
}
private:
std::stack<int> levels;
Tree<T>& tree;
std::stack<int> levels;
};
//==============================================================================
// A movable, non-copyable view over the index range [first, last) of a
// BlockStorage that it points to but does not destroy.
// Elements are appended through push_back(), which always appends at the end
// of the underlying storage.
// NOTE(review): this presumably relies on callers only appending to the most
// recently started range, so that each range stays contiguous - not enforced here.
template <typename T, size_t BLOCK_SIZE_ELEM, size_t STATIC_CAPACITY_BYTES>
class ContourDataStorage
{
public:
    using data_storage_t = T;
    using storage_t = BlockStorage<data_storage_t, BLOCK_SIZE_ELEM, STATIC_CAPACITY_BYTES>;

public:
    // A view always needs a backing storage, hence no default constructor.
    ContourDataStorage() = delete;
    ContourDataStorage(storage_t* _storage) : storage(_storage) {}

    ContourDataStorage(const ContourDataStorage&) = delete;
    ContourDataStorage& operator=(const ContourDataStorage&) = delete;

    ContourDataStorage(ContourDataStorage&&) noexcept = default;
    ContourDataStorage& operator=(ContourDataStorage&&) noexcept = default;

    ~ContourDataStorage() = default;

public:
    // Chunk-wise iteration over the elements of this range.
    typename storage_t::RangeIterator getRangeIterator() const
    {
        return storage->getRangeIterator(first, last);
    }

public:
    bool empty() const { return last == first; }
    size_t size() const { return last - first; }

public:
    // Makes the range empty; the underlying storage is left untouched.
    void clear() { first = last; }

    // Shrinks the range to newSize elements.
    // Returns false, and changes nothing, when newSize exceeds the current size.
    bool resize(size_t newSize)
    {
        if (newSize > size())
            return false;
        last = first + newSize;
        return true;
    }

    // Appends value at the end of the underlying storage and extends the
    // range to include it. An empty range first restarts at the storage's
    // current end.
    void push_back(const data_storage_t& value)
    {
        if (first == last)
            first = storage->size();
        storage->push_back(value);
        last = storage->size();
    }

    // Element access relative to the beginning of the range (unchecked).
    const data_storage_t& at(size_t index) const { return storage->at(first + index); }
    data_storage_t& at(size_t index) { return storage->at(first + index); }
    const data_storage_t& operator[](size_t index) const { return at(index); }
    data_storage_t& operator[](size_t index) { return at(index); }

private:
    storage_t* storage = nullptr; // backing storage
    size_t first = 0;             // index of the first element of the range
    size_t last = 0;              // one past the index of the last element
};
typedef ContourDataStorage<cv::Point, 1024, 0> ContourPointsStorage;
typedef ContourDataStorage<schar, 1024, 0> ContourCodesStorage;
class Contour
{
public:
ContourPointsStorage pts;
cv::Rect brect;
cv::Point origin;
std::vector<cv::Point> pts;
std::vector<schar> codes;
bool isHole;
bool isChain;
ContourCodesStorage codes;
bool isHole = false;
bool isChain = false;
Contour() : isHole(false), isChain(false) {}
explicit Contour(ContourPointsStorage::storage_t* pointStorage_,
ContourCodesStorage::storage_t* codesStorage_)
:pts(pointStorage_),codes(codesStorage_) {}
Contour(const Contour&) = delete;
Contour(Contour&& other) noexcept = default;
Contour& operator=(const Contour&) = delete;
Contour& operator=(Contour&& other) noexcept = default;
~Contour() = default;
void updateBoundingRect() {}
bool isEmpty() const
{
@ -185,17 +254,37 @@ public:
{
return isChain ? codes.size() : pts.size();
}
void addPoint(const Point& pt)
{
pts.push_back(pt);
}
void copyTo(void* data) const
{
// NOTE: Mat::copyTo doesn't work because it creates new Mat object
// instead of reusing existing vector data
if (isChain)
{
memcpy(data, &codes[0], codes.size() * sizeof(codes[0]));
/*memcpy(data, codes.data(), codes.size() * sizeof(typename decltype(codes)::value_type));*/
schar* dst = reinterpret_cast<schar*>(data);
for(auto rangeIterator = codes.getRangeIterator() ; !rangeIterator.done() ; ++rangeIterator)
{
const auto range = *rangeIterator;
memcpy(dst, range.first, range.second*sizeof(schar));
dst += range.second;
}
}
else
{
memcpy(data, &pts[0], pts.size() * sizeof(pts[0]));
/*for (size_t i = 0, count = pts.size() ; i < count ; ++i)
((Point*)data)[i] = pts.at(i);
*/
cv::Point* dst = reinterpret_cast<cv::Point*>(data);
for(auto rangeIterator = pts.getRangeIterator() ; !rangeIterator.done() ; ++rangeIterator)
{
const auto range = *rangeIterator;
memcpy(dst, range.first, range.second*sizeof(cv::Point));
dst += range.second;
}
}
}
};
@ -211,8 +300,8 @@ void contourTreeToResults(CTree& tree,
cv::OutputArray& _hierarchy);
std::vector<Point>
approximateChainTC89(std::vector<schar> chain, const Point& origin, const int method);
void approximateChainTC89(const ContourCodesStorage& chain, const Point& origin, const int method,
ContourPointsStorage& output);
} // namespace cv

View File

@ -90,10 +90,13 @@ public:
vector<int> ext_rns;
vector<int> int_rns;
ContourPointsStorage::storage_t pointsStorage;
ContourCodesStorage::storage_t codesStorage;
public:
LinkRunner()
LinkRunner(void)
{
tree.newElem();
tree.newElem(Contour(&pointsStorage, &codesStorage));
rns.reserve(100);
}
void process(Mat& image);
@ -117,12 +120,12 @@ void LinkRunner::convertLinks(int& first, int& prev, bool isHole)
if (rns[cur].link == -1)
continue;
CNode& node = tree.newElem();
CNode& node = tree.newElem(Contour(&pointsStorage, &codesStorage));
node.body.isHole = isHole;
do
{
node.body.pts.push_back(rns[cur].pt);
node.body.addPoint(rns[cur].pt);
int p_temp = cur;
cur = rns[cur].link;
rns[p_temp].link = -1;

View File

@ -197,7 +197,7 @@ static void icvFetchContourEx(Mat& image,
Trait<T>::setRightFlag(i0, i0, nbd);
if (!res_contour.isChain)
{
res_contour.pts.push_back(pt);
res_contour.addPoint(pt);
}
}
else
@ -236,7 +236,7 @@ static void icvFetchContourEx(Mat& image,
}
else if (s != prev_s || isDirect)
{
res_contour.pts.push_back(pt);
res_contour.addPoint(pt);
}
if (s != prev_s)
@ -281,6 +281,9 @@ static void icvFetchContourEx(Mat& image,
// It supports both hierarchical and plane variants of Suzuki algorithm.
struct ContourScanner_
{
ContourPointsStorage::storage_t& pointsStorage;
ContourCodesStorage::storage_t& codesStorage;
Mat image;
Point offset; // ROI offset: coordinates, added to each contour point
Point pt; // current scanner position
@ -293,7 +296,9 @@ struct ContourScanner_
array<int, 128> ctable;
public:
ContourScanner_() {}
ContourScanner_(ContourPointsStorage::storage_t& _pointsStorage,
ContourCodesStorage::storage_t& _codesStorage)
:pointsStorage(_pointsStorage),codesStorage(_codesStorage) {}
~ContourScanner_() {}
inline bool isInt() const
{
@ -310,13 +315,13 @@ public:
int findNextX(int x, int y, int& prev, int& p);
bool findNext();
static shared_ptr<ContourScanner_> create(Mat img, int mode, int method, Point offset);
static shared_ptr<ContourScanner_> create(ContourPointsStorage::storage_t& pointsStorage, ContourCodesStorage::storage_t& codesStorage, Mat img, int mode, int method, Point offset);
}; // class ContourScanner_
typedef shared_ptr<ContourScanner_> ContourScanner;
shared_ptr<ContourScanner_> ContourScanner_::create(Mat img, int mode, int method, Point offset)
shared_ptr<ContourScanner_> ContourScanner_::create(ContourPointsStorage::storage_t& pointsStorage, ContourCodesStorage::storage_t& codesStorage, Mat img, int mode, int method, Point offset)
{
if (mode == RETR_CCOMP && img.type() == CV_32SC1)
mode = RETR_FLOODFILL;
@ -342,14 +347,14 @@ shared_ptr<ContourScanner_> ContourScanner_::create(Mat img, int mode, int metho
Size size = img.size();
CV_Assert(size.height >= 1);
shared_ptr<ContourScanner_> scanner = make_shared<ContourScanner_>();
shared_ptr<ContourScanner_> scanner = make_shared<ContourScanner_>(pointsStorage, codesStorage);
scanner->image = img;
scanner->mode = mode;
scanner->offset = offset;
scanner->pt = Point(1, 1);
scanner->lnbd = Point(0, 1);
scanner->nbd = 2;
CNode& root = scanner->tree.newElem();
CNode& root = scanner->tree.newElem(Contour(&scanner->pointsStorage, &scanner->codesStorage));
CV_Assert(root.self() == 0);
root.body.isHole = true;
root.body.brect = Rect(Point(0, 0), size);
@ -367,7 +372,7 @@ CNode& ContourScanner_::makeContour(schar& nbd_, const bool is_hole, const int x
const Point start_pt(x - (is_hole ? 1 : 0), y);
CNode& res = tree.newElem();
CNode& res = tree.newElem(Contour(&pointsStorage, &codesStorage));
res.body.isHole = is_hole;
res.body.isChain = isChain;
res.body.origin = start_pt + offset;
@ -403,7 +408,7 @@ CNode& ContourScanner_::makeContour(schar& nbd_, const bool is_hole, const int x
if (this->approx_method1 != this->approx_method2)
{
CV_Assert(res.body.isChain);
res.body.pts = approximateChainTC89(res.body.codes, prev_origin, this->approx_method2);
approximateChainTC89(res.body.codes, prev_origin, this->approx_method2, res.body.pts);
res.body.isChain = false;
}
return res;
@ -674,7 +679,9 @@ void cv::findContours(InputArray _image,
threshold(image, image, 0, 1, THRESH_BINARY);
// find contours
ContourScanner scanner = ContourScanner_::create(image, mode, method, offset + Point(-1, -1));
ContourPointsStorage::storage_t pointsStorage;
ContourCodesStorage::storage_t codesStorage;
ContourScanner scanner = ContourScanner_::create(pointsStorage, codesStorage, image, mode, method, offset + Point(-1, -1));
while (scanner->findNext())
{
}