Merge pull request #22959 from feuerste:parallel_mertens

Parallelize implementation of HDR MergeMertens. * Parallelize MergeMertens. * Added performance tests for HDR. * Ran clang-format. * Optimizations. * Fix data path for Windows. * Remove compilation warning on Windows. * Remove clang-format for existing file. * Addressing reviewer comments. * Ensure correct summation order. * Add test for determinism. * Move result pyramid into sync struct. * Reuse sync for first loop as well. * Use OpenCV's threading primitives. * Remove cout.
2025-06-27 23:11:57 +08:00 · 2022-12-21 15:10:59 +01:00 · 2022-12-21 15:10:59 +01:00 · bc8d494617
commit bc8d494617
parent 35e771daab
3 changed files with 150 additions and 69 deletions
--- a/modules/photo/perf/perf_hdr.cpp
+++ b/modules/photo/perf/perf_hdr.cpp
@ -0,0 +1,64 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test
+{
+namespace
+{
+struct ExposureSeq
+{
+    std::vector<Mat> images;
+    std::vector<float> times;
+};
+
+ExposureSeq loadExposureSeq(const std::string& list_filename)
+{
+    std::ifstream list_file(list_filename);
+    EXPECT_TRUE(list_file.is_open());
+    string name;
+    float val;
+    const String path(list_filename.substr(0, list_filename.find_last_of("\\/") + 1));
+    ExposureSeq seq;
+    while (list_file >> name >> val)
+    {
+        Mat img = imread(path + name);
+        EXPECT_FALSE(img.empty()) << "Could not load input image " << path + name;
+        seq.images.push_back(img);
+        seq.times.push_back(1 / val);
+    }
+    list_file.close();
+    return seq;
+}
+
+PERF_TEST(HDR, Mertens)
+{
+    const ExposureSeq seq = loadExposureSeq(getDataPath("cv/hdr/exposures/list.txt"));
+    Ptr<MergeMertens> merge = createMergeMertens();
+    Mat result(seq.images.front().size(), seq.images.front().type());
+    TEST_CYCLE() merge->process(seq.images, result);
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST(HDR, Debevec)
+{
+    const ExposureSeq seq = loadExposureSeq(getDataPath("cv/hdr/exposures/list.txt"));
+    Ptr<MergeDebevec> merge = createMergeDebevec();
+    Mat result(seq.images.front().size(), seq.images.front().type());
+    TEST_CYCLE() merge->process(seq.images, result, seq.times);
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST(HDR, Robertson)
+{
+    const ExposureSeq seq = loadExposureSeq(getDataPath("cv/hdr/exposures/list.txt"));
+    Ptr<MergeRobertson> merge = createMergeRobertson();
+    Mat result(seq.images.front().size(), seq.images.front().type());
+    TEST_CYCLE() merge->process(seq.images, result, seq.times);
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
+} // namespace opencv_test
--- a/modules/photo/src/merge.cpp
+++ b/modules/photo/src/merge.cpp
@ -172,87 +172,97 @@ public:

        std::vector<Mat> weights(images.size());
        Mat weight_sum = Mat::zeros(size, CV_32F);
+        Mutex weight_sum_mutex;

-        for(size_t i = 0; i < images.size(); i++) {
-            Mat img, gray, contrast, saturation, wellexp;
-            std::vector<Mat> splitted(channels);
+        parallel_for_(Range(0, static_cast<int>(images.size())), [&](const Range& range) {
+            for(int i = range.start; i < range.end; i++) {
+                Mat& img = images[i];
+                Mat gray, contrast, saturation, wellexp;
+                std::vector<Mat> splitted(channels);

-            images[i].convertTo(img, CV_32F, 1.0f/255.0f);
-            if(channels == 3) {
-                cvtColor(img, gray, COLOR_RGB2GRAY);
-            } else {
-                img.copyTo(gray);
+                img.convertTo(img, CV_32F, 1.0f/255.0f);
+                if(channels == 3) {
+                    cvtColor(img, gray, COLOR_RGB2GRAY);
+                } else {
+                    img.copyTo(gray);
+                }
+                split(img, splitted);
+
+                Laplacian(gray, contrast, CV_32F);
+                contrast = abs(contrast);
+
+                Mat mean = Mat::zeros(size, CV_32F);
+                for(int c = 0; c < channels; c++) {
+                    mean += splitted[c];
+                }
+                mean /= channels;
+
+                saturation = Mat::zeros(size, CV_32F);
+                for(int c = 0; c < channels;  c++) {
+                    Mat deviation = splitted[c] - mean;
+                    pow(deviation, 2.0f, deviation);
+                    saturation += deviation;
+                }
+                sqrt(saturation, saturation);
+
+                wellexp = Mat::ones(size, CV_32F);
+                for(int c = 0; c < channels; c++) {
+                    Mat expo = splitted[c] - 0.5f;
+                    pow(expo, 2.0f, expo);
+                    expo = -expo / 0.08f;
+                    exp(expo, expo);
+                    wellexp = wellexp.mul(expo);
+                }
+
+                pow(contrast, wcon, contrast);
+                pow(saturation, wsat, saturation);
+                pow(wellexp, wexp, wellexp);
+
+                weights[i] = contrast;
+                if(channels == 3) {
+                    weights[i] = weights[i].mul(saturation);
+                }
+                weights[i] = weights[i].mul(wellexp) + 1e-12f;
+
+                AutoLock lock(weight_sum_mutex);
+                weight_sum += weights[i];
            }
-            split(img, splitted);
+        });

-            Laplacian(gray, contrast, CV_32F);
-            contrast = abs(contrast);
-
-            Mat mean = Mat::zeros(size, CV_32F);
-            for(int c = 0; c < channels; c++) {
-                mean += splitted[c];
-            }
-            mean /= channels;
-
-            saturation = Mat::zeros(size, CV_32F);
-            for(int c = 0; c < channels;  c++) {
-                Mat deviation = splitted[c] - mean;
-                pow(deviation, 2.0f, deviation);
-                saturation += deviation;
-            }
-            sqrt(saturation, saturation);
-
-            wellexp = Mat::ones(size, CV_32F);
-            for(int c = 0; c < channels; c++) {
-                Mat expo = splitted[c] - 0.5f;
-                pow(expo, 2.0f, expo);
-                expo = -expo / 0.08f;
-                exp(expo, expo);
-                wellexp = wellexp.mul(expo);
-            }
-
-            pow(contrast, wcon, contrast);
-            pow(saturation, wsat, saturation);
-            pow(wellexp, wexp, wellexp);
-
-            weights[i] = contrast;
-            if(channels == 3) {
-                weights[i] = weights[i].mul(saturation);
-            }
-            weights[i] = weights[i].mul(wellexp) + 1e-12f;
-            weight_sum += weights[i];
-        }
        int maxlevel = static_cast<int>(logf(static_cast<float>(min(size.width, size.height))) / logf(2.0f));
        std::vector<Mat> res_pyr(maxlevel + 1);
+        std::vector<Mutex> res_pyr_mutexes(maxlevel + 1);

-        for(size_t i = 0; i < images.size(); i++) {
-            weights[i] /= weight_sum;
-            Mat img;
-            images[i].convertTo(img, CV_32F, 1.0f/255.0f);
+        parallel_for_(Range(0, static_cast<int>(images.size())), [&](const Range& range) {
+            for(int i = range.start; i < range.end; i++) {
+                weights[i] /= weight_sum;

-            std::vector<Mat> img_pyr, weight_pyr;
-            buildPyramid(img, img_pyr, maxlevel);
-            buildPyramid(weights[i], weight_pyr, maxlevel);
+                std::vector<Mat> img_pyr, weight_pyr;
+                buildPyramid(images[i], img_pyr, maxlevel);
+                buildPyramid(weights[i], weight_pyr, maxlevel);

-            for(int lvl = 0; lvl < maxlevel; lvl++) {
-                Mat up;
-                pyrUp(img_pyr[lvl + 1], up, img_pyr[lvl].size());
-                img_pyr[lvl] -= up;
-            }
-            for(int lvl = 0; lvl <= maxlevel; lvl++) {
-                std::vector<Mat> splitted(channels);
-                split(img_pyr[lvl], splitted);
-                for(int c = 0; c < channels; c++) {
-                    splitted[c] = splitted[c].mul(weight_pyr[lvl]);
+                for(int lvl = 0; lvl < maxlevel; lvl++) {
+                    Mat up;
+                    pyrUp(img_pyr[lvl + 1], up, img_pyr[lvl].size());
+                    img_pyr[lvl] -= up;
                }
-                merge(splitted, img_pyr[lvl]);
-                if(res_pyr[lvl].empty()) {
-                    res_pyr[lvl] = img_pyr[lvl];
-                } else {
-                    res_pyr[lvl] += img_pyr[lvl];
+                for(int lvl = 0; lvl <= maxlevel; lvl++) {
+                    std::vector<Mat> splitted(channels);
+                    split(img_pyr[lvl], splitted);
+                    for(int c = 0; c < channels; c++) {
+                        splitted[c] = splitted[c].mul(weight_pyr[lvl]);
+                    }
+                    merge(splitted, img_pyr[lvl]);
+
+                    AutoLock lock(res_pyr_mutexes[lvl]);
+                    if(res_pyr[lvl].empty()) {
+                        res_pyr[lvl] = img_pyr[lvl];
+                    } else {
+                        res_pyr[lvl] += img_pyr[lvl];
+                    }
                }
            }
-        }
+        });
        for(int lvl = maxlevel; lvl > 0; lvl--) {
            Mat up;
            pyrUp(res_pyr[lvl], up, res_pyr[lvl - 1].size());
--- a/modules/python/test/test_umat.py
+++ b/modules/python/test/test_umat.py
@ -107,12 +107,19 @@ class UMat(NewOpenCVTests):

        images, _ = load_exposure_seq(os.path.join(test_data_path, 'exposures'))

+        # As we want to test mat vs. umat here, we temporarily set only one worker-thread to achieve
+        # deterministic summations inside mertens' parallelized process.
+        num_threads = cv.getNumThreads()
+        cv.setNumThreads(1)
+
        merge = cv.createMergeMertens()
        mat_result = merge.process(images)

        umat_images = [cv.UMat(img) for img in images]
        umat_result = merge.process(umat_images)

+        cv.setNumThreads(num_threads)
+
        self.assertTrue(np.allclose(umat_result.get(), mat_result))