added support of different descriptor formats into gpu HOGDescriptor

2025-07-24 14:06:27 +08:00 · 2010-11-19 12:06:11 +00:00 · 2010-11-19 12:06:11 +00:00 · 6a9d022a9f
commit 6a9d022a9f
parent faf4d0bc74
4 changed files with 144 additions and 63 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -1007,11 +1007,15 @@ namespace cv
            GpuMat table_space;
        };

+
+        //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+
        struct CV_EXPORTS HOGDescriptor
        {
        public:
            enum { DEFAULT_WIN_SIGMA = -1 };
            enum { DEFAULT_NLEVELS = 64 };
+            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };

            HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), 
                          Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), 
@ -1029,13 +1033,14 @@ namespace cv
            void setSVMDetector(const vector<float>& detector);
            bool checkDetectorSize() const;

-            void computeBlockHistograms(const GpuMat& img);
            void detect(const GpuMat& img, vector<Point>& found_locations, double hit_threshold=0, 
                        Size win_stride=Size(), Size padding=Size());
            void detectMultiScale(const GpuMat& img, vector<Rect>& found_locations, 
                                  double hit_threshold=0, Size win_stride=Size(), Size padding=Size(),
                                  double scale0=1.05, int group_threshold=2);
-            void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors);
+
+            void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, 
+                                int descr_format=DESCR_FORMAT_COL_BY_COL);

            Size win_size;
            Size block_size;
@ -1044,9 +1049,17 @@ namespace cv
            int nbins;
            double win_sigma;
            double threshold_L2hys;
-            bool gamma_correction;
            int nlevels;

+        protected:
+            void computeBlockHistograms(const GpuMat& img);
+            void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
+
+            static int numPartsWithin(int size, int part_size, int stride);
+            static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+            bool gamma_correction;
+
            // Coefficients of the separating plane
            float free_coef;
            GpuMat detector;
@ -1058,13 +1071,8 @@ namespace cv
            // Results of the last histogram evaluation step
            GpuMat block_hists;

-        private:
-            static int numPartsWithin(int size, int part_size, int stride);
-            static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-            void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-
-            GpuMat grad, qangle;
+            // Gradient computation results
+            GpuMat grad, qangle;            
        };    
    }

--- a/modules/gpu/src/cuda/hog.cu
+++ b/modules/gpu/src/cuda/hog.cu
@ -428,9 +428,9 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block


 template <int nthreads>
-__global__ void extract_descriptors_kernel(const int img_win_width, const int img_block_width, 
-                                           const int win_block_stride_x, const int win_block_stride_y,
-                                           const float* block_hists, PtrElemStepf descriptors)
+__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, 
+                                              const int win_block_stride_y, const float* block_hists, 
+                                              PtrElemStepf descriptors)
 {
    // Get left top corner of the window in src
    const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + 
@ -449,9 +449,9 @@ __global__ void extract_descriptors_kernel(const int img_win_width, const int im
 }


-void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x, 
-                         int win_stride_y, int win_stride_x, int height, int width, float* block_hists, 
-                         DevMem2Df descriptors)
+void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, 
+                            int win_stride_y, int win_stride_x, int height, int width, float* block_hists, 
+                            DevMem2Df descriptors)
 {
    const int nthreads = 256;

@ -464,9 +464,56 @@ void extract_descriptors(int win_height, int win_width, int block_stride_y, int

    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / 
                          block_stride_x;
-    extract_descriptors_kernel<nthreads><<<grid, threads>>>(
-        img_win_width, img_block_width, win_block_stride_x, win_block_stride_y, 
-        block_hists, descriptors);
+    extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
+        img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
+    cudaSafeCall(cudaThreadSynchronize());
+}
+
+
+template <int nthreads> // nthreads: stride of the copy loop below; the host launcher also uses it as the thread-block width
+__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x, 
+                                              const int win_block_stride_y, const float* block_hists, 
+                                              PtrElemStepf descriptors)
+{
+    // Source: histogram of the first (top-left) block of the window selected by (blockIdx.x, blockIdx.y)
+    const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + 
+                                       blockIdx.x * win_block_stride_x) * cblock_hist_size;
+
+    // Destination: start of this window's descriptor, selected by the linear window index blockIdx.y * gridDim.x + blockIdx.x
+    float* descriptor = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x);
+
+    // Copy cdescr_size elements from src to dst; blocks are read row by row (y * img_block_width + x) and written column by column (x * cnblocks_win_y + y)
+    for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
+    {
+        int block_idx = i / cblock_hist_size; // index of the block inside the window, counted row by row
+        int idx_in_block = i - block_idx * cblock_hist_size; // element offset inside that block's histogram
+
+        int y = block_idx / cnblocks_win_x; // block row inside the window
+        int x = block_idx - y * cnblocks_win_x; // block column inside the window
+
+        descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] 
+            = hist[(y * img_block_width  + x) * cblock_hist_size + idx_in_block];
+    }
+}
+
+
+void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, 
+                            int win_stride_y, int win_stride_x, int height, int width, float* block_hists, 
+                            DevMem2Df descriptors)
+{ // Host launcher: runs extract_descrs_by_cols_kernel with one thread block per window position, then waits for completion
+    const int nthreads = 256; // thread-block width; also passed as the kernel's template argument
+
+    int win_block_stride_x = win_stride_x / block_stride_x; // horizontal window stride measured in blocks (integer division)
+    int win_block_stride_y = win_stride_y / block_stride_y; // vertical window stride measured in blocks (integer division)
+    int img_win_width = (width - win_width + win_stride_x) / win_stride_x; // number of window positions along x
+    int img_win_height = (height - win_height + win_stride_y) / win_stride_y; // number of window positions along y
+    dim3 threads(nthreads, 1);
+    dim3 grid(img_win_width, img_win_height); // one thread block per window position
+
+    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / 
+                          block_stride_x; // number of block positions per image row
+    extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
+        img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
    cudaSafeCall(cudaThreadSynchronize());
 }

--- a/modules/gpu/src/hog.cpp
+++ b/modules/gpu/src/hog.cpp
@ -50,11 +50,9 @@ size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); re
 double cv::gpu::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; }
 bool cv::gpu::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; }
 void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>&) { throw_nogpu(); }
-void cv::gpu::HOGDescriptor::computeGradient(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_nogpu(); }
 void cv::gpu::HOGDescriptor::detect(const GpuMat&, vector<Point>&, double, Size, Size) { throw_nogpu(); }
 void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, double, int) { throw_nogpu(); }
-void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&) { throw_nogpu(); }
+void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_nogpu(); }
 std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector_48x96() { throw_nogpu(); return std::vector<float>(); }
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector_64x128() { throw_nogpu(); return std::vector<float>(); }
@ -78,9 +76,12 @@ void classify_hists(int win_height, int win_width, int block_stride_y,
                    int width, float* block_hists, float* coefs, float free_coef, 
                    float threshold, unsigned char* labels);

-void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x, 
-                         int win_stride_y, int win_stride_x, int height, int width, float* block_hists, 
-                         cv::gpu::DevMem2Df descriptors);
+void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, 
+                            int win_stride_y, int win_stride_x, int height, int width, float* block_hists, 
+                            cv::gpu::DevMem2Df descriptors);
+void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, 
+                            int win_stride_y, int win_stride_x, int height, int width, float* block_hists, 
+                            cv::gpu::DevMem2Df descriptors);

 void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, 
                            float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);
@ -218,7 +219,7 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
 }


-void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors)
+void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)
 {
    CV_Assert(win_stride.width % block_stride.width == 0 &&
              win_stride.height % block_stride.height == 0);
@ -231,9 +232,21 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,

    descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F);

-    hog::extract_descriptors(win_size.height, win_size.width, block_stride.height, block_stride.width, 
-                             win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), 
-                             descriptors);
+    switch (descr_format)
+    {
+    case DESCR_FORMAT_ROW_BY_ROW:
+        hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, 
+                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), 
+                                    descriptors);
+        break;
+    case DESCR_FORMAT_COL_BY_COL:
+        hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, 
+                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), 
+                                    descriptors);
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "Unknown descriptor format");
+    }
 }


--- a/tests/gpu/src/hog.cpp
+++ b/tests/gpu/src/hog.cpp
@ -51,9 +51,9 @@ using namespace std;
    ts->set_failed_test_info(err); \
    return; }

-struct CV_GpuHogDetectionTest: public CvTest 
+struct CV_GpuHogDetectionTest: public CvTest, public cv::gpu::HOGDescriptor
 {
-    CV_GpuHogDetectionTest(): CvTest( "GPU-HOG-detect", "HOGDescriptorDetection" ) {}
+    CV_GpuHogDetectionTest(): CvTest("GPU-HOG-detect", "HOGDescriptorDetection") {}

    void run(int) 
    {       
@ -141,54 +141,53 @@ struct CV_GpuHogDetectionTest: public CvTest
    {
        cv::gpu::GpuMat d_img(img);

-        cv::gpu::HOGDescriptor hog;
-        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+        setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
        //cpu detector may be updated soon
        //hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());

        std::vector<cv::Point> locations;

        // Test detect
-        hog.detect(d_img, locations, 0);
+        detect(d_img, locations, 0);

 #ifdef DUMP
-        dump(hog.block_hists, locations);
+        dump(block_hists, locations);
 #else
-        compare(hog.block_hists, locations);
+        compare(block_hists, locations);
 #endif

        // Test detect on smaller image
        cv::gpu::GpuMat d_img2;
        cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols / 2, d_img.rows / 2)); 
-        hog.detect(d_img2, locations, 0);
+        detect(d_img2, locations, 0);

 #ifdef DUMP
-        dump(hog.block_hists, locations);
+        dump(block_hists, locations);
 #else
-        compare(hog.block_hists, locations);
+        compare(block_hists, locations);
 #endif

        // Test detect on greater image
        cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols * 2, d_img.rows * 2)); 
-        hog.detect(d_img2, locations, 0);
+        detect(d_img2, locations, 0);
        
 #ifdef DUMP
-        dump(hog.block_hists, locations);
+        dump(block_hists, locations);
 #else
-        compare(hog.block_hists, locations);
+        compare(block_hists, locations);
 #endif

        // Test detectMultiScale
        std::vector<cv::Rect> rects;
        size_t nrects;
-        hog.detectMultiScale(d_img, rects, 0, cv::Size(8, 8), cv::Size(), 1.05, 2);
+        detectMultiScale(d_img, rects, 0, cv::Size(8, 8), cv::Size(), 1.05, 2);

 #ifdef DUMP
        nrects = rects.size();
        f.write((char*)&nrects, sizeof(nrects));
        for (size_t i = 0; i < rects.size(); ++i)
            f.write((char*)&rects[i], sizeof(rects[i]));
-        dump(hog.block_hists, std::vector<cv::Point>());
+        dump(block_hists, std::vector<cv::Point>());
 #else
        f.read((char*)&nrects, sizeof(nrects));
        CHECK(nrects == rects.size(), CvTS::FAIL_INVALID_OUTPUT)
@ -198,7 +197,7 @@ struct CV_GpuHogDetectionTest: public CvTest
            f.read((char*)&rect, sizeof(rect));
            CHECK(rect == rects[i], CvTS::FAIL_INVALID_OUTPUT);
        }
-        compare(hog.block_hists, std::vector<cv::Point>());
+        compare(block_hists, std::vector<cv::Point>());
 #endif
    }

@ -211,9 +210,10 @@ struct CV_GpuHogDetectionTest: public CvTest
 } gpu_hog_detection_test;


-struct CV_GpuHogGetDescriptorsTest: public CvTest 
+struct CV_GpuHogGetDescriptorsTest: public CvTest, public cv::gpu::HOGDescriptor
 {
-    CV_GpuHogGetDescriptorsTest(): CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors") {}
+    CV_GpuHogGetDescriptorsTest(): 
+        CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors"), HOGDescriptor(cv::Size(64, 128)) {}

    void run(int)
    {
@ -228,12 +228,11 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
            cv::gpu::GpuMat d_img(img);

-            cv::Size win_size(64, 128);
-            cv::gpu::HOGDescriptor hog(win_size);

            // Convert train images into feature vectors (train table)
-            cv::gpu::GpuMat descriptors;
-            hog.getDescriptors(d_img, win_size, descriptors);
+            cv::gpu::GpuMat descriptors, descriptors_by_cols;
+            getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
+            getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);

            // Check size of the result train table
            wins_per_img_x = 3;
@ -245,6 +244,20 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest
                                                    wins_per_img_x * wins_per_img_y);                                                
            CHECK(descriptors.size() == descr_size_expected, CvTS::FAIL_INVALID_OUTPUT);

+            // Check both formats of output descriptors are handled correctly
+            cv::Mat dr(descriptors);
+            cv::Mat dc(descriptors_by_cols);
+            for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
+            {
+                const float* l = dr.rowRange(i, i + 1).ptr<float>();
+                const float* r = dc.rowRange(i, i + 1).ptr<float>();
+                for (int y = 0; y < blocks_per_win_y; ++y)
+                    for (int x = 0; x < blocks_per_win_x; ++x)
+                        for (int k = 0; k < block_hist_size; ++k)
+                            CHECK(l[(y * blocks_per_win_x + x) * block_hist_size + k] == 
+                                  r[(x * blocks_per_win_y + y) * block_hist_size + k], CvTS::FAIL_INVALID_OUTPUT);
+            }
+
            /* Now we want to extract the same feature vectors, but from single images. NOTE: results will 
            be defferent, due to border values interpolation. Using of many small images is slower, however we 
            wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms 
@ -253,39 +266,39 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest
            img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive1.png");
            CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-            hog.computeBlockHistograms(cv::gpu::GpuMat(img));
+            computeBlockHistograms(cv::gpu::GpuMat(img));
            // Everything is fine with interpolation for left top subimage
-            CHECK(cv::norm(hog.block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT);
+            CHECK(cv::norm(block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT);

            img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive2.png");
            CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-            hog.computeBlockHistograms(cv::gpu::GpuMat(img));
-            compare_inner_parts(hog.block_hists, descriptors.rowRange(1, 2));
+            computeBlockHistograms(cv::gpu::GpuMat(img));
+            compare_inner_parts(block_hists, descriptors.rowRange(1, 2));

            img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative1.png");
            CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-            hog.computeBlockHistograms(cv::gpu::GpuMat(img));
-            compare_inner_parts(hog.block_hists, descriptors.rowRange(2, 3));
+            computeBlockHistograms(cv::gpu::GpuMat(img));
+            compare_inner_parts(block_hists, descriptors.rowRange(2, 3));

            img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative2.png");
            CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-            hog.computeBlockHistograms(cv::gpu::GpuMat(img));
-            compare_inner_parts(hog.block_hists, descriptors.rowRange(3, 4));
+            computeBlockHistograms(cv::gpu::GpuMat(img));
+            compare_inner_parts(block_hists, descriptors.rowRange(3, 4));

            img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive3.png");
            CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-            hog.computeBlockHistograms(cv::gpu::GpuMat(img));
-            compare_inner_parts(hog.block_hists, descriptors.rowRange(4, 5));
+            computeBlockHistograms(cv::gpu::GpuMat(img));
+            compare_inner_parts(block_hists, descriptors.rowRange(4, 5));

            img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative3.png");
            CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
            cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-            hog.computeBlockHistograms(cv::gpu::GpuMat(img));
-            compare_inner_parts(hog.block_hists, descriptors.rowRange(5, 6));
+            computeBlockHistograms(cv::gpu::GpuMat(img));
+            compare_inner_parts(block_hists, descriptors.rowRange(5, 6));
        }
        catch (const cv::Exception& e)
        {