update soft cascade interface: - add class Detection in interface, - split sync- and async- versions, - add support for detecting at the specific scale.

2024-12-02 07:39:57 +08:00 · 2012-10-01 13:48:16 +04:00 · 2012-10-01 13:48:16 +04:00 · b52fea7fae
commit b52fea7fae
parent 612a258506
3 changed files with 103 additions and 18 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -1537,6 +1537,18 @@ public:
 class CV_EXPORTS SoftCascade
 {
 public:
+
+    struct CV_EXPORTS Detection
+    {
+        ushort x;
+        ushort y;
+        ushort w;
+        ushort h;
+        float confidence;
+        int kind;
+
+        enum {PEDESTRIAN = 0};
+    };
    //! An empty cascade will be created.
    SoftCascade();

@ -1559,9 +1571,19 @@ public:
    //! Param rois is a mask
    //! Param objects 4-channel matrix thet contain detected rectangles
    //! Param rejectfactor used for final object box computing
-    //! Param stream
    virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
-    int rejectfactor = 1, Stream stream = Stream::Null());
+    int rejectfactor = 1, int specificScale = -1);
+
+    //! detect specific objects on in the input frame for all scales computed flom minScale and maxscale values.
+    //! asynchronous version.
+    //! Param image is input frame for detector. Cascade will be applied to it.
+    //! Param rois is a mask
+    //! Param objects 4-channel matrix thet contain detected rectangles
+    //! Param rejectfactor used for final object box computing
+    //! Param ndet retrieves number of detections
+    //! Param stream wrapper for CUDA stream
+    virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
+    int rejectfactor, GpuMat& ndet, Stream stream);

 private:
    struct Filds;
--- a/modules/gpu/src/cuda/isf-sc.cu
+++ b/modules/gpu/src/cuda/isf-sc.cu
@ -105,7 +105,7 @@ namespace icf {
        float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);

        const float expected_new_area = farea * relScale * relScale;
-        float approx =  sarea / expected_new_area;
+        float approx = __fdividef(sarea, expected_new_area);

        dprintf("%d: new rect: %d box %d %d %d %d  rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
        scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
@ -198,12 +198,13 @@ namespace icf {
 //     }

    __global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
-        const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr)
+        const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
+        const int downscales)
    {
        const int y = blockIdx.y * blockDim.y + threadIdx.y;
        const int x = blockIdx.x;

-        Level level = levels[blockIdx.z];
+        Level level = levels[downscales + blockIdx.z];

        if(x >= level.workRect.x || y >= level.workRect.y) return;

@ -236,7 +237,7 @@ namespace icf {
            dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact);
            dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]);
            dprintf("%d: computed  score: %f\n",threadIdx.x, impact);
-
+#pragma unroll
            // scan on shuffl functions
            for (int i = 1; i < 32; i *= 2)
            {
@ -263,13 +264,13 @@ namespace icf {

    void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
                const PtrStepSzb& nodes,  const PtrStepSzf& leaves,  const PtrStepSzi& hogluv,
-                PtrStepSz<uchar4> objects, PtrStepSzi counter)
+                PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales)
    {
        int fw = 160;
        int fh = 120;

        dim3 block(32, 8);
-        dim3 grid(fw, fh / 8, 47);
+        dim3 grid(fw, fh / 8, downscales);

        const Level* l = (const Level*)levels.ptr();
        const Octave* oct = ((const Octave*)octaves.ptr());
@ -283,8 +284,38 @@ namespace icf {
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
        cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));

-        test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr);
+        test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
+        cudaSafeCall( cudaGetLastError());

+        grid = dim3(fw, fh / 8, 47 - downscales);
+        test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales);
+        cudaSafeCall( cudaGetLastError());
+        cudaSafeCall( cudaDeviceSynchronize());
+    }
+
+    void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
+        const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
+        PtrStepSzi counter)
+    {
+        int fw = 160;
+        int fh = 120;
+
+        dim3 block(32, 8);
+        dim3 grid(fw, fh / 8, 1);
+
+        const Level* l = (const Level*)levels.ptr();
+        const Octave* oct = ((const Octave*)octaves.ptr());
+        const float* st = (const float*)stages.ptr();
+        const Node* nd = (const Node*)nodes.ptr();
+        const float* lf = (const float*)leaves.ptr();
+        uint* ctr = (uint*)counter.ptr();
+        Detection* det = (Detection*)objects.ptr();
+        uint max_det = objects.cols / sizeof(Detection);
+
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
+        cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
+
+        test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
        cudaSafeCall( cudaGetLastError());
        cudaSafeCall( cudaDeviceSynchronize());
    }
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@ -49,7 +49,11 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); }
 cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
 cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
 bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
-void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu();}
+void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, int) { throw_nogpu();}
+void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
+{
+    throw_nogpu();
+}

 #else

@ -60,6 +64,9 @@ namespace icf {
    void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
        const int fw, const int fh, const int bins);
    void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
+        const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
+        PtrStepSzi counter, const int downscales);
+    void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
        const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
        PtrStepSzi counter);
 }
@ -86,6 +93,8 @@ struct cv::gpu::SoftCascade::Filds
    int origObjWidth;
    int origObjHeight;

+    int downscales;
+
    GpuMat octaves;
    GpuMat stages;
    GpuMat nodes;
@ -120,7 +129,6 @@ struct cv::gpu::SoftCascade::Filds
        FRAME_WIDTH        = 640,
        FRAME_HEIGHT       = 480,
        TOTAL_SCALES       = 55,
-//         CLASSIFIERS        = 5,
        ORIG_OBJECT_WIDTH  = 64,
        ORIG_OBJECT_HEIGHT = 128,
        HOG_BINS           = 6,
@ -132,7 +140,14 @@ struct cv::gpu::SoftCascade::Filds
    void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
    {
        cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
-        device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter);
+        device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales);
+    }
+
+    void detectAtScale(int scale, cv::gpu::GpuMat objects, cudaStream_t stream) const
+    {
+        cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
+        device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects,
+            detCounter);
    }

 private:
@ -160,7 +175,7 @@ private:
    }
 };

-inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
+bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
 {
    using namespace device::icf;
    minScale = mins;
@ -351,6 +366,7 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
    float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1);

    float scale = minScale;
+    downscales = 0;
    for (int sc = 0; sc < nscales; ++sc)
    {
        int width  = ::std::max(0.0f, frameW - (origObjWidth  * scale));
@ -366,7 +382,10 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
        if (!width || !height)
            break;
        else
+        {
            vlevels.push_back(level);
+            if (octs[fit].scale < 1) ++downscales;
+        }

        if (::fabs(scale - maxScale) < FLT_EPSILON) break;
        scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
@ -424,8 +443,11 @@ namespace {
        return s;
    }
 }
+
+//================================== synchronous version ============================================================//
+
 void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
-                                GpuMat& objects, const int /*rejectfactor*/, Stream s)
+                                GpuMat& objects, const int /*rejectfactor*/, int specificScale)
 {
    // only color images are supperted
    CV_Assert(colored.type() == CV_8UC3);
@ -513,11 +535,21 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
    }
 #endif

-    cudaStream_t stream = StreamAccessor::getStream(s);
-    flds.detect(objects, stream);
+    if (specificScale == -1)
+        flds.detect(objects, 0);
+    else
+        flds.detectAtScale(specificScale, objects, 0);

-        //     cv::Mat out(flds.detCounter);
-        // std::cout << out << std::endl;
+    cv::Mat out(flds.detCounter);
+    int ndetections = *(out.data);
+
+    objects = GpuMat(objects, cv::Rect(0, 0, ndetections * sizeof(Detection), 1));
 }

+void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
+{
+    // cudaStream_t stream = StreamAccessor::getStream(s);
+}
+
+
 #endif