From b52fea7fae6ec38096dcd57458e0ee6be87da996 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Mon, 1 Oct 2012 13:48:16 +0400 Subject: [PATCH] update soft cascade interface: - add class Detection in interface, - split sync- and async- versions, - add support for detecting at the specific scale. --- modules/gpu/include/opencv2/gpu/gpu.hpp | 26 ++++++++++++- modules/gpu/src/cuda/isf-sc.cu | 45 ++++++++++++++++++---- modules/gpu/src/softcascade.cpp | 50 ++++++++++++++++++++----- 3 files changed, 103 insertions(+), 18 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 5008e10275..f171ad904d 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1537,6 +1537,18 @@ public: class CV_EXPORTS SoftCascade { public: + + struct CV_EXPORTS Detection + { + ushort x; + ushort y; + ushort w; + ushort h; + float confidence; + int kind; + + enum {PEDESTRIAN = 0}; + }; //! An empty cascade will be created. SoftCascade(); @@ -1559,9 +1571,19 @@ public: //! Param rois is a mask //! Param objects 4-channel matrix thet contain detected rectangles //! Param rejectfactor used for final object box computing - //! Param stream virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects, - int rejectfactor = 1, Stream stream = Stream::Null()); + int rejectfactor = 1, int specificScale = -1); + + //! detect specific objects on in the input frame for all scales computed flom minScale and maxscale values. + //! asynchronous version. + //! Param image is input frame for detector. Cascade will be applied to it. + //! Param rois is a mask + //! Param objects 4-channel matrix thet contain detected rectangles + //! Param rejectfactor used for final object box computing + //! Param ndet retrieves number of detections + //! Param stream wrapper for CUDA stream + virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects, + int rejectfactor, GpuMat& ndet, Stream stream); private: struct Filds; diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index f3c92cc6ab..3d9a1e10f9 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -105,7 +105,7 @@ namespace icf { float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y); const float expected_new_area = farea * relScale * relScale; - float approx = sarea / expected_new_area; + float approx = __fdividef(sarea, expected_new_area); dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28), scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea); @@ -198,12 +198,13 @@ namespace icf { // } __global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages, - const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr) + const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr, + const int downscales) { const int y = blockIdx.y * blockDim.y + threadIdx.y; const int x = blockIdx.x; - Level level = levels[blockIdx.z]; + Level level = levels[downscales + blockIdx.z]; if(x >= level.workRect.x || y >= level.workRect.y) return; @@ -236,7 +237,7 @@ namespace icf { dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact); dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]); dprintf("%d: computed score: %f\n",threadIdx.x, impact); - +#pragma unroll // scan on shuffl functions for (int i = 1; i < 32; i *= 2) { @@ -263,13 +264,13 @@ namespace icf { void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, - PtrStepSz objects, PtrStepSzi counter) + PtrStepSz objects, PtrStepSzi counter, const int downscales) { int fw = 160; int fh = 120; dim3 block(32, 8); - dim3 grid(fw, fh / 8, 47); + dim3 grid(fw, fh / 8, downscales); const Level* l = (const Level*)levels.ptr(); const Octave* oct = ((const Octave*)octaves.ptr()); @@ -283,8 +284,38 @@ namespace icf { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); - test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr); + test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, 0); + cudaSafeCall( cudaGetLastError()); + grid = dim3(fw, fh / 8, 47 - downscales); + test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, downscales); + cudaSafeCall( cudaGetLastError()); + cudaSafeCall( cudaDeviceSynchronize()); + } + + void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, + const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz objects, + PtrStepSzi counter) + { + int fw = 160; + int fh = 120; + + dim3 block(32, 8); + dim3 grid(fw, fh / 8, 1); + + const Level* l = (const Level*)levels.ptr(); + const Octave* oct = ((const Octave*)octaves.ptr()); + const float* st = (const float*)stages.ptr(); + const Node* nd = (const Node*)nodes.ptr(); + const float* lf = (const float*)leaves.ptr(); + uint* ctr = (uint*)counter.ptr(); + Detection* det = (Detection*)objects.ptr(); + uint max_det = objects.cols / sizeof(Detection); + + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); + + test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, scale); cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaDeviceSynchronize()); } diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index 320fbb3437..fd94909cf0 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -49,7 +49,11 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); } cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); } cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); } bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; } -void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu();} +void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, int) { throw_nogpu();} +void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream) +{ + throw_nogpu(); +} #else @@ -60,6 +64,9 @@ namespace icf { void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, const int fw, const int fh, const int bins); void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, + const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz objects, + PtrStepSzi counter, const int downscales); + void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz objects, PtrStepSzi counter); } @@ -86,6 +93,8 @@ struct cv::gpu::SoftCascade::Filds int origObjWidth; int origObjHeight; + int downscales; + GpuMat octaves; GpuMat stages; GpuMat nodes; @@ -120,7 +129,6 @@ struct cv::gpu::SoftCascade::Filds FRAME_WIDTH = 640, FRAME_HEIGHT = 480, TOTAL_SCALES = 55, -// CLASSIFIERS = 5, ORIG_OBJECT_WIDTH = 64, ORIG_OBJECT_HEIGHT = 128, HOG_BINS = 6, @@ -132,7 +140,14 @@ struct cv::gpu::SoftCascade::Filds void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const { cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int)); - device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter); + device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales); + } + + void detectAtScale(int scale, cv::gpu::GpuMat objects, cudaStream_t stream) const + { + cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int)); + device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects, + detCounter); } private: @@ -160,7 +175,7 @@ private: } }; -inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs) +bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs) { using namespace device::icf; minScale = mins; @@ -351,6 +366,7 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector