mirror of
https://github.com/opencv/opencv.git
synced 2024-12-02 07:39:57 +08:00
update soft cascade interface: - add class Detection in interface, - split sync- and async- versions, - add support for detecting at the specific scale.
This commit is contained in:
parent
612a258506
commit
b52fea7fae
@ -1537,6 +1537,18 @@ public:
|
||||
class CV_EXPORTS SoftCascade
|
||||
{
|
||||
public:
|
||||
|
||||
struct CV_EXPORTS Detection
|
||||
{
|
||||
ushort x;
|
||||
ushort y;
|
||||
ushort w;
|
||||
ushort h;
|
||||
float confidence;
|
||||
int kind;
|
||||
|
||||
enum {PEDESTRIAN = 0};
|
||||
};
|
||||
//! An empty cascade will be created.
|
||||
SoftCascade();
|
||||
|
||||
@ -1559,9 +1571,19 @@ public:
|
||||
//! Param rois is a mask
|
||||
//! Param objects 4-channel matrix thet contain detected rectangles
|
||||
//! Param rejectfactor used for final object box computing
|
||||
//! Param stream
|
||||
virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
|
||||
int rejectfactor = 1, Stream stream = Stream::Null());
|
||||
int rejectfactor = 1, int specificScale = -1);
|
||||
|
||||
//! detect specific objects on in the input frame for all scales computed flom minScale and maxscale values.
|
||||
//! asynchronous version.
|
||||
//! Param image is input frame for detector. Cascade will be applied to it.
|
||||
//! Param rois is a mask
|
||||
//! Param objects 4-channel matrix thet contain detected rectangles
|
||||
//! Param rejectfactor used for final object box computing
|
||||
//! Param ndet retrieves number of detections
|
||||
//! Param stream wrapper for CUDA stream
|
||||
virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
|
||||
int rejectfactor, GpuMat& ndet, Stream stream);
|
||||
|
||||
private:
|
||||
struct Filds;
|
||||
|
@ -105,7 +105,7 @@ namespace icf {
|
||||
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
||||
|
||||
const float expected_new_area = farea * relScale * relScale;
|
||||
float approx = sarea / expected_new_area;
|
||||
float approx = __fdividef(sarea, expected_new_area);
|
||||
|
||||
dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
|
||||
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
|
||||
@ -198,12 +198,13 @@ namespace icf {
|
||||
// }
|
||||
|
||||
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
|
||||
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr)
|
||||
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
|
||||
const int downscales)
|
||||
{
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const int x = blockIdx.x;
|
||||
|
||||
Level level = levels[blockIdx.z];
|
||||
Level level = levels[downscales + blockIdx.z];
|
||||
|
||||
if(x >= level.workRect.x || y >= level.workRect.y) return;
|
||||
|
||||
@ -236,7 +237,7 @@ namespace icf {
|
||||
dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact);
|
||||
dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]);
|
||||
dprintf("%d: computed score: %f\n",threadIdx.x, impact);
|
||||
|
||||
#pragma unroll
|
||||
// scan on shuffl functions
|
||||
for (int i = 1; i < 32; i *= 2)
|
||||
{
|
||||
@ -263,13 +264,13 @@ namespace icf {
|
||||
|
||||
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter)
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales)
|
||||
{
|
||||
int fw = 160;
|
||||
int fh = 120;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(fw, fh / 8, 47);
|
||||
dim3 grid(fw, fh / 8, downscales);
|
||||
|
||||
const Level* l = (const Level*)levels.ptr();
|
||||
const Octave* oct = ((const Octave*)octaves.ptr());
|
||||
@ -283,8 +284,38 @@ namespace icf {
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
|
||||
|
||||
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr);
|
||||
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
|
||||
grid = dim3(fw, fh / 8, 47 - downscales);
|
||||
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales);
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
cudaSafeCall( cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
|
||||
PtrStepSzi counter)
|
||||
{
|
||||
int fw = 160;
|
||||
int fh = 120;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(fw, fh / 8, 1);
|
||||
|
||||
const Level* l = (const Level*)levels.ptr();
|
||||
const Octave* oct = ((const Octave*)octaves.ptr());
|
||||
const float* st = (const float*)stages.ptr();
|
||||
const Node* nd = (const Node*)nodes.ptr();
|
||||
const float* lf = (const float*)leaves.ptr();
|
||||
uint* ctr = (uint*)counter.ptr();
|
||||
Detection* det = (Detection*)objects.ptr();
|
||||
uint max_det = objects.cols / sizeof(Detection);
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
|
||||
|
||||
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
cudaSafeCall( cudaDeviceSynchronize());
|
||||
}
|
||||
|
@ -49,7 +49,11 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); }
|
||||
cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
|
||||
cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
|
||||
bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
|
||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu();}
|
||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, int) { throw_nogpu();}
|
||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
@ -60,6 +64,9 @@ namespace icf {
|
||||
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
||||
const int fw, const int fh, const int bins);
|
||||
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
|
||||
PtrStepSzi counter, const int downscales);
|
||||
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
|
||||
PtrStepSzi counter);
|
||||
}
|
||||
@ -86,6 +93,8 @@ struct cv::gpu::SoftCascade::Filds
|
||||
int origObjWidth;
|
||||
int origObjHeight;
|
||||
|
||||
int downscales;
|
||||
|
||||
GpuMat octaves;
|
||||
GpuMat stages;
|
||||
GpuMat nodes;
|
||||
@ -120,7 +129,6 @@ struct cv::gpu::SoftCascade::Filds
|
||||
FRAME_WIDTH = 640,
|
||||
FRAME_HEIGHT = 480,
|
||||
TOTAL_SCALES = 55,
|
||||
// CLASSIFIERS = 5,
|
||||
ORIG_OBJECT_WIDTH = 64,
|
||||
ORIG_OBJECT_HEIGHT = 128,
|
||||
HOG_BINS = 6,
|
||||
@ -132,7 +140,14 @@ struct cv::gpu::SoftCascade::Filds
|
||||
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
|
||||
{
|
||||
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
|
||||
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter);
|
||||
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales);
|
||||
}
|
||||
|
||||
void detectAtScale(int scale, cv::gpu::GpuMat objects, cudaStream_t stream) const
|
||||
{
|
||||
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
|
||||
device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects,
|
||||
detCounter);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -160,7 +175,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
|
||||
bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
|
||||
{
|
||||
using namespace device::icf;
|
||||
minScale = mins;
|
||||
@ -351,6 +366,7 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
|
||||
float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1);
|
||||
|
||||
float scale = minScale;
|
||||
downscales = 0;
|
||||
for (int sc = 0; sc < nscales; ++sc)
|
||||
{
|
||||
int width = ::std::max(0.0f, frameW - (origObjWidth * scale));
|
||||
@ -366,7 +382,10 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
|
||||
if (!width || !height)
|
||||
break;
|
||||
else
|
||||
{
|
||||
vlevels.push_back(level);
|
||||
if (octs[fit].scale < 1) ++downscales;
|
||||
}
|
||||
|
||||
if (::fabs(scale - maxScale) < FLT_EPSILON) break;
|
||||
scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
|
||||
@ -424,8 +443,11 @@ namespace {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
//================================== synchronous version ============================================================//
|
||||
|
||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
|
||||
GpuMat& objects, const int /*rejectfactor*/, Stream s)
|
||||
GpuMat& objects, const int /*rejectfactor*/, int specificScale)
|
||||
{
|
||||
// only color images are supperted
|
||||
CV_Assert(colored.type() == CV_8UC3);
|
||||
@ -513,11 +535,21 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
|
||||
}
|
||||
#endif
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
flds.detect(objects, stream);
|
||||
if (specificScale == -1)
|
||||
flds.detect(objects, 0);
|
||||
else
|
||||
flds.detectAtScale(specificScale, objects, 0);
|
||||
|
||||
// cv::Mat out(flds.detCounter);
|
||||
// std::cout << out << std::endl;
|
||||
cv::Mat out(flds.detCounter);
|
||||
int ndetections = *(out.data);
|
||||
|
||||
objects = GpuMat(objects, cv::Rect(0, 0, ndetections * sizeof(Detection), 1));
|
||||
}
|
||||
|
||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
|
||||
{
|
||||
// cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user