#include <fstream>
#include <sstream>

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#if defined(HAVE_THREADS)
#define USE_THREADS 1
#endif

#ifdef USE_THREADS
#include <mutex>
#include <thread>
#include <queue>
#endif

#include "common.hpp"

std::string param_keys =
    "{ help  h     | | Print help message. }"
    "{ @alias      | | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo         | models.yml | An optional path to file with preprocessing parameters }"
    "{ device      | 0 | camera device number. }"
    "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
" { framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. } "
" { classes | | Optional path to a text file with names of classes to label detected objects. } "
" { thr | .5 | Confidence threshold. } "
2018-07-12 17:06:53 +08:00
" { nms | .4 | Non-maximum suppression threshold. } "
2019-05-14 22:43:48 +08:00
" { async | 0 | Number of asynchronous forwards at the same time. "
" Choose 0 for synchronous mode } " ;
2023-10-13 21:53:18 +08:00
std : : string backend_keys = cv : : format (
" { backend | 0 | Choose one of computation backends: "
" %d: automatically (by default), "
" %d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
" %d: OpenCV implementation, "
" %d: VKCOM, "
" %d: CUDA } " , cv : : dnn : : DNN_BACKEND_DEFAULT , cv : : dnn : : DNN_BACKEND_INFERENCE_ENGINE , cv : : dnn : : DNN_BACKEND_OPENCV , cv : : dnn : : DNN_BACKEND_VKCOM , cv : : dnn : : DNN_BACKEND_CUDA ) ;
std : : string target_keys = cv : : format (
" { target | 0 | Choose one of target computation devices: "
" %d: CPU target (by default), "
" %d: OpenCL, "
" %d: OpenCL fp16 (half-float precision), "
" %d: VPU, "
" %d: Vulkan, "
" %d: CUDA, "
" %d: CUDA fp16 (half-float preprocess) } " , cv : : dnn : : DNN_TARGET_CPU , cv : : dnn : : DNN_TARGET_OPENCL , cv : : dnn : : DNN_TARGET_OPENCL_FP16 , cv : : dnn : : DNN_TARGET_MYRIAD , cv : : dnn : : DNN_TARGET_VULKAN , cv : : dnn : : DNN_TARGET_CUDA , cv : : dnn : : DNN_TARGET_CUDA_FP16 ) ;
std : : string keys = param_keys + backend_keys + target_keys ;
2018-03-02 17:04:39 +08:00
using namespace cv ;
using namespace dnn ;

float confThreshold, nmsThreshold;
std::vector<std::string> classes;

inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
                       const Scalar& mean, bool swapRB);

void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net, int backend);

void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);

void callback(int pos, void* userdata);

#ifdef USE_THREADS
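// Thread-safe queue shared between the capture, processing and rendering threads below.
// Besides FIFO access, it counts pushed entries and reports the push rate via getFPS().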
template <typename T>
class QueueFPS : public std::queue<T>
{
public:
    QueueFPS() : counter(0) {}

    void push(const T& entry)
    {
        std::lock_guard<std::mutex> lock(mutex);

        std::queue<T>::push(entry);
        counter += 1;
        if (counter == 1)
        {
            // Start counting from a second frame (warmup).
            tm.reset();
            tm.start();
        }
    }

    T get()
    {
        std::lock_guard<std::mutex> lock(mutex);
        T entry = this->front();
        this->pop();
        return entry;
    }

    float getFPS()
    {
        tm.stop();
        double fps = counter / tm.getTimeSec();
        tm.start();
        return static_cast<float>(fps);
    }

    void clear()
    {
        std::lock_guard<std::mutex> lock(mutex);
        while (!this->empty())
            this->pop();
    }

    unsigned int counter;

private:
    TickMeter tm;
    std::mutex mutex;
};
#endif  // USE_THREADS

int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, keys);

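    // Look up the model alias in models.yml and extend the command-line keys with the
    // model-specific preprocessing parameters (scale, mean, input size, swapRB, ...),
    // then re-parse the arguments with the extended key set.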
    const std::string modelName = parser.get<String>("@alias");
    const std::string zooFile = parser.get<String>("zoo");
    keys += genPreprocArguments(modelName, zooFile);
    parser = CommandLineParser(argc, argv, keys);

    parser.about("Use this script to run object detection deep learning networks using OpenCV.");
    if (argc == 1 || parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }

    confThreshold = parser.get<float>("thr");
    nmsThreshold = parser.get<float>("nms");
    float scale = parser.get<float>("scale");
    Scalar mean = parser.get<Scalar>("mean");
    bool swapRB = parser.get<bool>("rgb");
    int inpWidth = parser.get<int>("width");
    int inpHeight = parser.get<int>("height");
    size_t asyncNumReq = parser.get<int>("async");

    CV_Assert(parser.has("model"));
    std::string modelPath = findFile(parser.get<String>("model"));
    std::string configPath = findFile(parser.get<String>("config"));

    // Open a file with class names.
    if (parser.has("classes"))
    {
        std::string file = parser.get<String>("classes");
        std::ifstream ifs(file.c_str());
        if (!ifs.is_open())
            CV_Error(Error::StsError, "File " + file + " not found");
        std::string line;
        while (std::getline(ifs, line))
        {
            classes.push_back(line);
        }
    }

    // Load a model.
    Net net = readNet(modelPath, configPath, parser.get<String>("framework"));
    int backend = parser.get<int>("backend");
    net.setPreferableBackend(backend);
    net.setPreferableTarget(parser.get<int>("target"));

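    // Names of the unconnected output layers, i.e. the blobs requested from forward().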
    std::vector<String> outNames = net.getUnconnectedOutLayersNames();

    // Create a window
    static const std::string kWinName = "Deep learning object detection in OpenCV";
    namedWindow(kWinName, WINDOW_NORMAL);
    int initialConf = (int)(confThreshold * 100);
    createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback);

    // Open a video file or an image file or a camera stream.
    VideoCapture cap;
    if (parser.has("input"))
        cap.open(parser.get<String>("input"));
    else
        cap.open(parser.get<int>("device"));

#ifdef USE_THREADS
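    // Multi-threaded mode: one thread grabs frames, a second runs preprocessing and
    // network inference, and the loop below postprocesses and renders the results.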
    bool process = true;

    // Frames capturing thread
    QueueFPS<Mat> framesQueue;
    std::thread framesThread([&](){
        Mat frame;
        while (process)
        {
            cap >> frame;
            if (!frame.empty())
                framesQueue.push(frame.clone());
            else
                break;
        }
    });

    // Frames processing thread
    QueueFPS<Mat> processedFramesQueue;
    QueueFPS<std::vector<Mat> > predictionsQueue;
    std::thread processingThread([&](){
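        // Pending asynchronous inference requests (used only when asyncNumReq > 0).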
        std::queue<AsyncArray> futureOutputs;
        Mat blob;
        while (process)
        {
            // Get the next frame
            Mat frame;
            {
                if (!framesQueue.empty())
                {
                    frame = framesQueue.get();
                    if (asyncNumReq)
                    {
                        if (futureOutputs.size() == asyncNumReq)
                            frame = Mat();
                    }
                    else
                        framesQueue.clear();  // Skip the rest of frames
                }
            }

            // Process the frame
            if (!frame.empty())
            {
                preprocess(frame, net, Size(inpWidth, inpHeight), scale, mean, swapRB);
                processedFramesQueue.push(frame);

                if (asyncNumReq)
                {
                    futureOutputs.push(net.forwardAsync());
                }
                else
                {
                    std::vector<Mat> outs;
                    net.forward(outs, outNames);
                    predictionsQueue.push(outs);
                }
            }

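            // Collect results of asynchronous requests that have already finished
            // (zero-timeout check).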
            while (!futureOutputs.empty() &&
                   futureOutputs.front().wait_for(std::chrono::seconds(0)))
            {
                AsyncArray async_out = futureOutputs.front();
                futureOutputs.pop();
                Mat out;
                async_out.get(out);
                predictionsQueue.push({out});
            }
        }
    });

    // Postprocessing and rendering loop
    while (waitKey(1) < 0)
    {
        if (predictionsQueue.empty())
            continue;

        std::vector<Mat> outs = predictionsQueue.get();
        Mat frame = processedFramesQueue.get();

        postprocess(frame, outs, net, backend);

        if (predictionsQueue.counter > 1)
        {
            std::string label = format("Camera: %.2f FPS", framesQueue.getFPS());
            putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

            label = format("Network: %.2f FPS", predictionsQueue.getFPS());
            putText(frame, label, Point(0, 30), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

            label = format("Skipped frames: %d", framesQueue.counter - predictionsQueue.counter);
            putText(frame, label, Point(0, 45), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
        }
        imshow(kWinName, frame);
    }

    process = false;
    framesThread.join();
    processingThread.join();

#else  // USE_THREADS
    if (asyncNumReq)
        CV_Error(Error::StsNotImplemented, "Asynchronous forward is supported only with Inference Engine backend.");

    // Process frames.
    Mat frame, blob;
    while (waitKey(1) < 0)
    {
        cap >> frame;
        if (frame.empty())
        {
            waitKey();
            break;
        }

        preprocess(frame, net, Size(inpWidth, inpHeight), scale, mean, swapRB);

        std::vector<Mat> outs;
        net.forward(outs, outNames);

        postprocess(frame, outs, net, backend);

        // Put efficiency information.
        std::vector<double> layersTimes;
        double freq = getTickFrequency() / 1000;
        double t = net.getPerfProfile(layersTimes) / freq;
        std::string label = format("Inference time: %.2f ms", t);
        putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

        imshow(kWinName, frame);
    }
#endif  // USE_THREADS

    return 0;
}

inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
                       const Scalar& mean, bool swapRB)
{
    static Mat blob;
    // Create a 4D blob from a frame.
    if (inpSize.width <= 0) inpSize.width = frame.cols;
    if (inpSize.height <= 0) inpSize.height = frame.rows;
    blobFromImage(frame, blob, 1.0, inpSize, Scalar(), swapRB, false, CV_8U);

    // Run a model.
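    // The blob stays in CV_8U; scale and mean are passed to setInput() so the
    // normalization is applied by the network itself.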
    net.setInput(blob, "", scale, mean);
    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
    {
        resize(frame, frame, inpSize);
        Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
        net.setInput(imInfo, "im_info");
    }
}

void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net, int backend)
{
    static std::vector<int> outLayers = net.getUnconnectedOutLayers();
    static std::string outLayerType = net.getLayer(outLayers[0])->type;

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<Rect> boxes;

    if (outLayerType == "DetectionOutput")
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        CV_Assert(outs.size() > 0);
        for (size_t k = 0; k < outs.size(); k++)
        {
            float* data = (float*)outs[k].data;
            for (size_t i = 0; i < outs[k].total(); i += 7)
            {
                float confidence = data[i + 2];
                if (confidence > confThreshold)
                {
                    int left   = (int)data[i + 3];
                    int top    = (int)data[i + 4];
                    int right  = (int)data[i + 5];
                    int bottom = (int)data[i + 6];
                    int width  = right - left + 1;
                    int height = bottom - top + 1;
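                    // Some networks report boxes in absolute pixel coordinates, others
                    // normalized to [0, 1]. A nearly degenerate box in pixels indicates
                    // normalized values, so rescale them by the frame size.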
                    if (width <= 2 || height <= 2)
                    {
                        left   = (int)(data[i + 3] * frame.cols);
                        top    = (int)(data[i + 4] * frame.rows);
                        right  = (int)(data[i + 5] * frame.cols);
                        bottom = (int)(data[i + 6] * frame.rows);
                        width  = right - left + 1;
                        height = bottom - top + 1;
                    }
                    classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
                    boxes.push_back(Rect(left, top, width, height));
                    confidences.push_back(confidence);
                }
            }
        }
    }
    else if (outLayerType == "Region")
    {
        for (size_t i = 0; i < outs.size(); ++i)
        {
            // Network produces output blob with a shape NxC where N is a number of
            // detected objects and C is a number of classes + 5: the first 5 numbers
            // are [center_x, center_y, width, height, objectness].
            float* data = (float*)outs[i].data;
            for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
            {
                Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
                Point classIdPoint;
                double confidence;
                minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
                if (confidence > confThreshold)
                {
                    int centerX = (int)(data[0] * frame.cols);
                    int centerY = (int)(data[1] * frame.rows);
                    int width   = (int)(data[2] * frame.cols);
                    int height  = (int)(data[3] * frame.rows);
                    int left = centerX - width / 2;
                    int top = centerY - height / 2;

                    classIds.push_back(classIdPoint.x);
                    confidences.push_back((float)confidence);
                    boxes.push_back(Rect(left, top, width, height));
                }
            }
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);

    // NMS is performed inside the Region layer only when the backend is DNN_BACKEND_OPENCV;
    // for other backends we run NMS here in the sample. NMS is also required when the
    // network has more than one output layer.
    if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV))
    {
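        // Group detections by class id and run NMS independently within each class.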
        std::map<int, std::vector<size_t> > class2indices;
        for (size_t i = 0; i < classIds.size(); i++)
        {
            if (confidences[i] >= confThreshold)
            {
                class2indices[classIds[i]].push_back(i);
            }
        }

        std::vector<Rect> nmsBoxes;
        std::vector<float> nmsConfidences;
        std::vector<int> nmsClassIds;
        for (std::map<int, std::vector<size_t> >::iterator it = class2indices.begin(); it != class2indices.end(); ++it)
        {
            std::vector<Rect> localBoxes;
            std::vector<float> localConfidences;
            std::vector<size_t> classIndices = it->second;
            for (size_t i = 0; i < classIndices.size(); i++)
            {
                localBoxes.push_back(boxes[classIndices[i]]);
                localConfidences.push_back(confidences[classIndices[i]]);
            }

            std::vector<int> nmsIndices;
            NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, nmsIndices);
            for (size_t i = 0; i < nmsIndices.size(); i++)
            {
                size_t idx = nmsIndices[i];
                nmsBoxes.push_back(localBoxes[idx]);
                nmsConfidences.push_back(localConfidences[idx]);
                nmsClassIds.push_back(it->first);
            }
        }
        boxes = nmsBoxes;
        classIds = nmsClassIds;
        confidences = nmsConfidences;
    }

    for (size_t idx = 0; idx < boxes.size(); ++idx)
    {
        Rect box = boxes[idx];
        drawPred(classIds[idx], confidences[idx], box.x, box.y,
                 box.x + box.width, box.y + box.height, frame);
    }
}

void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
    rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));

    std::string label = format("%.2f", conf);
    if (!classes.empty())
    {
        CV_Assert(classId < (int)classes.size());
        label = classes[classId] + ": " + label;
    }

    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

    top = max(top, labelSize.height);
    rectangle(frame, Point(left, top - labelSize.height),
              Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
    putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar());
}

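// Trackbar callback: keeps the confidence threshold in sync with the slider position (percents).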
void callback(int pos, void*)
{
    confThreshold = pos * 0.01f;
}