//
// this sample demonstrates parsing (segmenting) human body parts from an image using opencv's dnn,
// based on https://github.com/Engineering-Course/LIP_JPPNet
//
// get the pretrained model from: https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
//
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
using namespace cv ;
/**
 * @brief Segment a person image into LIP body-part classes and colour-code the result.
 *
 * The image and a horizontally flipped copy are run through the network as a
 * batch of two. For every class the two heatmaps are resized to the input size,
 * the flipped one is mirrored back, and both are averaged. Each pixel receives
 * the colour of the class with the largest averaged value; pixels where no
 * class exceeds 0 stay black.
 *
 * @param image   Input image; must not be empty.
 * @param model   Path to the network model, loaded with dnn::readNet.
 * @param backend Value passed to Net::setPreferableBackend.
 * @param target  Value passed to Net::setPreferableTarget.
 * @return CV_8UC3 image of the same size as @p image, colours in BGR order.
 * @throws cv::Exception if the image is empty or the network output does not
 *         have the expected layout (4-D, batch of 2, float, one channel per
 *         entry of the colour table).
 */
static Mat parse_human(const Mat &image, const std::string &model, int backend = dnn::DNN_BACKEND_DEFAULT, int target = dnn::DNN_TARGET_CPU)
{
    // LIP classes:
    // 0 Background, 1 Hat, 2 Hair, 3 Glove, 4 Sunglasses, 5 UpperClothes, 6 Dress, 7 Coat, 8 Socks, 9 Pants
    // 10 Jumpsuits, 11 Scarf, 12 Skirt, 13 Face, 14 LeftArm, 15 RightArm, 16 LeftLeg, 17 RightLeg, 18 LeftShoe, 19 RightShoe
    // One colour per class, given in RGB order (converted to BGR before returning).
    static const Vec3b colors[] = {
        Vec3b(0, 0, 0), Vec3b(128, 0, 0), Vec3b(255, 0, 0), Vec3b(0, 85, 0), Vec3b(170, 0, 51), Vec3b(255, 85, 0),
        Vec3b(0, 0, 85), Vec3b(0, 119, 221), Vec3b(85, 85, 0), Vec3b(0, 85, 85), Vec3b(85, 51, 0), Vec3b(52, 86, 128),
        Vec3b(0, 128, 0), Vec3b(0, 0, 255), Vec3b(51, 170, 221), Vec3b(0, 255, 255), Vec3b(85, 255, 170),
        Vec3b(170, 255, 85), Vec3b(255, 255, 0), Vec3b(255, 170, 0)
    };
    // Class index to read from the flipped result: mirroring the image exchanges
    // left and right, so the last 3 left/right pairs are swapped.
    static const int tail_order[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 14, 17, 16, 19, 18 };
    const int num_classes = static_cast<int>(sizeof(colors) / sizeof(colors[0]));

    CV_Assert(!image.empty());

    // this network expects an image and a flipped copy as input
    Mat flipped;
    flip(image, flipped, 1);
    std::vector<Mat> batch;
    batch.push_back(image);
    batch.push_back(flipped);
    Mat blob = dnn::blobFromImages(batch, 1.0, Size(), Scalar(104.00698793, 116.66876762, 122.67891434));

    dnn::Net net = dnn::readNet(model);
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    net.setInput(blob);
    Mat out = net.forward();

    // expected output: [2, 20, 384, 384], (2 lists(orig, flipped) of 20 body part heatmaps 384x384)
    // Validate the layout before indexing into it: the tables above are indexed
    // by class, and tail_order refers to classes up to num_classes - 1.
    CV_Assert(out.dims == 4 && out.size[0] >= 2);
    CV_Assert(out.size[1] == num_classes);
    CV_Assert(out.type() == CV_32F);

    Mat segm(image.size(), CV_8UC3, Scalar(0, 0, 0));
    Mat maxval(image.size(), CV_32F, Scalar(0));

    // iterate over body part heatmaps (LIP classes)
    for (int i = 0; i < num_classes; i++)
    {
        // resize heatmaps to original image size
        // "head" is the original image result, "tail" the flipped copy
        Mat head;
        Mat h(out.size[2], out.size[3], CV_32F, out.ptr<float>(0, i)); // header over the blob data, no copy
        resize(h, head, image.size());

        Mat tail;
        Mat t(out.size[2], out.size[3], CV_32F, out.ptr<float>(1, tail_order[i]));
        resize(t, tail, image.size());
        // mirror the flipped result back so it lines up with "head"
        flip(tail, tail, 1);

        // mix original and flipped result
        Mat avg = (head + tail) * 0.5;

        // write color if prob value > maxval
        Mat cmask;
        compare(avg, maxval, cmask, CMP_GT);
        segm.setTo(colors[i], cmask);

        // keep largest values for next iteration
        max(avg, maxval, maxval);
    }

    cvtColor(segm, segm, COLOR_RGB2BGR);
    return segm;
}
int main ( int argc , char * * argv )
{
2023-10-13 21:53:18 +08:00
std : : string param_keys =
2020-05-29 16:41:05 +08:00
" {help h | | show help screen / args} "
" {image i | | person image to process } "
2023-10-13 21:53:18 +08:00
" {model m |lip_jppnet_384.pb| network model} " ;
std : : string backend_keys = cv : : format (
" { backend | 0 | Choose one of computation backends: "
" %d: automatically (by default), "
" %d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
" %d: OpenCV implementation, "
" %d: VKCOM, "
" %d: CUDA } " , cv : : dnn : : DNN_BACKEND_DEFAULT , cv : : dnn : : DNN_BACKEND_INFERENCE_ENGINE , cv : : dnn : : DNN_BACKEND_OPENCV , cv : : dnn : : DNN_BACKEND_VKCOM , cv : : dnn : : DNN_BACKEND_CUDA ) ;
std : : string target_keys = cv : : format (
" { target | 0 | Choose one of target computation devices: "
" %d: CPU target (by default), "
" %d: OpenCL, "
" %d: OpenCL fp16 (half-float precision), "
" %d: VPU, "
" %d: Vulkan, "
" %d: CUDA, "
" %d: CUDA fp16 (half-float preprocess) } " , cv : : dnn : : DNN_TARGET_CPU , cv : : dnn : : DNN_TARGET_OPENCL , cv : : dnn : : DNN_TARGET_OPENCL_FP16 , cv : : dnn : : DNN_TARGET_MYRIAD , cv : : dnn : : DNN_TARGET_VULKAN , cv : : dnn : : DNN_TARGET_CUDA , cv : : dnn : : DNN_TARGET_CUDA_FP16 ) ;
std : : string keys = param_keys + backend_keys + target_keys ;
CommandLineParser parser ( argc , argv , keys ) ;
2020-05-29 16:41:05 +08:00
if ( argc = = 1 | | parser . has ( " help " ) )
{
parser . printMessage ( ) ;
return 0 ;
}
std : : string model = parser . get < std : : string > ( " model " ) ;
std : : string image = parser . get < std : : string > ( " image " ) ;
int backend = parser . get < int > ( " backend " ) ;
int target = parser . get < int > ( " target " ) ;
Mat input = imread ( image ) ;
Mat segm = parse_human ( input , model , backend , target ) ;
imshow ( " human parsing " , segm ) ;
waitKey ( ) ;
return 0 ;
}