mirror of
https://github.com/opencv/opencv.git
synced 2025-01-10 22:28:13 +08:00
d789cb459c
dnn: cleanup of halide backend for 5.x #24231 Merge with https://github.com/opencv/opencv_extra/pull/1092. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
112 lines
4.9 KiB
C++
112 lines
4.9 KiB
C++
//
|
|
// this sample demonstrates parsing (segmenting) human body parts from an image using opencv's dnn,
|
|
// based on https://github.com/Engineering-Course/LIP_JPPNet
|
|
//
|
|
// get the pretrained model from: https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
|
|
//
|
|
|
|
#include <opencv2/dnn.hpp>
|
|
#include <opencv2/highgui.hpp>
|
|
#include <opencv2/imgproc.hpp>
|
|
using namespace cv;
|
|
|
|
|
|
static Mat parse_human(const Mat &image, const std::string &model, int backend=dnn::DNN_BACKEND_DEFAULT, int target=dnn::DNN_TARGET_CPU) {
|
|
// this network expects an image and a flipped copy as input
|
|
Mat flipped;
|
|
flip(image, flipped, 1);
|
|
std::vector<Mat> batch;
|
|
batch.push_back(image);
|
|
batch.push_back(flipped);
|
|
Mat blob = dnn::blobFromImages(batch, 1.0, Size(), Scalar(104.00698793, 116.66876762, 122.67891434));
|
|
|
|
dnn::Net net = dnn::readNet(model);
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
net.setInput(blob);
|
|
Mat out = net.forward();
|
|
// expected output: [2, 20, 384, 384], (2 lists(orig, flipped) of 20 body part heatmaps 384x384)
|
|
|
|
// LIP classes:
|
|
// 0 Background, 1 Hat, 2 Hair, 3 Glove, 4 Sunglasses, 5 UpperClothes, 6 Dress, 7 Coat, 8 Socks, 9 Pants
|
|
// 10 Jumpsuits, 11 Scarf, 12 Skirt, 13 Face, 14 LeftArm, 15 RightArm, 16 LeftLeg, 17 RightLeg, 18 LeftShoe. 19 RightShoe
|
|
Vec3b colors[] = {
|
|
Vec3b(0, 0, 0), Vec3b(128, 0, 0), Vec3b(255, 0, 0), Vec3b(0, 85, 0), Vec3b(170, 0, 51), Vec3b(255, 85, 0),
|
|
Vec3b(0, 0, 85), Vec3b(0, 119, 221), Vec3b(85, 85, 0), Vec3b(0, 85, 85), Vec3b(85, 51, 0), Vec3b(52, 86, 128),
|
|
Vec3b(0, 128, 0), Vec3b(0, 0, 255), Vec3b(51, 170, 221), Vec3b(0, 255, 255), Vec3b(85, 255, 170),
|
|
Vec3b(170, 255, 85), Vec3b(255, 255, 0), Vec3b(255, 170, 0)
|
|
};
|
|
|
|
Mat segm(image.size(), CV_8UC3, Scalar(0,0,0));
|
|
Mat maxval(image.size(), CV_32F, Scalar(0));
|
|
|
|
// iterate over body part heatmaps (LIP classes)
|
|
for (int i=0; i<out.size[1]; i++) {
|
|
// resize heatmaps to original image size
|
|
// "head" is the original image result, "tail" the flipped copy
|
|
Mat head, h(out.size[2], out.size[3], CV_32F, out.ptr<float>(0,i));
|
|
resize(h, head, image.size());
|
|
|
|
// we have to swap the last 3 pairs in the "tail" list
|
|
static int tail_order[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,15,14,17,16,19,18};
|
|
Mat tail, t(out.size[2], out.size[3], CV_32F, out.ptr<float>(1,tail_order[i]));
|
|
resize(t, tail, image.size());
|
|
flip(tail, tail, 1);
|
|
|
|
// mix original and flipped result
|
|
Mat avg = (head + tail) * 0.5;
|
|
|
|
// write color if prob value > maxval
|
|
Mat cmask;
|
|
compare(avg, maxval, cmask, CMP_GT);
|
|
segm.setTo(colors[i], cmask);
|
|
|
|
// keep largest values for next iteration
|
|
max(avg, maxval, maxval);
|
|
}
|
|
cvtColor(segm, segm, COLOR_RGB2BGR);
|
|
return segm;
|
|
}
|
|
|
|
int main(int argc, char**argv)
|
|
{
|
|
std::string param_keys =
|
|
"{help h | | show help screen / args}"
|
|
"{image i | | person image to process }"
|
|
"{model m |lip_jppnet_384.pb| network model}";
|
|
std::string backend_keys = cv::format(
|
|
"{ backend | 0 | Choose one of computation backends: "
|
|
"%d: automatically (by default), "
|
|
"%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
|
|
"%d: OpenCV implementation, "
|
|
"%d: VKCOM, "
|
|
"%d: CUDA }", cv::dnn::DNN_BACKEND_DEFAULT, cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_BACKEND_VKCOM, cv::dnn::DNN_BACKEND_CUDA);
|
|
std::string target_keys = cv::format(
|
|
"{ target | 0 | Choose one of target computation devices: "
|
|
"%d: CPU target (by default), "
|
|
"%d: OpenCL, "
|
|
"%d: OpenCL fp16 (half-float precision), "
|
|
"%d: VPU, "
|
|
"%d: Vulkan, "
|
|
"%d: CUDA, "
|
|
"%d: CUDA fp16 (half-float preprocess) }", cv::dnn::DNN_TARGET_CPU, cv::dnn::DNN_TARGET_OPENCL, cv::dnn::DNN_TARGET_OPENCL_FP16, cv::dnn::DNN_TARGET_MYRIAD, cv::dnn::DNN_TARGET_VULKAN, cv::dnn::DNN_TARGET_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16);
|
|
std::string keys = param_keys + backend_keys + target_keys;
|
|
CommandLineParser parser(argc, argv, keys);
|
|
if (argc == 1 || parser.has("help"))
|
|
{
|
|
parser.printMessage();
|
|
return 0;
|
|
}
|
|
std::string model = parser.get<std::string>("model");
|
|
std::string image = parser.get<std::string>("image");
|
|
int backend = parser.get<int>("backend");
|
|
int target = parser.get<int>("target");
|
|
|
|
Mat input = imread(image);
|
|
Mat segm = parse_human(input, model, backend, target);
|
|
|
|
imshow("human parsing", segm);
|
|
waitKey();
|
|
return 0;
|
|
}
|