mirror of https://github.com/opencv/opencv.git (synced 2025-06-07 17:44:04 +08:00)

Merge pull request #20957 from sturkmen72:update-documentation

Update documentation

* Update DNN-based Face Detection And Recognition tutorial
* samples(dnn/face): update face_detect.cpp
* final changes

Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>

parent b594ed99b8
commit a97f21ba4e
@@ -36,14 +36,34 @@ There are two models (ONNX format) pre-trained and required for this module:

 ### DNNFaceDetector

-```cpp
-// Initialize FaceDetectorYN
-Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(onnx_path, "", image.size(), score_thresh, nms_thresh, top_k);
-
-// Forward
-Mat faces;
-faceDetector->detect(image, faces);
-```
+@add_toggle_cpp
+- **Downloadable code**: Click
+  [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.cpp)
+
+- **Code at glance:**
+  @include samples/dnn/face_detect.cpp
+@end_toggle
+
+@add_toggle_python
+- **Downloadable code**: Click
+  [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.py)
+
+- **Code at glance:**
+  @include samples/dnn/face_detect.py
+@end_toggle
+
+Explanation
+-----------
+
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceDetectorYN
+@snippet dnn/face_detect.cpp inference
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceDetectorYN
+@snippet dnn/face_detect.py inference
+@end_toggle

 The detection output `faces` is a two-dimensional array of type CV_32F, whose rows are the detected face instances and whose columns are the location of a face and 5 facial landmarks. The format of each row is as follows:
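As an aside, here is a minimal sketch (not part of the commit; the helper name is hypothetical) of how one such row could be unpacked. The column indices follow the `visualize()` functions in the samples below: columns 0-3 hold the box, 4-13 the five landmark coordinates, and 14 the score.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

// Hypothetical helper: draw detection row i produced by FaceDetectorYN::detect().
static void drawDetection(cv::Mat& image, const cv::Mat& faces, int i)
{
    // Columns 0-3: bounding box (x1, y1, w, h)
    cv::Rect2i box(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)),
                   int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3)));
    cv::rectangle(image, box, cv::Scalar(0, 255, 0), 2);
    // Columns 4-13: right eye, left eye, nose tip, right and left mouth corners
    for (int k = 0; k < 5; k++)
        cv::circle(image, cv::Point2i(int(faces.at<float>(i, 4 + 2 * k)),
                                      int(faces.at<float>(i, 5 + 2 * k))),
                   2, cv::Scalar(255, 0, 0), 2);
    // Column 14: detection score (the samples print it rather than draw it)
    float score = faces.at<float>(i, 14);
    CV_UNUSED(score);
}
```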
@@ -57,28 +77,25 @@ x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm

 Following face detection, run the code below to extract a face feature from the facial image.

-```cpp
-// Initialize FaceRecognizerSF with model path (cv::String)
-Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(model_path, "");
-
-// Aligning and cropping facial image through the first face of faces detected by dnn_face::DNNFaceDetector
-Mat aligned_face;
-faceRecognizer->alignCrop(image, faces.row(0), aligned_face);
-
-// Run feature extraction with given aligned_face (cv::Mat)
-Mat feature;
-faceRecognizer->feature(aligned_face, feature);
-feature = feature.clone();
-```
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceRecognizerSF
+@snippet dnn/face_detect.cpp facerecognizer
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceRecognizerSF
+@snippet dnn/face_detect.py facerecognizer
+@end_toggle

 After obtaining face features *feature1* and *feature2* of two facial images, run the code below to calculate the identity discrepancy between the two faces.

-```cpp
-// Calculating the discrepancy between two face features by using cosine distance.
-double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::COSINE);
-// Calculating the discrepancy between two face features by using normL2 distance.
-double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::NORM_L2);
-```
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp match
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py match
+@end_toggle

 For example, two faces have the same identity if the cosine distance is greater than or equal to 0.363, or the normL2 distance is less than or equal to 1.128.
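A minimal sketch of that decision rule (assuming `feature1` and `feature2` were produced by `FaceRecognizerSF::feature()` as in the snippets above; the function name is hypothetical):

```cpp
#include <opencv2/objdetect.hpp>

// Sketch of the thresholding described above; either criterion can be used on its own.
static bool sameIdentity(const cv::Ptr<cv::FaceRecognizerSF>& faceRecognizer,
                         const cv::Mat& feature1, const cv::Mat& feature2)
{
    double cos_score = faceRecognizer->match(feature1, feature2,
                                             cv::FaceRecognizerSF::DisType::FR_COSINE);
    double L2_score = faceRecognizer->match(feature1, feature2,
                                            cv::FaceRecognizerSF::DisType::FR_NORM_L2);
    bool same_by_cosine = cos_score >= 0.363;  // higher cosine score means more similar
    bool same_by_l2 = L2_score <= 1.128;       // lower normL2 distance means more similar
    return same_by_cosine || same_by_l2;
}
```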
samples/dnn/face_detect.cpp
@@ -8,125 +8,272 @@
 using namespace cv;
 using namespace std;

-static Mat visualize(Mat input, Mat faces, int thickness=2)
+static
+void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
 {
-    Mat output = input.clone();
+    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
+    if (frame >= 0)
+        cout << "Frame " << frame << ", ";
+    cout << "FPS: " << fpsString << endl;
     for (int i = 0; i < faces.rows; i++)
     {
         // Print results
         cout << "Face " << i
              << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
              << "box width: " << faces.at<float>(i, 2) << ", box height: " << faces.at<float>(i, 3) << ", "
-             << "score: " << faces.at<float>(i, 14) << "\n";
+             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
+             << endl;

         // Draw bounding box
-        rectangle(output, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
+        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
         // Draw landmarks
-        circle(output, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar( 0, 0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar( 0, 255, 0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar( 0, 255, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
     }
-    return output;
+    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
 }

-int main(int argc, char ** argv)
+int main(int argc, char** argv)
 {
     CommandLineParser parser(argc, argv,
-        "{help h | | Print this message.}"
-        "{input i | | Path to the input image. Omit for detecting on default camera.}"
-        "{model m | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.}"
-        "{score_threshold | 0.9 | Filter out faces of score < score_threshold.}"
-        "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold.}"
-        "{top_k | 5000 | Keep top_k bounding boxes before NMS.}"
-        "{save s | false | Set true to save results. This flag is invalid when using camera.}"
-        "{vis v | true | Set true to open a window for result visualization. This flag is invalid when using camera.}"
+        "{help h | | Print this message}"
+        "{image1 i1 | | Path to the input image1. Omit for detecting through VideoCapture}"
+        "{image2 i2 | | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
+        "{video v | 0 | Path to the input video}"
+        "{scale sc | 1.0 | Scale factor used to resize input video frames}"
+        "{fd_model fd | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }"
+        "{fr_model fr | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}"
+        "{score_threshold | 0.9 | Filter out faces of score < score_threshold}"
+        "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}"
+        "{top_k | 5000 | Keep top_k bounding boxes before NMS}"
+        "{save s | false | Set true to save results. This flag is invalid when using camera}"
     );
-    if (argc == 1 || parser.has("help"))
+    if (parser.has("help"))
     {
         parser.printMessage();
-        return -1;
+        return 0;
     }

-    String modelPath = parser.get<String>("model");
+    String fd_modelPath = parser.get<String>("fd_model");
+    String fr_modelPath = parser.get<String>("fr_model");

     float scoreThreshold = parser.get<float>("score_threshold");
     float nmsThreshold = parser.get<float>("nms_threshold");
     int topK = parser.get<int>("top_k");

     bool save = parser.get<bool>("save");
-    bool vis = parser.get<bool>("vis");

+    double cosine_similar_thresh = 0.363;
+    double l2norm_similar_thresh = 1.128;
+
+    //! [initialize_FaceDetectorYN]
     // Initialize FaceDetectorYN
-    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    //! [initialize_FaceDetectorYN]

+    TickMeter tm;

     // If input is an image
-    if (parser.has("input"))
+    if (parser.has("image1"))
     {
-        String input = parser.get<String>("input");
-        Mat image = imread(input);
+        String input1 = parser.get<String>("image1");
+        Mat image1 = imread(samples::findFile(input1));
+        if (image1.empty())
+        {
+            std::cerr << "Cannot read image: " << input1 << std::endl;
+            return 2;
+        }
+
+        tm.start();
+
+        //! [inference]
         // Set input size before inference
-        detector->setInputSize(image.size());
+        detector->setInputSize(image1.size());

-        // Inference
-        Mat faces;
-        detector->detect(image, faces);
+        Mat faces1;
+        detector->detect(image1, faces1);
+        if (faces1.rows < 1)
+        {
+            std::cerr << "Cannot find a face in " << input1 << std::endl;
+            return 1;
+        }
+        //! [inference]
+
+        tm.stop();
         // Draw results on the input image
-        Mat result = visualize(image, faces);
+        visualize(image1, -1, faces1, tm.getFPS());

         // Save results if save is true
-        if(save)
+        if (save)
         {
-            cout << "Results saved to result.jpg\n";
-            imwrite("result.jpg", result);
+            cout << "Saving result.jpg...\n";
+            imwrite("result.jpg", image1);
         }

         // Visualize results
-        if (vis)
+        imshow("image1", image1);
+        pollKey();  // handle UI events to show content
+
+        if (parser.has("image2"))
         {
-            namedWindow(input, WINDOW_AUTOSIZE);
-            imshow(input, result);
-            waitKey(0);
+            String input2 = parser.get<String>("image2");
+            Mat image2 = imread(samples::findFile(input2));
+            if (image2.empty())
+            {
+                std::cerr << "Cannot read image2: " << input2 << std::endl;
+                return 2;
+            }
+
+            tm.reset();
+            tm.start();
+            detector->setInputSize(image2.size());
+
+            Mat faces2;
+            detector->detect(image2, faces2);
+            if (faces2.rows < 1)
+            {
+                std::cerr << "Cannot find a face in " << input2 << std::endl;
+                return 1;
+            }
+            tm.stop();
+            visualize(image2, -1, faces2, tm.getFPS());
+            if (save)
+            {
+                cout << "Saving result2.jpg...\n";
+                imwrite("result2.jpg", image2);
+            }
+            imshow("image2", image2);
+            pollKey();
+
+            //! [initialize_FaceRecognizerSF]
+            // Initialize FaceRecognizerSF
+            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
+            //! [initialize_FaceRecognizerSF]
+
+            //! [facerecognizer]
+            // Aligning and cropping facial image through the first face of faces detected.
+            Mat aligned_face1, aligned_face2;
+            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
+            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
+
+            // Run feature extraction with given aligned_face
+            Mat feature1, feature2;
+            faceRecognizer->feature(aligned_face1, feature1);
+            feature1 = feature1.clone();
+            faceRecognizer->feature(aligned_face2, feature2);
+            feature2 = feature2.clone();
+            //! [facerecognizer]
+
+            //! [match]
+            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
+            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
+            //! [match]
+
+            if (cos_score >= cosine_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities;";
+            }
+            std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
+
+            if (L2_score <= l2norm_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities.";
+            }
+            std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
         }
+        cout << "Press any key to exit..." << endl;
+        waitKey(0);
     }
     else
     {
-        int deviceId = 0;
-        VideoCapture cap;
-        cap.open(deviceId, CAP_ANY);
-        int frameWidth = int(cap.get(CAP_PROP_FRAME_WIDTH));
-        int frameHeight = int(cap.get(CAP_PROP_FRAME_HEIGHT));
+        int frameWidth, frameHeight;
+        float scale = parser.get<float>("scale");
+        VideoCapture capture;
+        std::string video = parser.get<string>("video");
+        if (video.size() == 1 && isdigit(video[0]))
+            capture.open(parser.get<int>("video"));
+        else
+            capture.open(samples::findFileOrKeep(video));  // keep GStreamer pipelines
+        if (capture.isOpened())
+        {
+            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
+            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
+            cout << "Video " << video
+                << ": width=" << frameWidth
+                << ", height=" << frameHeight
+                << endl;
+        }
+        else
+        {
+            cout << "Could not initialize video capturing: " << video << "\n";
+            return 1;
+        }
+
         detector->setInputSize(Size(frameWidth, frameHeight));

-        Mat frame;
-        TickMeter tm;
-        String msg = "FPS: ";
-        while(waitKey(1) < 0) // Press any key to exit
+        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
+        int nFrame = 0;
+        for (;;)
         {
             // Get frame
-            if (!cap.read(frame))
+            Mat frame;
+            if (!capture.read(frame))
             {
-                cerr << "No frames grabbed!\n";
+                cerr << "Can't grab frame! Stop\n";
                 break;
             }

+            resize(frame, frame, Size(frameWidth, frameHeight));
+
             // Inference
             Mat faces;
             tm.start();
             detector->detect(frame, faces);
             tm.stop();

+            Mat result = frame.clone();
             // Draw results on the input image
-            Mat result = visualize(frame, faces);
-            putText(result, msg + to_string(tm.getFPS()), Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
+            visualize(result, nFrame, faces, tm.getFPS());

             // Visualize results
             imshow("Live", result);

-            tm.reset();
+            int key = waitKey(1);
+            bool saveFrame = save;
+            if (key == ' ')
+            {
+                saveFrame = true;
+                key = 0;  // handled
+            }
+
+            if (saveFrame)
+            {
+                std::string frame_name = cv::format("frame_%05d.png", nFrame);
+                std::string result_name = cv::format("result_%05d.jpg", nFrame);
+                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
+                imwrite(frame_name, frame);
+                imwrite(result_name, result);
+            }
+
+            ++nFrame;
+
+            if (key > 0)
+                break;
         }
+        cout << "Processed " << nFrame << " frames" << endl;
     }
-}
+    cout << "Done." << endl;
+    return 0;
+}
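For context (an assumed invocation, not shown in the commit): with OpenCV's `CommandLineParser` syntax, the updated sample compares two images via something like `face_detect -i1=face1.jpg -i2=face2.jpg -fd=yunet.onnx -fr=face_recognizer_fast.onnx`, where the binary and image names are placeholders, and it falls back to the `video` input (default `0`, the camera) when `image1` is omitted.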
samples/dnn/face_detect.py
@@ -12,90 +12,144 @@ def str2bool(v):
     raise NotImplementedError

 parser = argparse.ArgumentParser()
-parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
-parser.add_argument('--model', '-m', type=str, default='yunet.onnx', help='Path to the model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. Omit for detecting on default camera.')
+parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm.')
+parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
+parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
+parser.add_argument('--face_detection_model', '-fd', type=str, default='yunet.onnx', help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognizer_fast.onnx', help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
 parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
 parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()

-def visualize(input, faces, thickness=2):
-    output = input.copy()
+def visualize(input, faces, fps, thickness=2):
     if faces[1] is not None:
         for idx, face in enumerate(faces[1]):
             print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))

             coords = face[:-1].astype(np.int32)
-            cv.rectangle(output, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), 2)
-            cv.circle(output, (coords[4], coords[5]), 2, (255, 0, 0), 2)
-            cv.circle(output, (coords[6], coords[7]), 2, (0, 0, 255), 2)
-            cv.circle(output, (coords[8], coords[9]), 2, (0, 255, 0), 2)
-            cv.circle(output, (coords[10], coords[11]), 2, (255, 0, 255), 2)
-            cv.circle(output, (coords[12], coords[13]), 2, (0, 255, 255), 2)
-    return output
+            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
+            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
+            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
+            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
+            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
+            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
+    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

 if __name__ == '__main__':

-    # Instantiate FaceDetectorYN
+    ## [initialize_FaceDetectorYN]
     detector = cv.FaceDetectorYN.create(
-        args.model,
+        args.face_detection_model,
         "",
         (320, 320),
         args.score_threshold,
         args.nms_threshold,
         args.top_k
     )
+    ## [initialize_FaceDetectorYN]
+
+    tm = cv.TickMeter()
+
     # If input is an image
-    if args.input is not None:
-        image = cv.imread(args.input)
+    if args.image1 is not None:
+        img1 = cv.imread(cv.samples.findFile(args.image1))
+
+        tm.start()
+        ## [inference]
         # Set input size before inference
-        detector.setInputSize((image.shape[1], image.shape[0]))
+        detector.setInputSize((img1.shape[1], img1.shape[0]))

-        # Inference
-        faces = detector.detect(image)
+        faces1 = detector.detect(img1)
+        ## [inference]
+
+        tm.stop()
+        assert faces1[1] is not None, 'Cannot find a face in {}'.format(args.image1)

         # Draw results on the input image
-        result = visualize(image, faces)
+        visualize(img1, faces1, tm.getFPS())

         # Save results if save is true
         if args.save:
-            print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', result)
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', img1)

         # Visualize results in a new window
-        if args.vis:
-            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, result)
-            cv.waitKey(0)
+        cv.imshow("image1", img1)
+
+        if args.image2 is not None:
+            img2 = cv.imread(cv.samples.findFile(args.image2))
+
+            tm.reset()
+            tm.start()
+            detector.setInputSize((img2.shape[1], img2.shape[0]))
+            faces2 = detector.detect(img2)
+            tm.stop()
+            assert faces2[1] is not None, 'Cannot find a face in {}'.format(args.image2)
+            visualize(img2, faces2, tm.getFPS())
+            cv.imshow("image2", img2)
+
+            ## [initialize_FaceRecognizerSF]
+            recognizer = cv.FaceRecognizerSF.create(
+                args.face_recognition_model,"")
+            ## [initialize_FaceRecognizerSF]
+
+            ## [facerecognizer]
+            # Align faces
+            face1_align = recognizer.alignCrop(img1, faces1[1][0])
+            face2_align = recognizer.alignCrop(img2, faces2[1][0])
+
+            # Extract features
+            face1_feature = recognizer.feature(face1_align)
+            face2_feature = recognizer.feature(face2_align)
+            ## [facerecognizer]
+
+            cosine_similarity_threshold = 0.363
+            l2_similarity_threshold = 1.128
+
+            ## [match]
+            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
+            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)
+            ## [match]
+
+            msg = 'different identities'
+            if cosine_score >= cosine_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
+
+            msg = 'different identities'
+            if l2_score <= l2_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))
+        cv.waitKey(0)
     else: # Omit input to call default camera
-        deviceId = 0
+        if args.video is not None:
+            deviceId = args.video
+        else:
+            deviceId = 0
         cap = cv.VideoCapture(deviceId)
-        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
-        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
+        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
+        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
         detector.setInputSize([frameWidth, frameHeight])

-        tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break

+            frame = cv.resize(frame, (frameWidth, frameHeight))
+
             # Inference
             tm.start()
             faces = detector.detect(frame) # faces is a tuple
             tm.stop()

             # Draw results on the input image
-            frame = visualize(frame, faces)
-
-            cv.putText(frame, 'FPS: {}'.format(tm.getFPS()), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
+            visualize(frame, faces, tm.getFPS())

-            # Visualize results in a new Window
+            # Visualize results
             cv.imshow('Live', frame)
-
-            tm.reset()
+    cv.destroyAllWindows()
@@ -1,103 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#include "opencv2/dnn.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-
-#include <iostream>
-
-#include "opencv2/objdetect.hpp"
-
-
-using namespace cv;
-using namespace std;
-
-
-int main(int argc, char ** argv)
-{
-    if (argc != 5)
-    {
-        std::cerr << "Usage " << argv[0] << ": "
-                  << "<det_onnx_path> "
-                  << "<reg_onnx_path> "
-                  << "<image1>"
-                  << "<image2>\n";
-        return -1;
-    }
-
-    String det_onnx_path = argv[1];
-    String reg_onnx_path = argv[2];
-    String image1_path = argv[3];
-    String image2_path = argv[4];
-    std::cout<<image1_path<<" "<<image2_path<<std::endl;
-    Mat image1 = imread(image1_path);
-    Mat image2 = imread(image2_path);
-
-    float score_thresh = 0.9f;
-    float nms_thresh = 0.3f;
-    double cosine_similar_thresh = 0.363;
-    double l2norm_similar_thresh = 1.128;
-    int top_k = 5000;
-
-    // Initialize FaceDetector
-    Ptr<FaceDetectorYN> faceDetector;
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image1.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_1;
-    faceDetector->detect(image1, faces_1);
-    if (faces_1.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image1_path << "\n";
-        return -1;
-    }
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image2.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_2;
-    faceDetector->detect(image2, faces_2);
-    if (faces_2.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image2_path << "\n";
-        return -1;
-    }
-
-    // Initialize FaceRecognizerSF
-    Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(reg_onnx_path, "");
-
-
-    Mat aligned_face1, aligned_face2;
-    faceRecognizer->alignCrop(image1, faces_1.row(0), aligned_face1);
-    faceRecognizer->alignCrop(image2, faces_2.row(0), aligned_face2);
-
-    Mat feature1, feature2;
-    faceRecognizer->feature(aligned_face1, feature1);
-    feature1 = feature1.clone();
-    faceRecognizer->feature(aligned_face2, feature2);
-    feature2 = feature2.clone();
-
-    double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
-    double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
-
-    if(cos_score >= cosine_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities;";
-    }
-    std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
-
-    if(L2_score <= l2norm_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities.";
-    }
-    std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
-
-    return 0;
-}
@@ -1,57 +0,0 @@
-import argparse
-
-import numpy as np
-import cv2 as cv
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--input1', '-i1', type=str, help='Path to the input image1.')
-parser.add_argument('--input2', '-i2', type=str, help='Path to the input image2.')
-parser.add_argument('--face_detection_model', '-fd', type=str, help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
-parser.add_argument('--face_recognition_model', '-fr', type=str, help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
-args = parser.parse_args()
-
-# Read the input image
-img1 = cv.imread(args.input1)
-img2 = cv.imread(args.input2)
-
-# Instantiate face detector and recognizer
-detector = cv.FaceDetectorYN.create(
-    args.face_detection_model,
-    "",
-    (img1.shape[1], img1.shape[0])
-)
-recognizer = cv.FaceRecognizerSF.create(
-    args.face_recognition_model,
-    ""
-)
-
-# Detect face
-detector.setInputSize((img1.shape[1], img1.shape[0]))
-face1 = detector.detect(img1)
-detector.setInputSize((img2.shape[1], img2.shape[0]))
-face2 = detector.detect(img2)
-assert face1[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input1)
-assert face2[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input2)
-
-# Align faces
-face1_align = recognizer.alignCrop(img1, face1[1][0])
-face2_align = recognizer.alignCrop(img2, face2[1][0])
-
-# Extract features
-face1_feature = recognizer.feature(face1_align)
-face2_feature = recognizer.feature(face2_align)
-
-# Calculate distance (0: cosine, 1: L2)
-cosine_similarity_threshold = 0.363
-cosine_score = recognizer.match(face1_feature, face2_feature, 0)
-msg = 'different identities'
-if cosine_score >= cosine_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
-
-l2_similarity_threshold = 1.128
-l2_score = recognizer.match(face1_feature, face2_feature, 1)
-msg = 'different identities'
-if l2_score <= l2_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))