Merge pull request #20957 from sturkmen72:update-documentation

Update documentation

* Update DNN-based Face Detection And Recognition tutorial

* samples(dnn/face): update face_detect.cpp

* final changes

Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Suleyman TURKMEN 2021-11-28 15:56:28 +03:00 committed by GitHub
parent b594ed99b8
commit a97f21ba4e
5 changed files with 333 additions and 275 deletions

File: DNN-based Face Detection And Recognition tutorial (markdown)

@@ -36,14 +36,34 @@ There are two models (ONNX format) pre-trained and required for this module:
 ### DNNFaceDetector
 
-```cpp
-// Initialize FaceDetectorYN
-Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(onnx_path, "", image.size(), score_thresh, nms_thresh, top_k);
-
-// Forward
-Mat faces;
-faceDetector->detect(image, faces);
-```
+@add_toggle_cpp
+- **Downloadable code**: Click
+  [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.cpp)
+- **Code at glance:**
+  @include samples/dnn/face_detect.cpp
+@end_toggle
+
+@add_toggle_python
+- **Downloadable code**: Click
+  [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.py)
+- **Code at glance:**
+  @include samples/dnn/face_detect.py
+@end_toggle
+
+Explanation
+-----------
+
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceDetectorYN
+@snippet dnn/face_detect.cpp inference
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceDetectorYN
+@snippet dnn/face_detect.py inference
+@end_toggle
+
 The detection output `faces` is a two-dimensional array of type CV_32F, whose rows are the detected face instances and whose columns are the location of a face and 5 facial landmarks. The format of each row is as follows:
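
As a quick illustration of this layout, here is a minimal Python sketch that consumes the detector output directly (the file names `input.jpg` and `yunet.onnx` are placeholders, not files shipped with this PR):

```python
import cv2 as cv

# Placeholder inputs: any test image with a visible face, plus the YuNet model
img = cv.imread('input.jpg')
detector = cv.FaceDetectorYN.create('yunet.onnx', '', (img.shape[1], img.shape[0]))

_, faces = detector.detect(img)  # faces: None, or an N x 15 CV_32F array
if faces is not None:
    for row in faces:
        x, y, w, h = row[:4].astype(int)      # bounding box
        landmarks = row[4:14].reshape(5, 2)   # right eye, left eye, nose tip,
                                              # right and left mouth corners
        score = row[14]                       # detection confidence
        print('box=({}, {}, {}, {}), score={:.2f}'.format(x, y, w, h, score))
```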
@@ -57,28 +77,25 @@ x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm
 Following Face Detection, run the code below to extract face features from a facial image.
 
-```cpp
-// Initialize FaceRecognizerSF with model path (cv::String)
-Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(model_path, "");
-
-// Aligning and cropping facial image through the first face of faces detected by dnn_face::DNNFaceDetector
-Mat aligned_face;
-faceRecognizer->alignCrop(image, faces.row(0), aligned_face);
-
-// Run feature extraction with given aligned_face (cv::Mat)
-Mat feature;
-faceRecognizer->feature(aligned_face, feature);
-feature = feature.clone();
-```
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceRecognizerSF
+@snippet dnn/face_detect.cpp facerecognizer
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceRecognizerSF
+@snippet dnn/face_detect.py facerecognizer
+@end_toggle
 
 After obtaining face features *feature1* and *feature2* of two facial images, run the code below to calculate the identity discrepancy between the two faces.
 
-```cpp
-// Calculating the discrepancy between two face features by using cosine distance.
-double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::COSINE);
-
-// Calculating the discrepancy between two face features by using normL2 distance.
-double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::NORM_L2);
-```
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp match
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py match
+@end_toggle
 
 For example, two faces have the same identity if the cosine distance is greater than or equal to 0.363, or the normL2 distance is less than or equal to 1.128.
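
To make the full pipeline concrete, here is a minimal end-to-end sketch under those thresholds, using the same API calls as the samples above (the image and model paths are placeholders; error handling is omitted):

```python
import cv2 as cv

img1 = cv.imread('image1.jpg')  # placeholder input images
img2 = cv.imread('image2.jpg')

detector = cv.FaceDetectorYN.create('yunet.onnx', '', (img1.shape[1], img1.shape[0]))
recognizer = cv.FaceRecognizerSF.create('face_recognizer_fast.onnx', '')

faces1 = detector.detect(img1)
detector.setInputSize((img2.shape[1], img2.shape[0]))
faces2 = detector.detect(img2)

# Align each image on its first detected face, then extract features
face1_align = recognizer.alignCrop(img1, faces1[1][0])
face2_align = recognizer.alignCrop(img2, faces2[1][0])
feat1 = recognizer.feature(face1_align)
feat2 = recognizer.feature(face2_align)

# Apply the thresholds quoted above
cos_score = recognizer.match(feat1, feat2, cv.FaceRecognizerSF_FR_COSINE)
l2_score = recognizer.match(feat1, feat2, cv.FaceRecognizerSF_FR_NORM_L2)
same = cos_score >= 0.363 or l2_score <= 1.128
print('same identity' if same else 'different identities')
```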

File: samples/dnn/face_detect.cpp

@@ -8,125 +8,272 @@
 using namespace cv;
 using namespace std;
 
-static Mat visualize(Mat input, Mat faces, int thickness=2)
+static
+void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
 {
-    Mat output = input.clone();
+    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
+    if (frame >= 0)
+        cout << "Frame " << frame << ", ";
+    cout << "FPS: " << fpsString << endl;
     for (int i = 0; i < faces.rows; i++)
     {
         // Print results
         cout << "Face " << i
              << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
              << "box width: " << faces.at<float>(i, 2) << ", box height: " << faces.at<float>(i, 3) << ", "
-             << "score: " << faces.at<float>(i, 14) << "\n";
+             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
+             << endl;
         // Draw bounding box
-        rectangle(output, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
+        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
         // Draw landmarks
-        circle(output, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar( 0, 0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar( 0, 255, 0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar( 0, 255, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
     }
-    return output;
+    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
 }
 
-int main(int argc, char ** argv)
+int main(int argc, char** argv)
 {
     CommandLineParser parser(argc, argv,
-        "{help h | | Print this message.}"
-        "{input i | | Path to the input image. Omit for detecting on default camera.}"
-        "{model m | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.}"
-        "{score_threshold | 0.9 | Filter out faces of score < score_threshold.}"
-        "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold.}"
-        "{top_k | 5000 | Keep top_k bounding boxes before NMS.}"
-        "{save s | false | Set true to save results. This flag is invalid when using camera.}"
-        "{vis v | true | Set true to open a window for result visualization. This flag is invalid when using camera.}"
+        "{help h | | Print this message}"
+        "{image1 i1 | | Path to the input image1. Omit for detecting through VideoCapture}"
+        "{image2 i2 | | Path to the input image2. When both image1 and image2 are given, the program tries to find a face on each image and runs the face recognition algorithm}"
+        "{video v | 0 | Path to the input video}"
+        "{scale sc | 1.0 | Scale factor used to resize input video frames}"
+        "{fd_model fd | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }"
+        "{fr_model fr | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}"
+        "{score_threshold | 0.9 | Filter out faces of score < score_threshold}"
+        "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}"
+        "{top_k | 5000 | Keep top_k bounding boxes before NMS}"
+        "{save s | false | Set true to save results. This flag is invalid when using camera}"
     );
-    if (argc == 1 || parser.has("help"))
+    if (parser.has("help"))
     {
         parser.printMessage();
-        return -1;
+        return 0;
     }
 
-    String modelPath = parser.get<String>("model");
+    String fd_modelPath = parser.get<String>("fd_model");
+    String fr_modelPath = parser.get<String>("fr_model");
     float scoreThreshold = parser.get<float>("score_threshold");
     float nmsThreshold = parser.get<float>("nms_threshold");
     int topK = parser.get<int>("top_k");
     bool save = parser.get<bool>("save");
-    bool vis = parser.get<bool>("vis");
+
+    double cosine_similar_thresh = 0.363;
+    double l2norm_similar_thresh = 1.128;
 
+    //! [initialize_FaceDetectorYN]
     // Initialize FaceDetectorYN
-    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    //! [initialize_FaceDetectorYN]
+
+    TickMeter tm;
 
     // If input is an image
-    if (parser.has("input"))
+    if (parser.has("image1"))
     {
-        String input = parser.get<String>("input");
-        Mat image = imread(input);
+        String input1 = parser.get<String>("image1");
+        Mat image1 = imread(samples::findFile(input1));
+        if (image1.empty())
+        {
+            std::cerr << "Cannot read image: " << input1 << std::endl;
+            return 2;
+        }
+
+        tm.start();
+
+        //! [inference]
         // Set input size before inference
-        detector->setInputSize(image.size());
-        // Inference
-        Mat faces;
-        detector->detect(image, faces);
+        detector->setInputSize(image1.size());
+
+        Mat faces1;
+        detector->detect(image1, faces1);
+        if (faces1.rows < 1)
+        {
+            std::cerr << "Cannot find a face in " << input1 << std::endl;
+            return 1;
+        }
+        //! [inference]
+
+        tm.stop();
 
         // Draw results on the input image
-        Mat result = visualize(image, faces);
+        visualize(image1, -1, faces1, tm.getFPS());
 
         // Save results if save is true
-        if(save)
+        if (save)
         {
-            cout << "Results saved to result.jpg\n";
-            imwrite("result.jpg", result);
+            cout << "Saving result.jpg...\n";
+            imwrite("result.jpg", image1);
         }
 
         // Visualize results
-        if (vis)
-        {
-            namedWindow(input, WINDOW_AUTOSIZE);
-            imshow(input, result);
-            waitKey(0);
-        }
+        imshow("image1", image1);
+        pollKey();  // handle UI events to show content
+
+        if (parser.has("image2"))
+        {
+            String input2 = parser.get<String>("image2");
+            Mat image2 = imread(samples::findFile(input2));
+            if (image2.empty())
+            {
+                std::cerr << "Cannot read image2: " << input2 << std::endl;
+                return 2;
+            }
+
+            tm.reset();
+            tm.start();
+            detector->setInputSize(image2.size());
+
+            Mat faces2;
+            detector->detect(image2, faces2);
+            if (faces2.rows < 1)
+            {
+                std::cerr << "Cannot find a face in " << input2 << std::endl;
+                return 1;
+            }
+            tm.stop();
+
+            visualize(image2, -1, faces2, tm.getFPS());
+            if (save)
+            {
+                cout << "Saving result2.jpg...\n";
+                imwrite("result2.jpg", image2);
+            }
+            imshow("image2", image2);
+            pollKey();
+
+            //! [initialize_FaceRecognizerSF]
+            // Initialize FaceRecognizerSF
+            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
+            //! [initialize_FaceRecognizerSF]
+
+            //! [facerecognizer]
+            // Aligning and cropping facial image through the first face of faces detected.
+            Mat aligned_face1, aligned_face2;
+            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
+            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
+
+            // Run feature extraction with given aligned_face
+            Mat feature1, feature2;
+            faceRecognizer->feature(aligned_face1, feature1);
+            feature1 = feature1.clone();
+            faceRecognizer->feature(aligned_face2, feature2);
+            feature2 = feature2.clone();
+            //! [facerecognizer]
+
+            //! [match]
+            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
+            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
+            //! [match]
+
+            if (cos_score >= cosine_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities;";
+            }
+            std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
+
+            if (L2_score <= l2norm_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities.";
+            }
+            std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
+        }
+        cout << "Press any key to exit..." << endl;
+        waitKey(0);
     }
     else
     {
-        int deviceId = 0;
-        VideoCapture cap;
-        cap.open(deviceId, CAP_ANY);
-        int frameWidth = int(cap.get(CAP_PROP_FRAME_WIDTH));
-        int frameHeight = int(cap.get(CAP_PROP_FRAME_HEIGHT));
+        int frameWidth, frameHeight;
+        float scale = parser.get<float>("scale");
+        VideoCapture capture;
+        std::string video = parser.get<string>("video");
+        if (video.size() == 1 && isdigit(video[0]))
+            capture.open(parser.get<int>("video"));
+        else
+            capture.open(samples::findFileOrKeep(video));  // keep GStreamer pipelines
+
+        if (capture.isOpened())
+        {
+            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
+            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
+            cout << "Video " << video
+                 << ": width=" << frameWidth
+                 << ", height=" << frameHeight
+                 << endl;
+        }
+        else
+        {
+            cout << "Could not initialize video capturing: " << video << "\n";
+            return 1;
+        }
+
         detector->setInputSize(Size(frameWidth, frameHeight));
 
-        Mat frame;
-        TickMeter tm;
-        String msg = "FPS: ";
-        while(waitKey(1) < 0) // Press any key to exit
+        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
+        int nFrame = 0;
+        for (;;)
         {
             // Get frame
-            if (!cap.read(frame))
+            Mat frame;
+            if (!capture.read(frame))
             {
-                cerr << "No frames grabbed!\n";
+                cerr << "Can't grab frame! Stop\n";
                 break;
             }
 
+            resize(frame, frame, Size(frameWidth, frameHeight));
+
             // Inference
             Mat faces;
             tm.start();
             detector->detect(frame, faces);
             tm.stop();
 
+            Mat result = frame.clone();
             // Draw results on the input image
-            Mat result = visualize(frame, faces);
-            putText(result, msg + to_string(tm.getFPS()), Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
+            visualize(result, nFrame, faces, tm.getFPS());
 
             // Visualize results
             imshow("Live", result);
-            tm.reset();
+
+            int key = waitKey(1);
+            bool saveFrame = save;
+            if (key == ' ')
+            {
+                saveFrame = true;
+                key = 0;  // handled
+            }
+
+            if (saveFrame)
+            {
+                std::string frame_name = cv::format("frame_%05d.png", nFrame);
+                std::string result_name = cv::format("result_%05d.jpg", nFrame);
+                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
+                imwrite(frame_name, frame);
+                imwrite(result_name, result);
+            }
+
+            ++nFrame;
+            if (key > 0)
+                break;
         }
+        cout << "Processed " << nFrame << " frames" << endl;
     }
-}
+    cout << "Done." << endl;
+    return 0;
+}
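
One behavioral change in the video branch worth spelling out: the loop now runs until a key is pressed, saving the current frame and the annotated result on SPACE (or every frame when `-s=true`). A rough Python rendering of that control flow, for readers following the Python sample (the helper name `run_live` and the drawing stub are hypothetical, not part of the shipped samples):

```python
import cv2 as cv

def run_live(detector, capture, scale=1.0, save_all=False):
    # Hypothetical helper mirroring the updated C++ loop:
    # SPACE saves the current frame and result, any other key exits.
    n_frame = 0
    tm = cv.TickMeter()
    while True:
        has_frame, frame = capture.read()
        if not has_frame:
            print("Can't grab frame! Stop")
            break
        frame = cv.resize(frame, None, fx=scale, fy=scale)
        tm.start()
        faces = detector.detect(frame)  # assumes setInputSize() matches the scaled size
        tm.stop()
        result = frame.copy()
        # ... draw boxes, landmarks and FPS on `result` here ...
        cv.imshow('Live', result)
        key = cv.waitKey(1)
        save_frame = save_all
        if key == ord(' '):   # SPACE: save this frame, keep running
            save_frame = True
            key = 0
        if save_frame:
            cv.imwrite('frame_%05d.png' % n_frame, frame)
            cv.imwrite('result_%05d.jpg' % n_frame, result)
        n_frame += 1
        if key > 0:           # any other key: exit
            break
    return n_frame
```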

File: samples/dnn/face_detect.py

@@ -12,90 +12,144 @@ def str2bool(v):
     raise NotImplementedError
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
-parser.add_argument('--model', '-m', type=str, default='yunet.onnx', help='Path to the model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. Omit for detecting on default camera.')
+parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When both image1 and image2 are given, the program tries to find a face on each image and runs the face recognition algorithm.')
+parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
+parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
+parser.add_argument('--face_detection_model', '-fd', type=str, default='yunet.onnx', help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognizer_fast.onnx', help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
 parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
 parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
-def visualize(input, faces, thickness=2):
-    output = input.copy()
+def visualize(input, faces, fps, thickness=2):
     if faces[1] is not None:
         for idx, face in enumerate(faces[1]):
             print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))
 
             coords = face[:-1].astype(np.int32)
-            cv.rectangle(output, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), 2)
-            cv.circle(output, (coords[4], coords[5]), 2, (255, 0, 0), 2)
-            cv.circle(output, (coords[6], coords[7]), 2, (0, 0, 255), 2)
-            cv.circle(output, (coords[8], coords[9]), 2, (0, 255, 0), 2)
-            cv.circle(output, (coords[10], coords[11]), 2, (255, 0, 255), 2)
-            cv.circle(output, (coords[12], coords[13]), 2, (0, 255, 255), 2)
-    return output
+            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
+            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
+            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
+            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
+            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
+            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
+    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
 
 if __name__ == '__main__':
-    # Instantiate FaceDetectorYN
+    ## [initialize_FaceDetectorYN]
     detector = cv.FaceDetectorYN.create(
-        args.model,
+        args.face_detection_model,
         "",
         (320, 320),
         args.score_threshold,
         args.nms_threshold,
         args.top_k
     )
+    ## [initialize_FaceDetectorYN]
+
+    tm = cv.TickMeter()
 
     # If input is an image
-    if args.input is not None:
-        image = cv.imread(args.input)
+    if args.image1 is not None:
+        img1 = cv.imread(cv.samples.findFile(args.image1))
+
+        tm.start()
+        ## [inference]
         # Set input size before inference
-        detector.setInputSize((image.shape[1], image.shape[0]))
-        # Inference
-        faces = detector.detect(image)
+        detector.setInputSize((img1.shape[1], img1.shape[0]))
+
+        faces1 = detector.detect(img1)
+        ## [inference]
+        tm.stop()
+
+        assert faces1[1] is not None, 'Cannot find a face in {}'.format(args.image1)
 
         # Draw results on the input image
-        result = visualize(image, faces)
+        visualize(img1, faces1, tm.getFPS())
 
         # Save results if save is true
         if args.save:
-            print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', result)
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', img1)
 
         # Visualize results in a new window
-        if args.vis:
-            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, result)
-            cv.waitKey(0)
+        cv.imshow("image1", img1)
+
+        if args.image2 is not None:
+            img2 = cv.imread(cv.samples.findFile(args.image2))
+
+            tm.reset()
+            tm.start()
+            detector.setInputSize((img2.shape[1], img2.shape[0]))
+            faces2 = detector.detect(img2)
+            tm.stop()
+            assert faces2[1] is not None, 'Cannot find a face in {}'.format(args.image2)
+            visualize(img2, faces2, tm.getFPS())
+            cv.imshow("image2", img2)
+
+            ## [initialize_FaceRecognizerSF]
+            recognizer = cv.FaceRecognizerSF.create(
+                args.face_recognition_model, "")
+            ## [initialize_FaceRecognizerSF]
+
+            ## [facerecognizer]
+            # Align faces
+            face1_align = recognizer.alignCrop(img1, faces1[1][0])
+            face2_align = recognizer.alignCrop(img2, faces2[1][0])
+
+            # Extract features
+            face1_feature = recognizer.feature(face1_align)
+            face2_feature = recognizer.feature(face2_align)
+            ## [facerecognizer]
+
+            cosine_similarity_threshold = 0.363
+            l2_similarity_threshold = 1.128
+
+            ## [match]
+            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
+            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)
+            ## [match]
+
+            msg = 'different identities'
+            if cosine_score >= cosine_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
+
+            msg = 'different identities'
+            if l2_score <= l2_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))
+        cv.waitKey(0)
     else: # Omit input to call default camera
-        deviceId = 0
+        if args.video is not None:
+            deviceId = args.video
+        else:
+            deviceId = 0
         cap = cv.VideoCapture(deviceId)
-        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
-        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
+        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
+        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
         detector.setInputSize([frameWidth, frameHeight])
 
-        tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
 
+            frame = cv.resize(frame, (frameWidth, frameHeight))
+
             # Inference
             tm.start()
             faces = detector.detect(frame) # faces is a tuple
             tm.stop()
 
             # Draw results on the input image
-            frame = visualize(frame, faces)
-            cv.putText(frame, 'FPS: {}'.format(tm.getFPS()), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
+            visualize(frame, faces, tm.getFPS())
 
-            # Visualize results in a new Window
+            # Visualize results
             cv.imshow('Live', frame)
-            tm.reset()
+    cv.destroyAllWindows()

File: standalone C++ face match sample (deleted)

@@ -1,103 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#include "opencv2/dnn.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-#include <iostream>
-#include "opencv2/objdetect.hpp"
-
-using namespace cv;
-using namespace std;
-
-int main(int argc, char ** argv)
-{
-    if (argc != 5)
-    {
-        std::cerr << "Usage " << argv[0] << ": "
-                  << "<det_onnx_path> "
-                  << "<reg_onnx_path> "
-                  << "<image1>"
-                  << "<image2>\n";
-        return -1;
-    }
-
-    String det_onnx_path = argv[1];
-    String reg_onnx_path = argv[2];
-    String image1_path = argv[3];
-    String image2_path = argv[4];
-    std::cout << image1_path << " " << image2_path << std::endl;
-    Mat image1 = imread(image1_path);
-    Mat image2 = imread(image2_path);
-
-    float score_thresh = 0.9f;
-    float nms_thresh = 0.3f;
-    double cosine_similar_thresh = 0.363;
-    double l2norm_similar_thresh = 1.128;
-    int top_k = 5000;
-
-    // Initialize FaceDetector
-    Ptr<FaceDetectorYN> faceDetector;
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image1.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_1;
-    faceDetector->detect(image1, faces_1);
-    if (faces_1.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image1_path << "\n";
-        return -1;
-    }
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image2.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_2;
-    faceDetector->detect(image2, faces_2);
-    if (faces_2.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image2_path << "\n";
-        return -1;
-    }
-
-    // Initialize FaceRecognizerSF
-    Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(reg_onnx_path, "");
-
-    Mat aligned_face1, aligned_face2;
-    faceRecognizer->alignCrop(image1, faces_1.row(0), aligned_face1);
-    faceRecognizer->alignCrop(image2, faces_2.row(0), aligned_face2);
-
-    Mat feature1, feature2;
-    faceRecognizer->feature(aligned_face1, feature1);
-    feature1 = feature1.clone();
-    faceRecognizer->feature(aligned_face2, feature2);
-    feature2 = feature2.clone();
-
-    double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
-    double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
-
-    if (cos_score >= cosine_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities;";
-    }
-    std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
-
-    if (L2_score <= l2norm_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities.";
-    }
-    std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
-
-    return 0;
-}

File: standalone Python face match sample (deleted)

@@ -1,57 +0,0 @@
-import argparse
-
-import numpy as np
-import cv2 as cv
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--input1', '-i1', type=str, help='Path to the input image1.')
-parser.add_argument('--input2', '-i2', type=str, help='Path to the input image2.')
-parser.add_argument('--face_detection_model', '-fd', type=str, help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
-parser.add_argument('--face_recognition_model', '-fr', type=str, help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
-args = parser.parse_args()
-
-# Read the input image
-img1 = cv.imread(args.input1)
-img2 = cv.imread(args.input2)
-
-# Instantiate face detector and recognizer
-detector = cv.FaceDetectorYN.create(
-    args.face_detection_model,
-    "",
-    (img1.shape[1], img1.shape[0])
-)
-recognizer = cv.FaceRecognizerSF.create(
-    args.face_recognition_model,
-    ""
-)
-
-# Detect face
-detector.setInputSize((img1.shape[1], img1.shape[0]))
-face1 = detector.detect(img1)
-detector.setInputSize((img2.shape[1], img2.shape[0]))
-face2 = detector.detect(img2)
-assert face1[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input1)
-assert face2[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input2)
-
-# Align faces
-face1_align = recognizer.alignCrop(img1, face1[1][0])
-face2_align = recognizer.alignCrop(img2, face2[1][0])
-
-# Extract features
-face1_feature = recognizer.feature(face1_align)
-face2_feature = recognizer.feature(face2_align)
-
-# Calculate distance (0: cosine, 1: L2)
-cosine_similarity_threshold = 0.363
-cosine_score = recognizer.match(face1_feature, face2_feature, 0)
-msg = 'different identities'
-if cosine_score >= cosine_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
-
-l2_similarity_threshold = 1.128
-l2_score = recognizer.match(face1_feature, face2_feature, 1)
-msg = 'different identities'
-if l2_score <= l2_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))