Merge pull request #9516 from arrybn:ssd_face_detector

This commit is contained in:
Alexander Alekhin 2017-08-31 11:43:04 +00:00
commit f6265500fb
8 changed files with 5705 additions and 0 deletions


@@ -8,6 +8,27 @@ ocv_check_dependencies(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})
if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
  project(dnn_samples)
  # Model branch name: dnn_samples_face_detector_20170830
  set(DNN_FACE_DETECTOR_MODEL_COMMIT "b2bfc75f6aea5b1f834ff0f0b865a7c18ff1459f")
  set(DNN_FACE_DETECTOR_MODEL_HASH "afbb6037fd180e8d2acb3b58ca737b9e")
  set(DNN_FACE_DETECTOR_MODEL_NAME "res10_300x300_ssd_iter_140000.caffemodel")
  set(DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR "${CMAKE_CURRENT_LIST_DIR}/face_detector")
  if(COMMAND ocv_download)
    ocv_download(FILENAME ${DNN_FACE_DETECTOR_MODEL_NAME}
                 HASH ${DNN_FACE_DETECTOR_MODEL_HASH}
                 URL
                   "$ENV{OPENCV_DNN_MODELS_URL}"
                   "${OPENCV_DNN_MODELS_URL}"
                   "https://raw.githubusercontent.com/opencv/opencv_3rdparty/${DNN_FACE_DETECTOR_MODEL_COMMIT}/"
                 DESTINATION_DIR ${DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR}
                 ID DNN_FACE_DETECTOR
                 RELATIVE_URL
                 STATUS res)
  endif()
  ocv_include_directories("${OpenCV_SOURCE_DIR}/include")
  ocv_include_modules_recurse(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})

samples/dnn/face_detector/.gitignore vendored Normal file

@@ -0,0 +1 @@
res10_300x300_ssd_iter_140000.caffemodel

File diff suppressed because it is too large


@@ -0,0 +1,79 @@
This is a brief description of the training process that was used to obtain res10_300x300_ssd_iter_140000.caffemodel.
The model was created with the SSD framework, using a ResNet-10-like architecture as a backbone. The channel counts of the ResNet-10 convolution layers were reduced significantly (2x or 4x fewer channels).
The model was trained with the Caffe framework on a large dataset that is available online.
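As a quick way to inspect the resulting network, you can load it with OpenCV's dnn module and list its layers (a minimal sketch, assuming the deploy.prototxt and .caffemodel files from this directory are next to the script):

import cv2 as cv

# Load the face detector and print its layers to see the reduced ResNet-10 + SSD structure.
net = cv.dnn.readNetFromCaffe('deploy.prototxt', 'res10_300x300_ssd_iter_140000.caffemodel')
for layer_name in net.getLayerNames():
    print(layer_name)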
1. Prepare training tools
You need the "ssd" branch of this repository: https://github.com/weiliu89/caffe/tree/ssd . Check out this branch and build it (see the instructions in the repo's README).
2. Prepare training data.
The data preparation pipeline can be represented as:
(a) Download the original face detection dataset -> (b) Convert the annotation to the PASCAL VOC format -> (c) Create LMDB databases with images + annotations for training
a) Find some datasets with face bounding box annotations. For some reasons I can't provide links here, but you can easily find them on your own. Also study the data: it may contain small or low-quality faces which can spoil the training process. Often the annotation contains special flags about object quality; remove such faces from the annotation (smaller than 16 pixels along at least one side, blurred, highly occluded, and so on).
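For illustration, a minimal filtering sketch along these lines (the box format and the quality flag below are hypothetical; adapt them to your dataset's real annotation):

MIN_SIDE = 16  # drop faces smaller than 16 pixels along any side

def keep_face(box, bad_quality):
    # box is an illustrative (xmin, ymin, xmax, ymax) tuple in pixels
    xmin, ymin, xmax, ymax = box
    if bad_quality:  # blurred, highly occluded, etc., according to the dataset's own flags
        return False
    return (xmax - xmin) >= MIN_SIDE and (ymax - ymin) >= MIN_SIDE

annotated_faces = [((10, 10, 60, 60), False), ((5, 5, 15, 15), False), ((0, 0, 50, 50), True)]
kept = [box for box, bad in annotated_faces if keep_face(box, bad)]
print(kept)  # only the first box survives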
b) The downloaded dataset will come with its own annotation format. It may be one single file for all images, a separate file for each image, or something else. But to train SSD in Caffe you need to convert the annotation to the PASCAL VOC format.
A PASCAL VOC annotation consists of an .xml file per image. In this xml file all face bounding boxes should be listed as:
<annotation>
  <size>
    <width>300</width>
    <height>300</height>
  </size>
  <object>
    <name>face</name>
    <difficult>0</difficult>
    <bndbox>
      <xmin>100</xmin>
      <ymin>100</ymin>
      <xmax>200</xmax>
      <ymax>200</ymax>
    </bndbox>
  </object>
  <object>
    <name>face</name>
    <difficult>0</difficult>
    <bndbox>
      <xmin>0</xmin>
      <ymin>0</ymin>
      <xmax>100</xmax>
      <ymax>100</ymax>
    </bndbox>
  </object>
</annotation>
So, convert your dataset's annotation to the format above.
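A minimal sketch of writing one such .xml file with Python's standard library (write_voc_xml and its arguments are just an example, not part of any tool mentioned in this document; it assumes the annotations_val/ directory already exists):

import xml.etree.ElementTree as ET

def write_voc_xml(xml_path, img_w, img_h, boxes):
    # boxes: list of (xmin, ymin, xmax, ymax) face rectangles in pixels
    root = ET.Element('annotation')
    size = ET.SubElement(root, 'size')
    ET.SubElement(size, 'width').text = str(img_w)
    ET.SubElement(size, 'height').text = str(img_h)
    for xmin, ymin, xmax, ymax in boxes:
        obj = ET.SubElement(root, 'object')
        ET.SubElement(obj, 'name').text = 'face'
        ET.SubElement(obj, 'difficult').text = '0'
        bndbox = ET.SubElement(obj, 'bndbox')
        ET.SubElement(bndbox, 'xmin').text = str(xmin)
        ET.SubElement(bndbox, 'ymin').text = str(ymin)
        ET.SubElement(bndbox, 'xmax').text = str(xmax)
        ET.SubElement(bndbox, 'ymax').text = str(ymax)
    ET.ElementTree(root).write(xml_path)

# produces an annotation equivalent to the example above
write_voc_xml('annotations_val/0.jpg.xml', 300, 300, [(100, 100, 200, 200), (0, 0, 100, 100)])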
Also, you should create a labelmap.prototxt file with the following content:
item {
  name: "none_of_the_above"
  label: 0
  display_name: "background"
}
item {
  name: "face"
  label: 1
  display_name: "face"
}
You need this file to establish the correspondence between class names and numeric class labels.
For the next step we also need a file where all our image/annotation file name pairs are listed. Each line of this file should look like:
images_val/0.jpg annotations_val/0.jpg.xml
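One way to generate such a listing file (a sketch, assuming images are stored in images_val/ and the .xml files in annotations_val/ with matching names, as in the line above):

import os

with open('val.txt', 'w') as listing:
    for img_name in sorted(os.listdir('images_val')):
        xml_name = img_name + '.xml'
        if os.path.exists(os.path.join('annotations_val', xml_name)):
            # each line: <image path> <annotation path>
            listing.write('images_val/%s annotations_val/%s\n' % (img_name, xml_name))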
c) To create the LMDB databases you need to use the create_data.sh tool from the caffe/data/VOC0712 directory of Caffe's source code.
This script calls create_annoset.py internally, so check which arguments you need to pass to it.
You need to prepare 2 LMDB databases: one for the training images and one for the validation images.
3. Train your detector
For training you need 3 files: train.prototxt, test.prototxt and solver.prototxt. You can find these files in the same directory as this readme.
You also need to edit train.prototxt and test.prototxt so that their LMDB database paths point to the databases you created in step 2.
Now everything is ready to launch the training process.
Execute the following lines in a terminal:
mkdir -p snapshot
mkdir -p log
/path_for_caffe_build_dir/tools/caffe train -solver="solver.prototxt" -gpu 0 2>&1 | tee -a log/log.log
And wait. It will take about 8 hours to finish the process.
After that you can use the .caffemodel from the snapshot/ subdirectory in the resnet_face_ssd_python.py sample.
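For example, in that sample you would only need to change the model path; the exact paths below are illustrative and follow the snapshot settings from solver.prototxt:

from cv2 import dnn

# use your own snapshot instead of the downloaded model (deploy.prototxt stays the same)
prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'snapshot/res10_300x300_ssd_iter_140000.caffemodel'
net = dnn.readNetFromCaffe(prototxt, caffemodel)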


@@ -0,0 +1,28 @@
train_net: "train.prototxt"
test_net: "test.prototxt"
test_iter: 2312
test_interval: 5000
test_initialization: true
base_lr: 0.01
display: 10
lr_policy: "multistep"
max_iter: 140000
stepvalue: 80000
stepvalue: 120000
gamma: 0.1
momentum: 0.9
weight_decay: 0.0005
average_loss: 500
iter_size: 1
type: "SGD"
solver_mode: GPU
random_seed: 0
debug_info: false
snapshot: 1000
snapshot_prefix: "snapshot/res10_300x300_ssd"
eval_type: "detection"
ap_version: "11point"
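Besides the caffe command-line tool shown in the readme above, the same solver file can also be driven from Python (a sketch, assuming pycaffe from the "ssd" branch is built and on PYTHONPATH):

import caffe

caffe.set_device(0)   # GPU id; matches solver_mode: GPU
caffe.set_mode_gpu()
solver = caffe.SGDSolver('solver.prototxt')  # type: "SGD" in the config above
solver.solve()        # trains up to max_iter, writing snapshots with the prefix above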

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,59 @@
import numpy as np

try:
    import cv2 as cv
except ImportError:
    raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, '
                      'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)')

from cv2 import dnn

# Network input size and detection confidence threshold
inWidth = 300
inHeight = 300
confThreshold = 0.5

prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

if __name__ == '__main__':
    net = dnn.readNetFromCaffe(prototxt, caffemodel)

    cap = cv.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cols = frame.shape[1]
        rows = frame.shape[0]

        # Resize to 300x300 and subtract the mean values (104, 177, 123) used during training
        net.setInput(dnn.blobFromImage(cv.resize(frame, (inWidth, inHeight)),
                                       1.0, (inWidth, inHeight), (104., 177., 123.)))
        # Output shape is [1, 1, N, 7]: image id, class id, confidence, xmin, ymin, xmax, ymax
        detections = net.forward()

        perf_stats = net.getPerfProfile()
        print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                # Coordinates are relative; scale them back to the original frame size
                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                             (0, 255, 0))
                label = "face: %.4f" % confidence
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                             (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                             (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        cv.imshow("detections", frame)
        if cv.waitKey(1) != -1:
            break