opencv/samples/dnn/models.yml
Gursimar Singh 073488896e
Merge pull request #25326 from gursimarsingh:improved_text_detection_sample
Improved and refactored text detection sample in dnn module #25326

Clean up samples: #25006

This pull request merges and simplifies the different text detection samples in the dnn module of OpenCV into one file. An option is provided to choose the detection model: EAST or DB.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
2024-09-09 17:43:15 +03:00

351 lines
11 KiB
YAML

%YAML 1.0
---
################################################################################
# Object detection models.
################################################################################
# YOLOv8 object detection family from ultralytics (https://github.com/ultralytics/ultralytics)
# May be used for any of YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l and YOLOv8x
# yolov8x.onnx (hosted on huggingface.co, unlike the other YOLOv8 entries).
# Preprocessing: 640x640 input, scale 0.00392 (about 1/255), mean 0.0, rgb: true.
yolov8x:
  load_info:
    url: "https://huggingface.co/cabelo/yolov8/resolve/main/yolov8x.onnx?download=true"
    sha1: "462f15d668c046d38e27d3df01fe8142dd004cb4"
  model: "yolov8x.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "yolo_detector"
# yolov8s.onnx from the X-AnyLabeling v0.1.0 release.
# Preprocessing: 640x640 input, scale 0.00392 (about 1/255), mean 0.0, rgb: true.
yolov8s:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8s.onnx"
    sha1: "82cd83984396fe929909ecb58212b0e86d0904b1"
  model: "yolov8s.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "yolo_detector"
# yolov8n.onnx from the X-AnyLabeling v0.1.0 release.
# Preprocessing: 640x640 input, scale 0.00392 (about 1/255), mean 0.0, rgb: true.
yolov8n:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx"
    sha1: "68f864475d06e2ec4037181052739f268eeac38d"
  model: "yolov8n.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "yolo_detector"
# yolov8m.onnx from the X-AnyLabeling v0.1.0 release.
# Preprocessing: 640x640 input, scale 0.00392 (about 1/255), mean 0.0, rgb: true.
yolov8m:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8m.onnx"
    sha1: "656ffeb4f3b067bc30df956728b5f9c61a4cb090"
  model: "yolov8m.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "yolo_detector"
# yolov8l.onnx from the X-AnyLabeling v0.1.0 release.
# Preprocessing: 640x640 input, scale 0.00392 (about 1/255), mean 0.0, rgb: true.
yolov8l:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8l.onnx"
    sha1: "462df53ca3a85d110bf6be7fc2e2bb1277124395"
  model: "yolov8l.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "yolo_detector"
# YOLOv4 object detection family from Darknet (https://github.com/AlexeyAB/darknet)
# YOLO object detection family from Darknet (https://pjreddie.com/darknet/yolo/)
# Might be used for all YOLOv2, TinyYolov2, YOLOv3, YOLOv4 and TinyYolov4
# Darknet weights (yolov4.weights) paired with the yolov4.cfg network config.
# Preprocessing: 416x416 input, scale 0.00392 (about 1/255), zero mean, rgb: true.
yolov4:
  load_info:
    url: "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights"
    sha1: "0143deb6c46fcc7f74dd35bf3c14edc3784e99ee"
  model: "yolov4.weights"
  config: "yolov4.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "object_detection"
# Darknet weights (yolov4-tiny.weights) paired with the yolov4-tiny.cfg config.
# Preprocessing: 416x416 input, scale 0.00392 (about 1/255), zero mean, rgb: true.
yolov4-tiny:
  load_info:
    url: "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights"
    sha1: "451caaab22fb9831aa1a5ee9b5ba74a35ffa5dcb"
  model: "yolov4-tiny.weights"
  config: "yolov4-tiny.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "object_detection"
# Darknet weights (yolov3.weights, from pjreddie.com) paired with yolov3.cfg.
# Preprocessing: 416x416 input, scale 0.00392 (about 1/255), zero mean, rgb: true.
yolov3:
  load_info:
    url: "https://pjreddie.com/media/files/yolov3.weights"
    sha1: "520878f12e97cf820529daea502acca380f1cb8e"
  model: "yolov3.weights"
  config: "yolov3.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  classes: "object_detection_classes_yolo.txt"
  background_label_id: 0
  sample: "object_detection"
# Tiny YOLO entry that uses the Pascal VOC class list instead of the YOLO one.
# NOTE(review): the URL's file name (yolov2-tiny-voc.weights) differs from
# `model` (tiny-yolo-voc.weights); presumably the download is stored under the
# `model` name -- confirm against the downloader.
tiny-yolo-voc:
  load_info:
    url: "https://pjreddie.com/media/files/yolov2-tiny-voc.weights"
    sha1: "24b4bd049fc4fa5f5e95f684a8967e65c625dff9"
  model: "tiny-yolo-voc.weights"
  config: "tiny-yolo-voc.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  classes: "object_detection_classes_pascal_voc.txt"
  background_label_id: 0
  sample: "object_detection"
# Same yolov8n.onnx download (URL and sha1) as the `yolov8n` entry, but bound
# to the "object_detection" sample with `postprocessing: "yolov8"`.
# This entry has no `background_label_id` and gives `mean` as a list.
yolov8:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx"
    sha1: "68f864475d06e2ec4037181052739f268eeac38d"
  model: "yolov8n.onnx"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  postprocessing: "yolov8"
  classes: "object_detection_classes_yolo.txt"
  sample: "object_detection"
# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD
# MobileNet-SSD: .caffemodel weights plus a .prototxt config.
# Preprocessing: 300x300 input, mean 127.5 per channel, scale 0.007843
# (about 1/127.5), rgb: false. Classes come from the Pascal VOC list.
ssd_caffe:
  load_info:
    url: "https://drive.google.com/uc?export=download&id=0B3gersZ2cHIxRm5PMWRoTkdHdHc"
    sha1: "994d30a8afaa9e754d17d2373b2d62a7dfbaaf7a"
  model: "MobileNetSSD_deploy.caffemodel"
  config: "MobileNetSSD_deploy.prototxt"
  mean: [127.5, 127.5, 127.5]
  scale: 0.007843
  width: 300
  height: 300
  rgb: false
  classes: "object_detection_classes_pascal_voc.txt"
  sample: "object_detection"
# TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection
# SSD MobileNet v1 (COCO). `load_info` names a .tar.gz archive
# (`download_name`) and the `member` path of the frozen graph inside it.
# `sha1` and `download_sha` are two separate checksums -- presumably of the
# extracted file and of the archive respectively; confirm against the downloader.
# Preprocessing: 300x300 input, scale 1.0, zero mean, rgb: true.
ssd_tf:
  load_info:
    url: "http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz"
    sha1: "9e4bcdd98f4c6572747679e4ce570de4f03a70e2"
    download_sha: "6157ddb6da55db2da89dd561eceb7f944928e317"
    download_name: "ssd_mobilenet_v1_coco_2017_11_17.tar.gz"
    member: "ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb"
  model: "ssd_mobilenet_v1_coco_2017_11_17.pb"
  config: "ssd_mobilenet_v1_coco_2017_11_17.pbtxt"
  mean: [0, 0, 0]
  scale: 1.0
  width: 300
  height: 300
  rgb: true
  classes: "object_detection_classes_coco.txt"
  sample: "object_detection"
# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection
# Faster R-CNN Inception v2 (COCO), delivered as a .tar.gz archive with the
# frozen graph named by `member`.
# Preprocessing: 800x600 input, scale 1.0, zero mean, rgb: true.
# No `classes` file is listed for this entry.
faster_rcnn_tf:
  load_info:
    url: "http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz"
    sha1: "f2e4bf386b9bb3e25ddfcbbd382c20f417e444f3"
    download_sha: "c710f25e5c6a3ce85fe793d5bf266d581ab1c230"
    download_name: "faster_rcnn_inception_v2_coco_2018_01_28.tar.gz"
    member: "faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"
  model: "faster_rcnn_inception_v2_coco_2018_01_28.pb"
  config: "faster_rcnn_inception_v2_coco_2018_01_28.pbtxt"
  mean: [0, 0, 0]
  scale: 1.0
  width: 800
  height: 600
  rgb: true
  sample: "object_detection"
################################################################################
# Image classification models.
################################################################################
# SqueezeNet 1.1 (ONNX). `mean`/`std` are expressed on a 0..1 scale and are
# paired with scale 0.003921 (about 1/255). 224x224 input, rgb: true.
# Uses a `labels` key (not `classes`) for the ILSVRC2012 class names.
squeezenet:
  load_info:
    url: "https://github.com/onnx/models/raw/main/validated/vision/classification/squeezenet/model/squeezenet1.1-7.onnx?download="
    sha1: "ec31942d17715941bb9b81f3a91dc59def9236be"
  model: "squeezenet1.1-7.onnx"
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
  scale: 0.003921
  width: 224
  height: 224
  rgb: true
  labels: "classification_classes_ILSVRC2012.txt"
  sample: "classification"
# GoogLeNet (ONNX), pinned to a specific onnx/models commit.
# Preprocessing: 224x224 input, scale 1.0, unit std, rgb: false.
# NOTE(review): the sha1 below (da39a3ee...0709) is the SHA-1 digest of EMPTY
# input, so it cannot be the checksum of a real model file. It looks like it
# was computed over a zero-byte download. Recompute it from a verified copy of
# googlenet-8.onnx and replace it; until then, checksum verification for this
# entry can only succeed on an empty file.
googlenet:
  load_info:
    url: "https://github.com/onnx/models/raw/69c5d3751dda5349fd3fc53f525395d180420c07/vision/classification/inception_and_googlenet/googlenet/model/googlenet-8.onnx"
    sha1: "da39a3ee5e6b4b0d3255bfef95601890afd80709"
  model: "googlenet-8.onnx"
  mean: [103.939, 116.779, 123.675]
  std: [1, 1, 1]
  scale: 1.0
  width: 224
  height: 224
  rgb: false
  labels: "classification_classes_ILSVRC2012.txt"
  sample: "classification"
# ResNet-50 v2 (ONNX). `mean`/`std` are on a 0..255 scale with scale 1.0; they
# equal [0.485, 0.456, 0.406] and [0.229, 0.224, 0.225] multiplied by 255.
# 224x224 input, rgb: true.
resnet:
  load_info:
    url: "https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx"
    sha1: "c3a67b3cb2f0a61a7eb75eb8bd9139c89557cbe0"
  model: "resnet50-v2-7.onnx"
  mean: [123.675, 116.28, 103.53]
  std: [58.395, 57.12, 57.375]
  scale: 1.0
  width: 224
  height: 224
  rgb: true
  labels: "classification_classes_ILSVRC2012.txt"
  sample: "classification"
################################################################################
# Semantic segmentation models.
################################################################################
# FCN ResNet-50 (ONNX), pinned to a specific onnx/models commit.
# Preprocessing: 500x500 input, scale 0.019, rgb: false.
fcnresnet50:
  load_info:
    url: "https://github.com/onnx/models/raw/491ce05590abb7551d7fae43c067c060eeb575a6/validated/vision/object_detection_segmentation/fcn/model/fcn-resnet50-12.onnx"
    sha1: "1bb0c7e0034038969aecc6251166f1612a139230"
  model: "fcn-resnet50-12.onnx"
  mean: [103.5, 116.2, 123.6]
  scale: 0.019
  width: 500
  height: 500
  rgb: false
  sample: "segmentation"
# FCN ResNet-101 (ONNX), pinned to a specific onnx/models commit.
# Preprocessing: 500x500 input, scale 0.019, rgb: false.
fcnresnet101:
  load_info:
    url: "https://github.com/onnx/models/raw/fb8271d5d5d9b90dbb1eb5e8e40f8f580fb248b3/vision/object_detection_segmentation/fcn/model/fcn-resnet101-11.onnx"
    sha1: "e7e76474bf6b73334ab32c4be1374c9e605f5aed"
  model: "fcn-resnet101-11.onnx"
  mean: [103.5, 116.2, 123.6]
  scale: 0.019
  width: 500
  height: 500
  rgb: false
  sample: "segmentation"
# u2netp.onnx from the rembg v0.0.0 release.
# Preprocessing: 320x320 input, scale 0.019, rgb: true.
u2netp:
  load_info:
    url: "https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2netp.onnx"
    sha1: "0a99236f0d5c1916a99a8c401b23e5ef32038606"
  model: "u2netp.onnx"
  mean: [123.6, 116.2, 103.5]
  scale: 0.019
  width: 320
  height: 320
  rgb: true
  sample: "segmentation"
################################################################################
# Text detection models.
################################################################################
# DB text detector (DB_IC15_resnet50.onnx) plus the VGG_CTC.onnx recognizer,
# which has its own `ocr_load_info` download section.
# Both Google Drive URLs use "export=download" (previously misspelled
# "dowload").
# Preprocessing: 736x736 input, scale 0.00392 (about 1/255), rgb: false.
DB:
  load_info:
    url: "https://drive.google.com/uc?export=download&id=17_ABp79PlFt9yPCxSaarVc_DKTmrSGGf"
    sha1: "bef233c28947ef6ec8c663d20a2b326302421fa3"
  model: "DB_IC15_resnet50.onnx"
  ocr_load_info:
    ocr_url: "https://drive.google.com/uc?export=download&id=159VavnbvfBQkLIPSAu2SP5Yij1Fy4azw"
    ocr_sha1: "c4ab1fb3f13c1c8ffc04f016e72ec85311de4ebe"
  ocr_model: "VGG_CTC.onnx"
  mean: [122.67891434, 116.66876762, 104.00698793]
  scale: 0.00392
  width: 736
  height: 736
  rgb: false
  sample: "text_detection"
# EAST text detector (frozen_east_text_detection.pb, delivered as a .tar.gz
# named by `download_name`) plus the VGG_CTC.onnx recognizer, which has its
# own `ocr_load_info` download section.
# The Google Drive `ocr_url` uses "export=download" (previously misspelled
# "dowload").
# Preprocessing: 736x736 input, scale 1.0, rgb: false.
East:
  load_info:
    url: "https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1"
    sha1: "fffabf5ac36f37bddf68e34e84b45f5c4247ed06"
    download_name: "frozen_east_text_detection.tar.gz"
    download_sha: "3ca8233d6edd748f7ed23246c8ca24cbf696bb94"
  model: "frozen_east_text_detection.pb"
  ocr_load_info:
    ocr_url: "https://drive.google.com/uc?export=download&id=159VavnbvfBQkLIPSAu2SP5Yij1Fy4azw"
    ocr_sha1: "c4ab1fb3f13c1c8ffc04f016e72ec85311de4ebe"
  ocr_model: "VGG_CTC.onnx"
  mean: [123.68, 116.78, 103.94]
  scale: 1.0
  width: 736
  height: 736
  rgb: false
  sample: "text_detection"
# Stand-alone entry for the VGG_CTC.onnx recognizer, bound to the
# "text_recognition" sample. No preprocessing keys are given here.
# The Google Drive URL uses "export=download" (previously misspelled "dowload").
OCR:
  load_info:
    url: "https://drive.google.com/uc?export=download&id=159VavnbvfBQkLIPSAu2SP5Yij1Fy4azw"
    sha1: "c4ab1fb3f13c1c8ffc04f016e72ec85311de4ebe"
  model: "VGG_CTC.onnx"
  sample: "text_recognition"
################################################################################
# Edge Detection models.
################################################################################
# DexiNed edge detector (dexined.onnx).
# Preprocessing: 512x512 input, scale 1.0, rgb: false.
# NOTE(review): the URL resolves through the "dexined_model" ref of a personal
# opencv_zoo fork rather than a tagged release -- it may move; confirm that it
# is the intended long-term location.
dexined:
  load_info:
    url: "https://github.com/gursimarsingh/opencv_zoo/raw/dexined_model/models/edge_detection_dexined/dexined.onnx"
    sha1: "f86f2d32c3cf892771f76b5e6b629b16a66510e9"
  model: "dexined.onnx"
  mean: [103.5, 116.2, 123.6]
  scale: 1.0
  width: 512
  height: 512
  rgb: false
  sample: "edge_detection"