opencv/samples/dnn/models.yml
Gursimar Singh b605fc13d8
Extension to PR #26605 ldm inpainting sample (#26904)

This PR adds and fixes the following points in the ldm_inpainting sample, on top of the original PR #26605 by @Abdurrahheem:

DONE:

1. Added functionality to load models from a YAML configuration file, with automatic downloading when a model is not found locally (see the sketch after this list).
2. Updated the script usage instructions to reflect the correct command format.
3. Improved user interaction by adding instructions for the inpainting controls to the image window.
4. Introduced a new models.yml configuration section for downloading inpainting model weights, including placeholders for the model SHA1 checksums.
5. Fixed the input types and names in the ONNX graph generation.
6. Added links to the ONNX graphs in models.yml.
7. Added support for findModels and standardized the sample usage to match the other dnn samples.
8. Fixed an issue in download_models.py when downloading models from dl.opencv.org.
9. Fixed an issue in common.py that printed duplicated positional arguments for samples that use multiple models.
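
For reference, here is a minimal sketch of the loading flow from points 1 and 7. It is an illustration of the intended behavior, not the exact common.py code: the helper names and the flat cache directory are assumptions, while the `yolov8` alias and the `load_info`/`sha1` keys come from models.yml below.

```python
import hashlib
import os
import urllib.request

import cv2 as cv

def sha1_of(path, chunk=1 << 20):
    # Stream the file so large weights do not need to fit in memory.
    h = hashlib.sha1()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk), b""):
            h.update(block)
    return h.hexdigest()

def resolve_model(alias, zoo="models.yml", cache_dir="."):
    # Look the alias up in models.yml via OpenCV's FileStorage reader.
    fs = cv.FileStorage(zoo, cv.FILE_STORAGE_READ)
    entry = fs.getNode(alias)
    info = entry.getNode("load_info")
    url = info.getNode("url").string()
    sha1 = info.getNode("sha1").string()
    path = os.path.join(cache_dir, entry.getNode("model").string())
    # Download only when the local copy is missing or fails the checksum.
    if not os.path.isfile(path) or sha1_of(path) != sha1:
        print("Downloading", url)
        urllib.request.urlretrieve(url, path)
        assert sha1_of(path) == sha1, "checksum mismatch after download"
    return path

net = cv.dnn.readNet(resolve_model("yolov8"))
```

Alternatively, download_models.py can prefetch the weights up front instead of letting the sample fetch them lazily as above.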

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

---------

Co-authored-by: Abdurrahheem <abduragim.shtanchaev@xperience.ai>
2025-02-11 17:58:39 +03:00


%YAML:1.0
---
################################################################################
# Object detection models.
################################################################################
# YOLOv8 object detection family from ultralytics (https://github.com/ultralytics/ultralytics)
# Might be used for all of YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l and YOLOv8x
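# A hypothetical invocation (assuming the standard samples/dnn layout) shows
# how an alias from this file is used; the sample resolves the alias here and
# downloads the weights on first use when they are not found locally:
#   python object_detection.py yolov8x --input=path/to/image.jpg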
yolov8x:
  load_info:
    url: "https://huggingface.co/cabelo/yolov8/resolve/main/yolov8x.onnx?download=true"
    sha1: "462f15d668c046d38e27d3df01fe8142dd004cb4"
  model: "yolov8x.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  postprocessing: "yolov8"
  sample: "object_detection"
yolov8s:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8s.onnx"
    sha1: "82cd83984396fe929909ecb58212b0e86d0904b1"
  model: "yolov8s.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  postprocessing: "yolov8"
  sample: "object_detection"
yolov8:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx"
    sha1: "68f864475d06e2ec4037181052739f268eeac38d"
  model: "yolov8n.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  postprocessing: "yolov8"
  sample: "object_detection"
yolov8m:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8m.onnx"
    sha1: "656ffeb4f3b067bc30df956728b5f9c61a4cb090"
  model: "yolov8m.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  postprocessing: "yolov8"
  sample: "object_detection"
yolov8l:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8l.onnx"
    sha1: "462df53ca3a85d110bf6be7fc2e2bb1277124395"
  model: "yolov8l.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  postprocessing: "yolov8"
  sample: "object_detection"
# YOLOv5 object detection family from ultralytics (https://github.com/ultralytics/ultralytics)
# Might be used for all of YOLOv5n, YOLOv5s, YOLOv5m, YOLOv5l and YOLOv5x
yolov5l:
  load_info:
    url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov5l.onnx"
    sha1: "9de7e54c524b7fe7577bbd4cdbbdaed53375c8f1"
  model: "yolov5l.onnx"
  mean: 0.0
  scale: 0.00392
  width: 640
  height: 640
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  postprocessing: "yolov5"
  sample: "object_detection"
# YOLOv4 object detection family from Darknet (https://github.com/AlexeyAB/darknet)
# YOLO object detection family from Darknet (https://pjreddie.com/darknet/yolo/)
# Might be used for all YOLOv2, TinyYolov2, YOLOv3, YOLOv4 and TinyYolov4
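# Darknet models ship as a .weights file plus a .cfg graph description, so each
# entry below carries both a load_info and a config_load_info block; both files
# are fetched and checksummed the same way (assumed behavior, mirroring the
# single-file entries above).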
yolov4:
  load_info:
    url: "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights"
    sha1: "0143deb6c46fcc7f74dd35bf3c14edc3784e99ee"
  model: "yolov4.weights"
  config_load_info:
    url: "https://github.com/opencv/opencv_extra/raw/refs/heads/4.x/testdata/dnn/yolov4.cfg"
    sha1: "ed0aeace88527af7524c3baf66ca44fbf049b878"
  config: "yolov4.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  background_label_id: 0
  postprocessing: "darknet"
  sample: "object_detection"
yolov4-tiny:
  load_info:
    url: "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights"
    sha1: "451caaab22fb9831aa1a5ee9b5ba74a35ffa5dcb"
  model: "yolov4-tiny.weights"
  config_load_info:
    url: "https://github.com/opencv/opencv_extra/raw/refs/heads/4.x/testdata/dnn/yolov4-tiny-2020-12.cfg"
    sha1: "b161c2b0984b0c3b466c04b0d6cb3e52f06d93dd"
  config: "yolov4-tiny-2020-12.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  background_label_id: 0
  postprocessing: "darknet"
  sample: "object_detection"
yolov3:
  load_info:
    url: "https://pjreddie.com/media/files/yolov3.weights"
    sha1: "520878f12e97cf820529daea502acca380f1cb8e"
  model: "yolov3.weights"
  config_load_info:
    url: "https://github.com/opencv/opencv_extra/raw/refs/heads/4.x/testdata/dnn/yolov3.cfg"
    sha1: "caaf16a895b7bae3cd5c042199d1df0269f3dce6"
  config: "yolov3.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  labels: "object_detection_classes_yolo.txt"
  background_label_id: 0
  postprocessing: "darknet"
  sample: "object_detection"
tiny-yolo-voc:
  load_info:
    url: "https://pjreddie.com/media/files/yolov2-tiny-voc.weights"
    sha1: "24b4bd049fc4fa5f5e95f684a8967e65c625dff9"
  model: "tiny-yolo-voc.weights"
  config_load_info:
    url: "https://github.com/opencv/opencv_extra/raw/refs/heads/4.x/testdata/dnn/tiny-yolo-voc.cfg"
    sha1: "d26e2408ce4e20136278411760ba904d744fe5b5"
  config: "tiny-yolo-voc.cfg"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 416
  height: 416
  rgb: true
  labels: "object_detection_classes_pascal_voc.txt"
  background_label_id: 0
  postprocessing: "darknet"
  sample: "object_detection"
# Caffe implementation of SSD model from https://github.com/PINTO0309/MobileNet-SSD-RealSense
ssd_caffe:
  load_info:
    url: "https://github.com/PINTO0309/MobileNet-SSD-RealSense/raw/refs/heads/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel"
    sha1: "994d30a8afaa9e754d17d2373b2d62a7dfbaaf7a"
  model: "MobileNetSSD_deploy.caffemodel"
  config_load_info:
    url: "https://github.com/PINTO0309/MobileNet-SSD-RealSense/raw/refs/heads/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.prototxt"
    sha1: "25c8404cecdef638c2bd9ac7f3b46a8b96897deb"
  config: "MobileNetSSD_deploy.prototxt"
  mean: [127.5, 127.5, 127.5]
  scale: 0.007843
  width: 300
  height: 300
  rgb: false
  labels: "object_detection_classes_pascal_voc.txt"
  postprocessing: "ssd"
  sample: "object_detection"
# TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection
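# The entry below pulls a .tar.gz archive: download_sha/download_name describe
# the archive itself and member names the file to extract, while sha1 verifies
# the extracted frozen graph (assumed split of responsibilities, based on how
# these keys appear alongside download_models.py).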
ssd_tf:
  load_info:
    url: "http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz"
    sha1: "9e4bcdd98f4c6572747679e4ce570de4f03a70e2"
    download_sha: "6157ddb6da55db2da89dd561eceb7f944928e317"
    download_name: "ssd_mobilenet_v1_coco_2017_11_17.tar.gz"
    member: "ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb"
  model: "ssd_mobilenet_v1_coco_2017_11_17.pb"
  config_load_info:
    url: "https://github.com/opencv/opencv_extra/raw/refs/heads/4.x/testdata/dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt"
    sha1: "c7cf985ce0a4a8953daaa4b8cacdd3c8e31437a6"
  config: "ssd_mobilenet_v1_coco_2017_11_17.pbtxt"
  mean: [0, 0, 0]
  scale: 1.0
  width: 300
  height: 300
  rgb: true
  labels: "object_detection_classes_coco.txt"
  postprocessing: "ssd"
  sample: "object_detection"
# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection
faster_rcnn_tf:
  load_info:
    url: "http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz"
    sha1: "f2e4bf386b9bb3e25ddfcbbd382c20f417e444f3"
    download_sha: "c710f25e5c6a3ce85fe793d5bf266d581ab1c230"
    download_name: "faster_rcnn_inception_v2_coco_2018_01_28.tar.gz"
    member: "faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"
  model: "faster_rcnn_inception_v2_coco_2018_01_28.pb"
  config_load_info:
    url: "https://github.com/opencv/opencv_extra/raw/refs/heads/4.x/testdata/dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt"
    sha1: "059ee437fb4d6f82a6f1d2b3c7a8dd54c107687f"
  config: "faster_rcnn_inception_v2_coco_2018_01_28.pbtxt"
  mean: [0, 0, 0]
  scale: 1.0
  width: 800
  height: 600
  rgb: true
  postprocessing: "ssd"
  sample: "object_detection"
################################################################################
# Image classification models.
################################################################################
squeezenet:
  load_info:
    url: "https://github.com/onnx/models/raw/main/validated/vision/classification/squeezenet/model/squeezenet1.1-7.onnx?download="
    sha1: "ec31942d17715941bb9b81f3a91dc59def9236be"
  model: "squeezenet1.1-7.onnx"
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
  scale: 0.003921
  width: 224
  height: 224
  rgb: true
  labels: "classification_classes_ILSVRC2012.txt"
  sample: "classification"
googlenet:
  load_info:
    url: "https://github.com/onnx/models/raw/69c5d3751dda5349fd3fc53f525395d180420c07/vision/classification/inception_and_googlenet/googlenet/model/googlenet-8.onnx"
    sha1: "da39a3ee5e6b4b0d3255bfef95601890afd80709"
  model: "googlenet-8.onnx"
  mean: [103.939, 116.779, 123.675]
  std: [1, 1, 1]
  scale: 1.0
  width: 224
  height: 224
  rgb: false
  labels: "classification_classes_ILSVRC2012.txt"
  sample: "classification"
resnet:
  load_info:
    url: "https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx"
    sha1: "c3a67b3cb2f0a61a7eb75eb8bd9139c89557cbe0"
  model: "resnet50-v2-7.onnx"
  mean: [123.675, 116.28, 103.53]
  std: [58.395, 57.12, 57.375]
  scale: 1.0
  width: 224
  height: 224
  rgb: true
  labels: "classification_classes_ILSVRC2012.txt"
  sample: "classification"
################################################################################
# Semantic segmentation models.
################################################################################
fcnresnet50:
  load_info:
    url: "https://github.com/onnx/models/raw/491ce05590abb7551d7fae43c067c060eeb575a6/validated/vision/object_detection_segmentation/fcn/model/fcn-resnet50-12.onnx"
    sha1: "1bb0c7e0034038969aecc6251166f1612a139230"
  model: "fcn-resnet50-12.onnx"
  mean: [103.5, 116.2, 123.6]
  scale: 0.019
  width: 500
  height: 500
  rgb: false
  sample: "segmentation"
fcnresnet101:
  load_info:
    url: "https://github.com/onnx/models/raw/fb8271d5d5d9b90dbb1eb5e8e40f8f580fb248b3/vision/object_detection_segmentation/fcn/model/fcn-resnet101-11.onnx"
    sha1: "e7e76474bf6b73334ab32c4be1374c9e605f5aed"
  model: "fcn-resnet101-11.onnx"
  mean: [103.5, 116.2, 123.6]
  scale: 0.019
  width: 500
  height: 500
  rgb: false
  sample: "segmentation"
u2netp:
  load_info:
    url: "https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2netp.onnx"
    sha1: "0a99236f0d5c1916a99a8c401b23e5ef32038606"
  model: "u2netp.onnx"
  mean: [123.6, 116.2, 103.5]
  scale: 0.019
  width: 320
  height: 320
  rgb: true
  sample: "segmentation"
################################################################################
# Text detection models.
################################################################################
DB:
  load_info:
    url: "https://drive.google.com/uc?export=dowload&id=17_ABp79PlFt9yPCxSaarVc_DKTmrSGGf"
    sha1: "bef233c28947ef6ec8c663d20a2b326302421fa3"
  model: "DB_IC15_resnet50.onnx"
  ocr_load_info:
    ocr_url: "https://drive.google.com/uc?export=dowload&id=159VavnbvfBQkLIPSAu2SP5Yij1Fy4azw"
    ocr_sha1: "c4ab1fb3f13c1c8ffc04f016e72ec85311de4ebe"
  ocr_model: "VGG_CTC.onnx"
  mean: [122.67891434, 116.66876762, 104.00698793]
  scale: 0.00392
  width: 736
  height: 736
  rgb: false
  sample: "text_detection"
East:
  load_info:
    url: "https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1"
    sha1: "fffabf5ac36f37bddf68e34e84b45f5c4247ed06"
    download_name: "frozen_east_text_detection.tar.gz"
    download_sha: "3ca8233d6edd748f7ed23246c8ca24cbf696bb94"
  model: "frozen_east_text_detection.pb"
  ocr_load_info:
    ocr_url: "https://drive.google.com/uc?export=dowload&id=159VavnbvfBQkLIPSAu2SP5Yij1Fy4azw"
    ocr_sha1: "c4ab1fb3f13c1c8ffc04f016e72ec85311de4ebe"
  ocr_model: "VGG_CTC.onnx"
  mean: [123.68, 116.78, 103.94]
  scale: 1.0
  width: 736
  height: 736
  rgb: false
  sample: "text_detection"
OCR:
  load_info:
    url: "https://drive.google.com/uc?export=dowload&id=159VavnbvfBQkLIPSAu2SP5Yij1Fy4azw"
    sha1: "c4ab1fb3f13c1c8ffc04f016e72ec85311de4ebe"
  model: "VGG_CTC.onnx"
  sample: "text_recognition"
################################################################################
# Edge Detection models.
################################################################################
dexined:
  load_info:
    url: "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/edge_detection_dexined/edge_detection_dexined_2024sep.onnx?download="
    sha1: "f86f2d32c3cf892771f76b5e6b629b16a66510e9"
  model: "edge_detection_dexined_2024sep.onnx"
  mean: [103.5, 116.2, 123.6]
  scale: 1.0
  width: 512
  height: 512
  rgb: false
  sample: "edge_detection"
################################################################################
# Person Re-Identification models.
################################################################################
reid:
  load_info:
    url: "https://github.com/opencv/opencv_zoo/raw/main/models/person_reid_youtureid/person_reid_youtu_2021nov.onnx?download="
    sha1: "d4316b100db40f8840aa82626e1cf3f519a7f1ae"
  model: "person_reid_youtu_2021nov.onnx"
  yolo_load_info:
    yolo_url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx"
    yolo_sha1: "68f864475d06e2ec4037181052739f268eeac38d"
  yolo_model: "yolov8n.onnx"
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
  scale: 0.00392
  yolo_scale: 0.00392
  yolo_width: 640
  yolo_height: 640
  width: 128
  height: 256
  rgb: false
  yolo_rgb: true
  sample: "person_reid"
################################################################################
# Tracker models.
################################################################################
vit:
  load_info:
    url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_vittrack/object_tracking_vittrack_2023sep.onnx"
    sha1: "50008bb4f6a27b1aa940ad886b1bd1936ac4ed3e"
  model: "object_tracking_vittrack_2023sep.onnx"
  sample: "object_tracker"
nanotrack:
  nanotrack_head_load_info:
    nanotrack_head_url: "https://github.com/HonglinChu/SiamTrackers/raw/refs/heads/master/NanoTrack/models/nanotrackv2/nanotrack_head_sim.onnx"
    nanotrack_head_sha1: "39f168489671700cf739e402dfc67d41ce648aef"
  nanotrack_head_model: "nanotrack_head_sim.onnx"
  nanotrack_back_load_info:
    nanotrack_back_url: "https://github.com/HonglinChu/SiamTrackers/raw/refs/heads/master/NanoTrack/models/nanotrackv2/nanotrack_backbone_sim.onnx"
    nanotrack_back_sha1: "6e773a364457b78574f9f63a23b0659ee8646f8f"
  nanotrack_back_model: "nanotrack_backbone_sim.onnx"
  sample: "object_tracker"
dasiamrpn:
  dasiamrpn_load_info:
    dasiamrpn_url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_model_2021nov.onnx?download="
    dasiamrpn_sha1: "91b774fce7df4c0e4918469f0f482d9a27d0e2d4"
  dasiamrpn_model: "object_tracking_dasiamrpn_model_2021nov.onnx"
  dasiamrpn_kernel_r1_load_info:
    dasiamrpn_kernel_r1_url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_r1_2021nov.onnx?download="
    dasiamrpn_kernel_r1_sha1: "bb64620a54348657133eb28be2d3a2a8c76b84b3"
  dasiamrpn_kernel_r1_model: "object_tracking_dasiamrpn_kernel_r1_2021nov.onnx"
  dasiamrpn_kernel_cls_load_info:
    dasiamrpn_kernel_cls_url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx?download="
    dasiamrpn_kernel_cls_sha1: "e9ccd270ce8059bdf7ed0d1845c03ef4a951ee0f"
  dasiamrpn_kernel_cls_model: "object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx"
  sample: "object_tracker"
################################################################################
# Inpainting models.
################################################################################
lama:
  load_info:
    url: "https://github.com/gursimarsingh/opencv_zoo/raw/0417e12d24bba41613ae0380bd698cca73a4fb17/models/inpainting_lama/inpainting_lama_2025jan.onnx?download="
    sha1: "7c6cdb9362bf73de2a80cfcaf17e121e3302f24c"
  model: "inpainting_lama_2025jan.onnx"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 512
  height: 512
  rgb: false
  sample: "inpainting"
ldm_inpainting:
  encoder_load_info:
    encoder_url: "https://dl.opencv.org/models/ldm_inpainting/InpaintEncoder.onnx"
    encoder_sha1: "eb663262304473d81d6ae627d7117892dac56b5e"
  encoder_model: "InpaintEncoder.onnx"
  decoder_load_info:
    decoder_url: "https://dl.opencv.org/models/ldm_inpainting/InpaintDecoder.onnx"
    decoder_sha1: "af258c100e3a3b0970493b6375c8775beaffc9d1"
  decoder_model: "InpaintDecoder.onnx"
  diffusor_load_info:
    diffusor_url: "https://dl.opencv.org/models/ldm_inpainting/LatentDiffusion.onnx"
    diffusor_sha1: "2c6f8a505d9a93195510c854d8f023fab27ce70e"
  diffusor_model: "LatentDiffusion.onnx"
  mean: [0, 0, 0]
  scale: 0.00392
  width: 512
  height: 512
  rgb: true
  sample: "ldm_inpainting"