mirror of
https://github.com/opencv/opencv.git
synced 2024-12-13 07:59:27 +08:00
448375d1e7
Replaced caffe model with onnx for colorization sample #25433 #25006 Improved sample for colorization with onnx model in cpp and python. Added a demo image in data folder for testing ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
88 lines
4.2 KiB
Python
88 lines
4.2 KiB
Python
# Script is based on https://github.com/richzhang/colorization/blob/master/colorization/colorize.py
|
|
# To download the onnx model, see: https://storage.googleapis.com/ailia-models/colorization/colorizer.onnx
|
|
# python colorization.py --onnx_model_path colorizer.onnx --input ansel_adams3.jpg
|
|
import numpy as np
|
|
import argparse
|
|
import cv2 as cv
|
|
import numpy as np
|
|
|
|
def parse_args():
    """Parse the command-line options of the colorization sample.

    Returns:
        argparse.Namespace with the attributes ``input``, ``onnx_model_path``,
        ``backend`` and ``target``. ``backend`` and ``target`` are integers
        restricted to the cv.dnn constants listed below.
    """
    # (constant, label) pairs; the order defines both the accepted choices
    # and the order in which they are listed in the --help text.
    backend_options = (
        (cv.dnn.DNN_BACKEND_DEFAULT, "automatically (by default)"),
        (cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
         "Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)"),
        (cv.dnn.DNN_BACKEND_OPENCV, "OpenCV implementation"),
        (cv.dnn.DNN_BACKEND_VKCOM, "VKCOM"),
        (cv.dnn.DNN_BACKEND_CUDA, "CUDA"),
    )
    target_options = (
        (cv.dnn.DNN_TARGET_CPU, "CPU target (by default)"),
        (cv.dnn.DNN_TARGET_OPENCL, "OpenCL"),
        (cv.dnn.DNN_TARGET_OPENCL_FP16, "OpenCL fp16 (half-float precision)"),
        (cv.dnn.DNN_TARGET_MYRIAD, "NCS2 VPU"),
        (cv.dnn.DNN_TARGET_HDDL, "HDDL VPU"),
        (cv.dnn.DNN_TARGET_VULKAN, "Vulkan"),
        (cv.dnn.DNN_TARGET_CUDA, "CUDA"),
        (cv.dnn.DNN_TARGET_CUDA_FP16, "CUDA fp16 (half-float preprocess)"),
    )

    backend_ids = tuple(option_id for option_id, _ in backend_options)
    target_ids = tuple(option_id for option_id, _ in target_options)

    # Render "<id>: <label>" entries separated by ", " after a fixed prefix.
    backend_help = "Choose one of computation backends: " + ", ".join(
        "%d: %s" % option for option in backend_options)
    target_help = "Choose one of target computation devices: " + ", ".join(
        "%d: %s" % option for option in target_options)

    cli = argparse.ArgumentParser(description='iColor: deep interactive colorization')
    cli.add_argument('--input', default='baboon.jpg', help='Path to image.')
    cli.add_argument('--onnx_model_path', help='Path to onnx model', required=True)
    cli.add_argument('--backend', choices=backend_ids, default=cv.dnn.DNN_BACKEND_DEFAULT,
                     type=int, help=backend_help)
    cli.add_argument('--target', choices=target_ids, default=cv.dnn.DNN_TARGET_CPU,
                     type=int, help=target_help)
    return cli.parse_args()
|
|
|
|
if __name__ == '__main__':
    # Colorize a grayscale image: the network predicts the two chroma (ab)
    # planes from the lightness (L) plane, and the result is converted from
    # Lab back to BGR for display.
    args = parse_args()

    img_gray = cv.imread(cv.samples.findFile(args.input), cv.IMREAD_GRAYSCALE)
    if img_gray is None:
        # imread signals an unreadable/undecodable file by returning None;
        # stop here with a clear message instead of failing inside cv.resize.
        raise SystemExit("Failed to read image: %s" % args.input)
    h, w = img_gray.shape

    # Network input: 256x256 L plane, float32, scaled from 0-255 to 0-100.
    # Resize while still 8-bit, then convert, so the result matches the
    # original preprocessing order.
    net_input = cv.resize(img_gray, (256, 256), interpolation=cv.INTER_CUBIC)
    net_input = net_input.astype(np.float32)
    net_input *= (100.0 / 255.0)

    net = cv.dnn.readNetFromONNX(args.onnx_model_path)
    net.setPreferableBackend(args.backend)
    net.setPreferableTarget(args.target)

    # Single-channel input, so swapRB has nothing to swap; kept explicit.
    net.setInput(cv.dnn.blobFromImage(net_input, swapRB=False))
    ab = net.forward()[0]  # first (only) item of the output batch

    # Bring the prediction to channels-last (H, W, 2) layout if the network
    # returned it channels-first (2, H, W).
    if ab.shape[0] == 2:
        ab = ab.transpose((1, 2, 0))

    # Upsample the predicted chroma planes to the input image resolution.
    if ab.shape[:2] != (h, w):
        ab = cv.resize(ab, (w, h), interpolation=cv.INTER_LINEAR)

    # Full-resolution L plane in the 0-100 range, with a trailing channel
    # axis so it can be stacked with the two ab planes.
    l_plane = img_gray.astype(np.float32) * (100.0 / 255.0)
    lab_image = np.concatenate((l_plane[..., np.newaxis], ab), axis=-1)
    lab_image = lab_image.astype(np.float32, copy=False)

    # Convert the float32 Lab image to BGR.
    image_bgr_out = cv.cvtColor(lab_image, cv.COLOR_Lab2BGR)

    cv.imshow("input image", img_gray)
    cv.imshow("output image", image_bgr_out)
    cv.waitKey(0)