mirror of
https://github.com/opencv/opencv.git
synced 2025-01-12 15:49:32 +08:00
eba158fb0c
Optimize int8 layers in the DNN module by using RISC-V Vector intrinsics. #25230

This patch optimizes 3 functions in the int8 layers by using RVV native intrinsics.

This patch was tested on QEMU using VLEN=128 and VLEN=256 with `./bin/opencv_test_dnn --gtest_filter="*Int8*"`. On a real device (k230, VLEN=128), `EfficientDet_int8` in `opencv_perf_dnn` showed a performance improvement of 1.46x.

| Name of Test | Original | Optimized | Speed-up |
| ------------------------------------------ | -------- | ---------- | -------- |
| EfficientDet_int8::DNNTestNetwork::OCV/CPU | 2843.467 | 1947.013 | 1.46 |

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is an accuracy test, performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
326 lines
13 KiB
CMake
326 lines
13 KiB
CMake
# The dnn module is switched off for WinRT targets.
if(WINRT)
  ocv_module_disable(dnn)
endif()

# One-line summary of the module, stored in the_description.
set(the_description "Deep neural network module. It allows to load models from different frameworks and to make forward pass")
|
|
|
|
# Each call passes a source stem followed by a list of CPU optimization names.
# NOTE(review): presumably these are the instruction-set specific variants
# built for that source; ocv_add_dispatched_file_force_all() is defined
# outside this file, so confirm against its definition.
ocv_add_dispatched_file_force_all("layers/layers_common"
    AVX AVX2 AVX512_SKX RVV LASX)
ocv_add_dispatched_file_force_all("int8layers/layers_common"
    AVX2 AVX512_SKX RVV LASX)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_block"
    AVX AVX2 NEON NEON_FP16)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_depthwise"
    AVX AVX2 RVV LASX)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_winograd_f63"
    AVX AVX2 NEON_FP16)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/fast_gemm_kernels"
    AVX AVX2 NEON LASX)
|
|
|
|
# Declare the dnn module with opencv_core and opencv_imgproc as its listed
# modules; python, java, objc and js are passed after the WRAP keyword.
ocv_add_module(dnn
    opencv_core
    opencv_imgproc
    WRAP python java objc js
)

# Pull in the plugin helper script that lives next to this file.
include(${CMAKE_CURRENT_LIST_DIR}/cmake/plugin.cmake)
|
|
|
|
|
|
# Switch for the OpenCL code path. The expression after the description
# ("HAVE_OPENCL AND NOT APPLE") is handed to ocv_option() as its default.
ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE)

# CV_OCL4DNN is only defined when OpenCL is both available and requested.
if(HAVE_OPENCL AND OPENCV_DNN_OPENCL)
  ocv_target_compile_definitions(${the_module} PRIVATE "CV_OCL4DNN=1")
endif()
|
|
|
|
# Private compile definitions announcing which optional backends are present.

# WebNN: must be both requested (WITH_WEBNN) and detected (HAVE_WEBNN).
if(HAVE_WEBNN AND WITH_WEBNN)
  ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_WEBNN=1")
endif()

# TIM-VX
if(HAVE_TIMVX)
  ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TIMVX=1")
endif()

# CANN
if(HAVE_CANN)
  ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_CANN=1")
endif()
|
|
|
|
# Switch for the CUDA backend; its default expression requires the CUDA
# toolkit, cuBLAS and cuDNN to all be available.
ocv_option(OPENCV_DNN_CUDA "Build with CUDA support"
    HAVE_CUDA
    AND HAVE_CUBLAS
    AND HAVE_CUDNN
)

# When the backend is requested, report the first missing dependency;
# CV_CUDA4DNN is defined only if none is missing.
if(OPENCV_DNN_CUDA)
  if(NOT HAVE_CUDA)
    message(SEND_ERROR "DNN: CUDA backend requires CUDA Toolkit. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
  elseif(NOT HAVE_CUBLAS)
    message(SEND_ERROR "DNN: CUDA backend requires cuBLAS. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
  elseif(NOT HAVE_CUDNN)
    message(SEND_ERROR "DNN: CUDA backend requires cuDNN. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
  else()
    ocv_target_compile_definitions(${the_module} PRIVATE "CV_CUDA4DNN=1")
  endif()
endif()
|
|
|
|
|
|
# Attach this module's script to the INIT_MODULE_SOURCES_opencv_dnn hook.
ocv_cmake_hook_append(
    INIT_MODULE_SOURCES_opencv_dnn
    "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake"
)
|
|
|
|
|
|
# Warning suppression for this module's sources, split by compiler family.
if(MSVC)
  add_definitions(-D_CRT_SECURE_NO_WARNINGS=1)
  ocv_warnings_disable(CMAKE_CXX_FLAGS
      /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146
      /wd4305 /wd4127 /wd4100 /wd4512 /wd4125 /wd4389 /wd4510 /wd4610
      /wd4702 /wd4456 /wd4457 /wd4065 /wd4310 /wd4661 /wd4506
  )
  # Extra suppressions for toolsets older than MSVS 2019 (MSVS 2015/2017),
  # needed by the generated .pb.cc files.
  if(MSVC_VERSION LESS 1920)
    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4309)  # 'static_cast': truncation of constant value
    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4189)  # local variable is initialized but not referenced
    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4592)  # symbol will be dynamically initialized (implementation limitation)
  endif()
else()
  ocv_warnings_disable(CMAKE_CXX_FLAGS
      -Wno-deprecated -Wmissing-prototypes -Wmissing-declarations -Wshadow
      -Wunused-parameter -Wsign-compare
  )
endif()
|
|
# -Wundef is silenced whenever CUDA is available.
if(HAVE_CUDA)
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
endif()

# Without C++11 the protobuf files trip -Wundef on LANG_CXX11.
if(NOT HAVE_CXX11)
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-undef)  # LANG_CXX11 from protobuf files
endif()
|
|
|
|
# Apple framework builds: silence 64-to-32 bit shortening warnings.
if(APPLE_FRAMEWORK)
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshorten-64-to-32)
endif()

# Android builds: add two directory-wide preprocessor definitions.
if(ANDROID)
  add_definitions(
      -DDISABLE_POSIX_MEMALIGN
      -DTH_DISABLE_HEAP_TRACKING
  )
endif()
|
|
|
|
# When protobuf is not built as part of this build (BUILD_PROTOBUF is off),
# tell the module sources via OPENCV_DNN_EXTERNAL_PROTOBUF.
if(NOT BUILD_PROTOBUF)
  ocv_target_compile_definitions(${the_module}
      PRIVATE "OPENCV_DNN_EXTERNAL_PROTOBUF=1")
endif()
|
|
|
|
# Suppress warnings raised by the autogenerated caffe.pb.* files.
# MSVC (/wd...), Intel-style (-wd...) and GCC/Clang-style (-W...) flags are
# all listed in this single call.
ocv_warnings_disable(CMAKE_CXX_FLAGS
    /wd4125 /wd4267 /wd4127 /wd4244 /wd4512 /wd4702
    /wd4456 /wd4510 /wd4610 /wd4800
    /wd4701 /wd4703  # potentially uninitialized local/pointer variable 'value' used
    /wd4505          # unreferenced local function has been removed
    /wd4458          # declaration of 'x' hides class member. GCC still works, MSVC bug is here: https://developercommunity.visualstudio.com/content/problem/219311/c-c4458-declaration-hides-class-member-warning-iss.html
    -wd858 -wd2196
    -Winvalid-offsetof  # Apple Clang (attr_value.pb.cc)
)
|
|
|
|
# Accumulators for include directories and link libraries, filled in below.
set(include_dirs "")
set(libs "")

if(HAVE_PROTOBUF)
  ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_PROTOBUF=1")

  if(NOT PROTOBUF_UPDATE_FILES)
    # Use the .pb.cc/.pb.h files already checked in under misc/.
    file(GLOB fw_srcs
        "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.cc"
        "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.cc"
        "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.cc"
    )
    file(GLOB fw_hdrs
        "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.h"
        "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.h"
        "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.h"
    )
    set(fw_inc
        "${CMAKE_CURRENT_LIST_DIR}/misc/caffe"
        "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow"
        "${CMAKE_CURRENT_LIST_DIR}/misc/onnx"
    )
  else()
    # Regenerate the C++ sources from the .proto definitions under src/.
    file(GLOB proto_files
        "${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto"
        "${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto"
        "${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto"
    )
    set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON)  # required for tensorflow
    protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})
  endif()
endif()
|
|
|
|
# TFLite importer switch; its default expression is the presence of the
# ocv.3rdparty.flatbuffers target.
ocv_option(OPENCV_DNN_TFLITE "Build with TFLite support" (TARGET ocv.3rdparty.flatbuffers))

if(TARGET ocv.3rdparty.flatbuffers AND OPENCV_DNN_TFLITE)
  # Abort configuration if the target exists but HAVE_FLATBUFFERS is not set.
  if(NOT HAVE_FLATBUFFERS)
    message(FATAL_ERROR "DNN: TFLite is not supported without enabled 'flatbuffers'. Check build configuration.")
  endif()

  list(APPEND libs ocv.3rdparty.flatbuffers)
  list(APPEND fw_hdrs "${CMAKE_CURRENT_LIST_DIR}/misc/tflite/schema_generated.h")
  list(APPEND fw_inc "${CMAKE_CURRENT_LIST_DIR}/misc/tflite")

  # Schema is generated by this command:
  #add_custom_command(
  #      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h"
  #      COMMAND flatbuffers::flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_LIST_DIR}/src/tflite/schema.fbs")
endif()
|
|
|
|
# Fold the framework include paths and the protobuf libraries into the
# module-wide lists.
list(APPEND include_dirs ${fw_inc})
list(APPEND libs ${Protobuf_LIBRARIES})

# Protobuf's include directories are added only when BUILD_PROTOBUF is off.
if(NOT BUILD_PROTOBUF)
  list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS})
endif()
|
|
|
|
# Options later forwarded to ocv_glob_module_sources().
set(sources_options "")

list(APPEND libs ${LAPACK_LIBRARIES})

# Without a usable, enabled OpenCL, pass EXCLUDE_OPENCL; otherwise add the
# OpenCL include directories.
if(NOT (OPENCV_DNN_OPENCL AND HAVE_OPENCL))
  set(sources_options EXCLUDE_OPENCL)
else()
  list(APPEND include_dirs ${OPENCL_INCLUDE_DIRS})
endif()
|
|
|
|
# CUDA backend wiring: include paths, architecture validation and libraries.
# If the backend is not fully available, EXCLUDE_CUDA is added to the source
# options instead.
if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
  list(APPEND include_dirs
      ${CUDA_TOOLKIT_INCLUDE}
      ${CUDNN_INCLUDE_DIRS}
  )

  # Reject any requested compute capability below 3.0.
  set(CC_LIST ${CUDA_ARCH_BIN})
  separate_arguments(CC_LIST)
  foreach(cc ${CC_LIST})
    if(cc VERSION_LESS 3.0)
      message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
    endif()
  endforeach()
  unset(CC_LIST)

  # Libraries are appended here only when ENABLE_CUDA_FIRST_CLASS_LANGUAGE is
  # set; cublasLt is added for CUDA 10.1 and newer.
  if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
    list(APPEND libs ${CUDNN_LIBRARIES} CUDA::cublas${CUDA_LIB_EXT})
    if(NOT CUDA_VERSION VERSION_LESS 10.1)
      list(APPEND libs CUDA::cublasLt${CUDA_LIB_EXT})
    endif()
  endif()
else()
  list(APPEND sources_options EXCLUDE_CUDA)
endif()
|
|
|
|
# TIM-VX: add its include directory and link its library between
# --whole-archive / --no-whole-archive linker flags.
if(HAVE_TIMVX)
  list(APPEND include_dirs ${TIMVX_INCLUDE_DIR})
  list(APPEND libs
      -Wl,--whole-archive
      ${TIMVX_LIBRARY}
      -Wl,--no-whole-archive
  )
endif()

# CANN: same pattern as TIM-VX above.
if(HAVE_CANN)
  list(APPEND include_dirs ${CANN_INCLUDE_DIRS})
  list(APPEND libs
      -Wl,--whole-archive
      ${CANN_LIBRARIES}
      -Wl,--no-whole-archive
  )
endif()
|
|
|
|
# Extra sources needed for WebNN; the list stays empty on Emscripten or when
# WebNN is unavailable.
set(webnn_srcs "")

if(NOT EMSCRIPTEN AND HAVE_WEBNN)
  list(APPEND include_dirs ${WEBNN_HEADER_DIRS} ${WEBNN_INCLUDE_DIRS})
  list(APPEND libs
      -Wl,--whole-archive
      ${WEBNN_LIBRARIES}
      -Wl,--no-whole-archive
  )
  # The path is read from the WEBNN_NATIVE_DIR environment variable at
  # configure time.
  list(APPEND webnn_srcs $ENV{WEBNN_NATIVE_DIR}/gen/src/webnn/webnn_cpp.cpp)
endif()
|
|
|
|
# Hand the collected include directories to the module.
ocv_module_include_directories(${include_dirs})

# Per-compiler options applied only to the sources listed in fw_srcs.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override")  # Clang
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override")  # GCC
  ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-array-bounds")  # GCC 9.3.0 (Ubuntu 20.04)
endif()
|
|
|
|
# Libraries that backends need at run time; filled in further down the file.
set(dnn_runtime_libs "")

# Collect every implementation file and internal header under src/.
file(GLOB_RECURSE dnn_srcs
    "${CMAKE_CURRENT_LIST_DIR}/src/*.cpp"
)
file(GLOB_RECURSE dnn_int_hdrs
    "${CMAKE_CURRENT_LIST_DIR}/src/*.hpp"
    "${CMAKE_CURRENT_LIST_DIR}/src/*.h"
)

# Source list for plugins: start from everything, then filter out the
# patterns below.
set(dnn_plugin_srcs ${dnn_srcs} ${dnn_int_hdrs})
ocv_list_filterout_ex(dnn_plugin_srcs
    # NOTE(review): "/src/dnn_utils.cpp$" used to appear twice in this
    # alternation. The redundant copy was dropped (matching is unchanged);
    # confirm that no other file was meant to be listed in its place.
    "/src/dnn.cpp$|/src/dnn_utils.cpp$|/src/dnn_read.cpp$|/src/registry.cpp$|/src/backend.cpp$"
    # importers
    "/src/(caffe|darknet|onnx|tensorflow|torch)/"
    # executors
    "/src/(cuda|cuda4dnn|ocl4dnn|vkcom|webnn)/"
)
|
|
|
|
# OpenVINO switch; its default expression is the presence of the
# ocv.3rdparty.openvino target.
ocv_option(OPENCV_DNN_OPENVINO "Build with OpenVINO support (2021.4+)" (TARGET ocv.3rdparty.openvino))

if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO)
  # Abort if neither HAVE_OPENVINO nor HAVE_NGRAPH is set.
  if(NOT (HAVE_OPENVINO OR HAVE_NGRAPH))
    message(FATAL_ERROR "DNN: Inference Engine is not supported without enabled 'nGraph'. Check build configuration.")
  endif()

  if("openvino" IN_LIST DNN_PLUGIN_LIST OR DNN_PLUGIN_LIST STREQUAL "all")
    # plugin doesn't support PCH, separate directory scope is necessary
    # opencv_world requires absolute path
    add_subdirectory(
        "${CMAKE_CURRENT_LIST_DIR}/misc/plugin/openvino"
        "${CMAKE_CURRENT_BINARY_DIR}/dnn_plugin_openvino"
    )
  elseif(NOT OPENCV_DNN_BUILTIN_BACKEND)
    # No plugin requested: add OpenVINO to the runtime library list.
    list(APPEND dnn_runtime_libs ocv.3rdparty.openvino)
  endif()
endif()
|
|
|
|
# Optional cache override for the default backend. When non-empty it is passed
# to src/dnn_params.cpp as the OPENCV_DNN_BACKEND_DEFAULT definition.
set(OPENCV_DNN_BACKEND_DEFAULT "" CACHE STRING "Default backend used by the DNN module (DNN_BACKEND_OPENCV if empty)")

if(OPENCV_DNN_BACKEND_DEFAULT)
  ocv_append_source_file_compile_definitions(
      "${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp"
      "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}"
  )
endif()
|
|
|
|
# Finalise the module: external targets, sources, the module itself, samples
# and accuracy tests. The call order is kept exactly as it was.
ocv_install_used_external_targets(${libs} ${dnn_runtime_libs})

ocv_glob_module_sources(${sources_options}
    SOURCES ${fw_srcs} ${webnn_srcs}
)
ocv_create_module(${libs} ${dnn_runtime_libs})

ocv_add_samples()
ocv_add_accuracy_tests(${dnn_runtime_libs})
|
|
|
|
# Mirror the OPENCV_DNN_EXTERNAL_PROTOBUF definition onto the accuracy-test
# target, if that target exists and BUILD_PROTOBUF is off.
if(NOT BUILD_PROTOBUF AND TARGET opencv_test_dnn)
  ocv_target_compile_definitions(opencv_test_dnn
      PRIVATE "OPENCV_DNN_EXTERNAL_PROTOBUF=1")
endif()
|
|
|
|
# Performance tests: gather sources and headers under perf/ and register them
# together with the common test headers from test/.
set(perf_path "${CMAKE_CURRENT_LIST_DIR}/perf")

file(GLOB_RECURSE perf_srcs
    "${perf_path}/*.cpp"
)
file(GLOB_RECURSE perf_hdrs
    "${perf_path}/*.hpp"
    "${perf_path}/*.h"
)

ocv_add_perf_tests(${dnn_runtime_libs}
    FILES test_common
        "${CMAKE_CURRENT_LIST_DIR}/test/test_common.hpp"
        "${CMAKE_CURRENT_LIST_DIR}/test/test_common.impl.hpp"
    FILES Src ${perf_srcs}
    FILES Include ${perf_hdrs}
)
|
|
|
|
# Optional comparison against Caffe / clCaffe in the performance tests.
# Both switches default to OFF. The older "${the_module}_PERF_*" spellings are
# still honoured for compatibility.
ocv_option(OPENCV_DNN_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(OPENCV_DNN_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)

# Only touch opencv_perf_dnn when that target exists, as is done for the other
# test targets in this file.
if(BUILD_PERF_TESTS AND TARGET opencv_perf_dnn)
  if(OPENCV_DNN_PERF_CAFFE
      OR ${the_module}_PERF_CAFFE  # compatibility for deprecated option
  )
    find_package(Caffe QUIET)
    if(Caffe_FOUND)
      ocv_target_compile_definitions(opencv_perf_dnn PRIVATE "HAVE_CAFFE=1")
      ocv_target_link_libraries(opencv_perf_dnn caffe)
    endif()
  elseif(OPENCV_DNN_PERF_CLCAFFE
      # The deprecated clCaffe switch. This branch used to re-test
      # ${the_module}_PERF_CAFFE, which can never be true here because the
      # preceding if() already handled it.
      OR ${the_module}_PERF_CLCAFFE  # compatibility for deprecated option
  )
    find_package(Caffe QUIET)
    if(Caffe_FOUND)
      ocv_target_compile_definitions(opencv_perf_dnn PRIVATE "HAVE_CLCAFFE=1")
      ocv_target_link_libraries(opencv_perf_dnn caffe)
    endif()
  endif()
endif()
|
|
|
|
# Plugin support: define ENABLE_PLUGINS for the module (and for the accuracy
# tests when that target exists), and pass the debug postfix to backend.cpp.
if(DNN_ENABLE_PLUGINS)
  ocv_target_compile_definitions(${the_module} PRIVATE ENABLE_PLUGINS)

  if(TARGET opencv_test_dnn)
    ocv_target_compile_definitions(opencv_test_dnn PRIVATE ENABLE_PLUGINS)
  endif()

  if(OPENCV_DEBUG_POSTFIX)
    ocv_append_source_file_compile_definitions(
        "${CMAKE_CURRENT_LIST_DIR}/src/backend.cpp"
        "DEBUG_POSTFIX=${OPENCV_DEBUG_POSTFIX}"
    )
  endif()
endif()
|
|
|
|
# Link the accuracy tests directly against OpenVINO when the third-party
# target exists, the option is on and the test target was created.
ocv_option(OPENCV_TEST_DNN_OPENVINO "Build test with OpenVINO code" (TARGET ocv.3rdparty.openvino))
if(TARGET ocv.3rdparty.openvino AND OPENCV_TEST_DNN_OPENVINO AND TARGET opencv_test_dnn)
  ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.openvino)
endif()

# Same arrangement for CANN.
ocv_option(OPENCV_TEST_DNN_CANN "Build test with CANN" (TARGET ocv.3rdparty.cann))
if(TARGET ocv.3rdparty.cann AND OPENCV_TEST_DNN_CANN AND TARGET opencv_test_dnn)
  ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.cann)
endif()
|
|
|
|
# TIM-VX in the accuracy tests; the option's default expression is HAVE_TIMVX.
ocv_option(OPENCV_TEST_DNN_TIMVX "Build test with TIM-VX" (HAVE_TIMVX))

if(OPENCV_TEST_DNN_TIMVX AND TARGET opencv_test_dnn)
  ocv_target_compile_definitions(opencv_test_dnn PRIVATE "HAVE_TIMVX=1")
endif()
|
|
|
|
# TFLite in the tests; the option's default expression is OPENCV_DNN_TFLITE.
# The definition is applied to whichever of the accuracy and performance test
# targets exist.
ocv_option(OPENCV_TEST_DNN_TFLITE "Build test with TFLite" (OPENCV_DNN_TFLITE))

if(OPENCV_TEST_DNN_TFLITE)
  if(TARGET opencv_test_dnn)
    ocv_target_compile_definitions(opencv_test_dnn
        PRIVATE "OPENCV_TEST_DNN_TFLITE=1")
  endif()
  if(TARGET opencv_perf_dnn)
    ocv_target_compile_definitions(opencv_perf_dnn
        PRIVATE "OPENCV_TEST_DNN_TFLITE=1")
  endif()
endif()
|