opencv/modules/dnn/CMakeLists.txt
HAN Liutong eba158fb0c
Merge pull request #25230 from hanliutong/rvv-conv
Optimize int8 layers in DNN modules by using RISC-V Vector intrinsic. #25230

This patch optimize 3 functions in the int8 layer by using RVV Native Intrinsic.

This patch was tested on QEMU using VLEN=128 and VLEN=256 on `./bin/opencv_test_dnn --gtest_filter="*Int8*"`;
On the real device (k230, VLEN=128), `EfficientDet_int8` in `opencv_perf_dnn` showed a performance improvement of 1.46x.

| Name of Test                               |  Original | optimized | Speed-up |
| ------------------------------------------ | -------- | ---------- | -------- |
| EfficientDet_int8::DNNTestNetwork::OCV/CPU | 2843.467 | 1947.013   | 1.46     |


### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2024-03-31 16:47:06 +03:00

326 lines
13 KiB
CMake

if(WINRT)
ocv_module_disable(dnn)
endif()
set(the_description "Deep neural network module. It allows to load models from different frameworks and to make forward pass")
ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX RVV LASX)
ocv_add_dispatched_file_force_all("int8layers/layers_common" AVX2 AVX512_SKX RVV LASX)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_block" AVX AVX2 NEON NEON_FP16)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_depthwise" AVX AVX2 RVV LASX)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_winograd_f63" AVX AVX2 NEON_FP16)
ocv_add_dispatched_file_force_all("layers/cpu_kernels/fast_gemm_kernels" AVX AVX2 NEON LASX)
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java objc js)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/plugin.cmake)
ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE)
if(OPENCV_DNN_OPENCL AND HAVE_OPENCL)
ocv_target_compile_definitions(${the_module} PRIVATE "CV_OCL4DNN=1")
endif()
if(WITH_WEBNN AND HAVE_WEBNN)
ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_WEBNN=1")
endif()
if(HAVE_TIMVX)
ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TIMVX=1")
endif()
if(HAVE_CANN)
ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_CANN=1")
endif()
ocv_option(OPENCV_DNN_CUDA "Build with CUDA support"
HAVE_CUDA
AND HAVE_CUBLAS
AND HAVE_CUDNN
)
if(OPENCV_DNN_CUDA)
if(HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
ocv_target_compile_definitions(${the_module} PRIVATE "CV_CUDA4DNN=1")
else()
if(NOT HAVE_CUDA)
message(SEND_ERROR "DNN: CUDA backend requires CUDA Toolkit. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
elseif(NOT HAVE_CUBLAS)
message(SEND_ERROR "DNN: CUDA backend requires cuBLAS. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
elseif(NOT HAVE_CUDNN)
message(SEND_ERROR "DNN: CUDA backend requires cuDNN. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
endif()
endif()
endif()
ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake")
if(MSVC)
add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 )
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146
/wd4305 /wd4127 /wd4100 /wd4512 /wd4125 /wd4389 /wd4510 /wd4610
/wd4702 /wd4456 /wd4457 /wd4065 /wd4310 /wd4661 /wd4506
)
if(MSVC_VERSION LESS 1920) # MSVS 2015/2017, .pb.cc generated files
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4309) # 'static_cast': truncation of constant value
endif()
if(MSVC_VERSION LESS 1920) # <MSVS2019, .pb.cc generated files
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4189) # local variable is initialized but not referenced
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4592) # symbol will be dynamically initialized (implementation limitation)
endif()
else()
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-deprecated -Wmissing-prototypes -Wmissing-declarations -Wshadow
-Wunused-parameter -Wsign-compare
)
endif()
if(HAVE_CUDA)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
endif()
if(NOT HAVE_CXX11)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-undef) # LANG_CXX11 from protobuf files
endif()
if(APPLE_FRAMEWORK)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshorten-64-to-32)
endif()
if(ANDROID)
add_definitions(-DDISABLE_POSIX_MEMALIGN -DTH_DISABLE_HEAP_TRACKING)
endif()
if(NOT BUILD_PROTOBUF)
ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_DNN_EXTERNAL_PROTOBUF=1")
endif()
#suppress warnings in autogenerated caffe.pb.* files
ocv_warnings_disable(CMAKE_CXX_FLAGS
/wd4125 /wd4267 /wd4127 /wd4244 /wd4512 /wd4702
/wd4456 /wd4510 /wd4610 /wd4800
/wd4701 /wd4703 # potentially uninitialized local/pointer variable 'value' used
/wd4505 # unreferenced local function has been removed
/wd4458 # declaration of 'x' hides class member. GCC still works, MSVC bug is here: https://developercommunity.visualstudio.com/content/problem/219311/c-c4458-declaration-hides-class-member-warning-iss.html
-wd858 -wd2196
-Winvalid-offsetof # Apple Clang (attr_value.pb.cc)
)
set(include_dirs "")
set(libs "")
if(HAVE_PROTOBUF)
ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_PROTOBUF=1")
if(PROTOBUF_UPDATE_FILES)
file(GLOB proto_files "${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto" "${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto" "${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto")
set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow
protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})
else()
file(GLOB fw_srcs "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.cc" "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.cc" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.cc")
file(GLOB fw_hdrs "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.h" "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.h" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.h")
set(fw_inc "${CMAKE_CURRENT_LIST_DIR}/misc/caffe" "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx")
endif()
endif()
ocv_option(OPENCV_DNN_TFLITE "Build with TFLite support" (TARGET ocv.3rdparty.flatbuffers))
if(TARGET ocv.3rdparty.flatbuffers AND OPENCV_DNN_TFLITE)
if(NOT HAVE_FLATBUFFERS)
message(FATAL_ERROR "DNN: TFLite is not supported without enabled 'flatbuffers'. Check build configuration.")
endif()
list(APPEND libs ocv.3rdparty.flatbuffers)
list(APPEND fw_hdrs "${CMAKE_CURRENT_LIST_DIR}/misc/tflite/schema_generated.h")
list(APPEND fw_inc "${CMAKE_CURRENT_LIST_DIR}/misc/tflite")
# Schema is generated by this command:
#add_custom_command(
# OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h"
# COMMAND flatbuffers::flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_LIST_DIR}/src/tflite/schema.fbs")
endif()
list(APPEND include_dirs ${fw_inc})
list(APPEND libs ${Protobuf_LIBRARIES})
if(NOT BUILD_PROTOBUF)
list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS})
endif()
set(sources_options "")
list(APPEND libs ${LAPACK_LIBRARIES})
if(OPENCV_DNN_OPENCL AND HAVE_OPENCL)
list(APPEND include_dirs ${OPENCL_INCLUDE_DIRS})
else()
set(sources_options EXCLUDE_OPENCL)
endif()
if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
list(APPEND include_dirs ${CUDA_TOOLKIT_INCLUDE} ${CUDNN_INCLUDE_DIRS})
set(CC_LIST ${CUDA_ARCH_BIN})
separate_arguments(CC_LIST)
foreach(cc ${CC_LIST})
if(cc VERSION_LESS 3.0)
message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
endif()
endforeach()
unset(CC_LIST)
if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
list(APPEND libs ${CUDNN_LIBRARIES} CUDA::cublas${CUDA_LIB_EXT})
if(NOT CUDA_VERSION VERSION_LESS 10.1)
list(APPEND libs CUDA::cublasLt${CUDA_LIB_EXT})
endif()
endif()
else()
set(sources_options ${sources_options} EXCLUDE_CUDA)
endif()
if(HAVE_TIMVX)
list(APPEND include_dirs ${TIMVX_INCLUDE_DIR})
list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive)
endif()
if(HAVE_CANN)
list(APPEND include_dirs ${CANN_INCLUDE_DIRS})
list(APPEND libs -Wl,--whole-archive ${CANN_LIBRARIES} -Wl,--no-whole-archive)
endif()
set(webnn_srcs "")
if(NOT EMSCRIPTEN)
if(HAVE_WEBNN)
list(APPEND include_dirs ${WEBNN_HEADER_DIRS})
list(APPEND include_dirs ${WEBNN_INCLUDE_DIRS})
list(APPEND libs -Wl,--whole-archive ${WEBNN_LIBRARIES} -Wl,--no-whole-archive)
list(APPEND webnn_srcs $ENV{WEBNN_NATIVE_DIR}/gen/src/webnn/webnn_cpp.cpp)
endif()
endif()
ocv_module_include_directories(${include_dirs})
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-array-bounds") # GCC 9.3.0 (Ubuntu 20.04)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang
endif()
set(dnn_runtime_libs "")
file(GLOB_RECURSE dnn_srcs
"${CMAKE_CURRENT_LIST_DIR}/src/*.cpp"
)
file(GLOB_RECURSE dnn_int_hdrs
"${CMAKE_CURRENT_LIST_DIR}/src/*.hpp"
"${CMAKE_CURRENT_LIST_DIR}/src/*.h"
)
set(dnn_plugin_srcs ${dnn_srcs} ${dnn_int_hdrs})
ocv_list_filterout_ex(dnn_plugin_srcs
"/src/dnn.cpp$|/src/dnn_utils.cpp$|/src/dnn_utils.cpp$|/src/dnn_read.cpp$|/src/registry.cpp$|/src/backend.cpp$"
# importers
"/src/(caffe|darknet|onnx|tensorflow|torch)/"
# executors
"/src/(cuda|cuda4dnn|ocl4dnn|vkcom|webnn)/"
)
ocv_option(OPENCV_DNN_OPENVINO "Build with OpenVINO support (2021.4+)" (TARGET ocv.3rdparty.openvino))
if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO)
if(NOT HAVE_OPENVINO AND NOT HAVE_NGRAPH)
message(FATAL_ERROR "DNN: Inference Engine is not supported without enabled 'nGraph'. Check build configuration.")
endif()
if("openvino" IN_LIST DNN_PLUGIN_LIST OR DNN_PLUGIN_LIST STREQUAL "all")
# plugin doesn't support PCH, separate directory scope is necessary
# opencv_world requires absolute path
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/misc/plugin/openvino" "${CMAKE_CURRENT_BINARY_DIR}/dnn_plugin_openvino")
elseif(NOT OPENCV_DNN_BUILTIN_BACKEND)
list(APPEND dnn_runtime_libs ocv.3rdparty.openvino)
endif()
endif()
set(OPENCV_DNN_BACKEND_DEFAULT "" CACHE STRING "Default backend used by the DNN module (DNN_BACKEND_OPENCV if empty)")
if(OPENCV_DNN_BACKEND_DEFAULT)
ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}")
endif()
ocv_install_used_external_targets(${libs} ${dnn_runtime_libs})
ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs} ${webnn_srcs})
ocv_create_module(${libs} ${dnn_runtime_libs})
ocv_add_samples()
ocv_add_accuracy_tests(${dnn_runtime_libs})
if(NOT BUILD_PROTOBUF)
if(TARGET opencv_test_dnn)
ocv_target_compile_definitions(opencv_test_dnn PRIVATE "OPENCV_DNN_EXTERNAL_PROTOBUF=1")
endif()
endif()
set(perf_path "${CMAKE_CURRENT_LIST_DIR}/perf")
file(GLOB_RECURSE perf_srcs "${perf_path}/*.cpp")
file(GLOB_RECURSE perf_hdrs "${perf_path}/*.hpp" "${perf_path}/*.h")
ocv_add_perf_tests(${dnn_runtime_libs}
FILES test_common "${CMAKE_CURRENT_LIST_DIR}/test/test_common.hpp" "${CMAKE_CURRENT_LIST_DIR}/test/test_common.impl.hpp"
FILES Src ${perf_srcs}
FILES Include ${perf_hdrs}
)
ocv_option(OPENCV_DNN_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(OPENCV_DNN_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)
if(BUILD_PERF_TESTS)
if (OPENCV_DNN_PERF_CAFFE
OR ${the_module}_PERF_CAFFE # compatibility for deprecated option
)
find_package(Caffe QUIET)
if (Caffe_FOUND)
ocv_target_compile_definitions(opencv_perf_dnn PRIVATE "HAVE_CAFFE=1")
ocv_target_link_libraries(opencv_perf_dnn caffe)
endif()
elseif(OPENCV_DNN_PERF_CLCAFFE
OR ${the_module}_PERF_CAFFE # compatibility for deprecated option
)
find_package(Caffe QUIET)
if (Caffe_FOUND)
ocv_target_compile_definitions(opencv_perf_dnn PRIVATE "HAVE_CLCAFFE=1")
ocv_target_link_libraries(opencv_perf_dnn caffe)
endif()
endif()
endif()
if(DNN_ENABLE_PLUGINS)
ocv_target_compile_definitions(${the_module} PRIVATE ENABLE_PLUGINS)
if(TARGET opencv_test_dnn)
ocv_target_compile_definitions(opencv_test_dnn PRIVATE ENABLE_PLUGINS)
endif()
if(OPENCV_DEBUG_POSTFIX)
ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/backend.cpp" "DEBUG_POSTFIX=${OPENCV_DEBUG_POSTFIX}")
endif()
endif()
ocv_option(OPENCV_TEST_DNN_OPENVINO "Build test with OpenVINO code" (TARGET ocv.3rdparty.openvino))
if(TARGET ocv.3rdparty.openvino AND OPENCV_TEST_DNN_OPENVINO)
if(TARGET opencv_test_dnn)
ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.openvino)
endif()
endif()
ocv_option(OPENCV_TEST_DNN_CANN "Build test with CANN" (TARGET ocv.3rdparty.cann))
if(TARGET ocv.3rdparty.cann AND OPENCV_TEST_DNN_CANN)
if(TARGET opencv_test_dnn)
ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.cann)
endif()
endif()
ocv_option(OPENCV_TEST_DNN_TIMVX "Build test with TIM-VX" (HAVE_TIMVX))
if(OPENCV_TEST_DNN_TIMVX)
if(TARGET opencv_test_dnn)
ocv_target_compile_definitions(opencv_test_dnn PRIVATE "HAVE_TIMVX=1")
endif()
endif()
ocv_option(OPENCV_TEST_DNN_TFLITE "Build test with TFLite" (OPENCV_DNN_TFLITE))
if(OPENCV_TEST_DNN_TFLITE)
if(TARGET opencv_test_dnn)
ocv_target_compile_definitions(opencv_test_dnn PRIVATE "OPENCV_TEST_DNN_TFLITE=1")
endif()
if(TARGET opencv_perf_dnn)
ocv_target_compile_definitions(opencv_perf_dnn PRIVATE "OPENCV_TEST_DNN_TFLITE=1")
endif()
endif()