From 12338c1dc4a7ddea7ae9db8626e775eb2330557a Mon Sep 17 00:00:00 2001 From: HAN Liutong Date: Wed, 16 Feb 2022 16:01:38 +0800 Subject: [PATCH 01/84] Update clang toolchain for RVV. --- platforms/linux/riscv64-clang.toolchain.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platforms/linux/riscv64-clang.toolchain.cmake b/platforms/linux/riscv64-clang.toolchain.cmake index f19c244f7b..62d9e293d2 100644 --- a/platforms/linux/riscv64-clang.toolchain.cmake +++ b/platforms/linux/riscv64-clang.toolchain.cmake @@ -17,8 +17,8 @@ set(CMAKE_ASM_COMPILER_TARGET ${CLANG_TARGET_TRIPLE}) # Don't run the linker on compiler check set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -set(CMAKE_C_FLAGS "-march=rv64gcv0p10 -menable-experimental-extensions --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "-march=rv64gcv0p10 -menable-experimental-extensions --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}") +set(CMAKE_C_FLAGS "-march=rv64gcv --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "-march=rv64gcv --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") From f5105bac653423900f255c5f2666478c2fdc27b7 Mon Sep 17 00:00:00 2001 From: Zhuo Zhang Date: Mon, 21 Feb 2022 20:39:41 +0800 Subject: [PATCH 02/84] remove const in seamless_cloding APIs for better semantics --- modules/photo/src/seamless_cloning.hpp | 6 +++--- modules/photo/src/seamless_cloning_impl.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/photo/src/seamless_cloning.hpp b/modules/photo/src/seamless_cloning.hpp index 92b24e7b09..4d43970d2d 100644 --- a/modules/photo/src/seamless_cloning.hpp +++ b/modules/photo/src/seamless_cloning.hpp @@ -53,7 +53,7 @@ namespace cv class Cloning { public: - void normalClone(const cv::Mat& destination, const cv::Mat &mask, const 
cv::Mat &wmask, cv::Mat &cloned, int flag); + void normalClone(const cv::Mat& destination, const cv::Mat &mask, cv::Mat &wmask, cv::Mat &cloned, int flag); void illuminationChange(cv::Mat &I, cv::Mat &mask, cv::Mat &wmask, cv::Mat &cloned, float alpha, float beta); void localColorChange(cv::Mat &I, cv::Mat &mask, cv::Mat &wmask, cv::Mat &cloned, float red_mul, float green_mul, float blue_mul); void textureFlatten(cv::Mat &I, cv::Mat &mask, cv::Mat &wmask, float low_threshold, float high_threhold, int kernel_size, cv::Mat &cloned); @@ -61,10 +61,10 @@ namespace cv protected: void initVariables(const cv::Mat &destination, const cv::Mat &binaryMask); - void computeDerivatives(const cv::Mat &destination, const cv::Mat &patch, const cv::Mat &binaryMask); + void computeDerivatives(const cv::Mat &destination, const cv::Mat &patch, cv::Mat &binaryMask); void scalarProduct(cv::Mat mat, float r, float g, float b); void poisson(const cv::Mat &destination); - void evaluate(const cv::Mat &I, const cv::Mat &wmask, const cv::Mat &cloned); + void evaluate(const cv::Mat &I, cv::Mat &wmask, const cv::Mat &cloned); void dst(const Mat& src, Mat& dest, bool invert = false); void solve(const Mat &img, Mat& mod_diff, Mat &result); diff --git a/modules/photo/src/seamless_cloning_impl.cpp b/modules/photo/src/seamless_cloning_impl.cpp index 8fd4bc7865..4b3258a1d9 100644 --- a/modules/photo/src/seamless_cloning_impl.cpp +++ b/modules/photo/src/seamless_cloning_impl.cpp @@ -246,7 +246,7 @@ void Cloning::initVariables(const Mat &destination, const Mat &binaryMask) filter_Y[j] = 2.0f * (float)std::cos(scale * (j + 1)); } -void Cloning::computeDerivatives(const Mat& destination, const Mat &patch, const Mat &binaryMask) +void Cloning::computeDerivatives(const Mat& destination, const Mat &patch, Mat &binaryMask) { initVariables(destination, binaryMask); @@ -306,7 +306,7 @@ void Cloning::poisson(const Mat &destination) } } -void Cloning::evaluate(const Mat &I, const Mat &wmask, const Mat &cloned) 
+void Cloning::evaluate(const Mat &I, Mat &wmask, const Mat &cloned) { bitwise_not(wmask,wmask); @@ -320,7 +320,7 @@ void Cloning::evaluate(const Mat &I, const Mat &wmask, const Mat &cloned) merge(output,cloned); } -void Cloning::normalClone(const Mat &destination, const Mat &patch, const Mat &binaryMask, Mat &cloned, int flag) +void Cloning::normalClone(const Mat &destination, const Mat &patch, Mat &binaryMask, Mat &cloned, int flag) { const int w = destination.cols; const int h = destination.rows; From 8f1c502d2b94c283badd8d11277f17d9cdecddd8 Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Thu, 24 Feb 2022 13:35:52 +0300 Subject: [PATCH 03/84] Merge pull request #21618 from sivanov-work:vpp_preproc_core G-API: Add VPL/VPP preproc core module * Add BaseMediAdapter for VPL * Add PreprocSession & PreprocEngine interface part * Implement preproc UT, Fix base path * Add common PP interface, add common pp_params * Rough decoupling VPL & Preproc * Add syntax sugar for PP interface * Integrate VPP preproc in GIEbackend * Add PP bypass * Add perf tests for PP * Fix warning in vpl core UT * Add inner preproc resolution Unit Test * Remove VPP preproc description from single ROI sample * Apply SetROIBlob for diferent Infer operations * Eliminate extra branch-lines for cfg_param_parser & transcode_engine * Fix UT warning &PreprocSession compile * Fix compilation & warnings * Reduce Session&Engine code amount * Apply some comments * Revert IE changes, rename preproc * Fix for DX11 infer for OV: turn off texture array * Remove dependency PP on IE * Change fixture tests params * Apply other comments & turn off ROI for GPU * Fix compilation: remove forgotten INFER define * Apply debt comments * Fix PP UTs: add FrameInfo value comparator * Fix style * Remove standalone map for preproc frames storage * Add other comments --- modules/gapi/CMakeLists.txt | 5 + .../gapi_streaming_source_perf_tests.cpp | 211 +++++++- .../gapi/samples/onevpl_infer_single_roi.cpp | 30 +- 
modules/gapi/src/backends/ie/giebackend.cpp | 24 +- .../onevpl/accelerators/accel_policy_cpu.cpp | 5 +- .../onevpl/accelerators/accel_policy_cpu.hpp | 2 +- .../onevpl/accelerators/accel_policy_dx11.cpp | 38 +- .../onevpl/accelerators/accel_policy_dx11.hpp | 2 +- .../accelerators/accel_policy_interface.hpp | 11 +- .../surface/base_frame_adapter.cpp | 70 +++ .../surface/base_frame_adapter.hpp | 43 ++ .../surface/cpu_frame_adapter.cpp | 42 +- .../surface/cpu_frame_adapter.hpp | 16 +- .../surface/dx11_frame_adapter.cpp | 107 ++-- .../surface/dx11_frame_adapter.hpp | 14 +- .../streaming/onevpl/cfg_params_parser.cpp | 77 +-- .../engine/decode/decode_engine_legacy.cpp | 32 +- .../engine/decode/decode_engine_legacy.hpp | 1 - .../onevpl/engine/decode/decode_session.cpp | 29 +- .../onevpl/engine/decode/decode_session.hpp | 16 +- .../onevpl/engine/engine_session.cpp | 31 +- .../onevpl/engine/engine_session.hpp | 12 +- .../onevpl/engine/preproc/preproc_engine.cpp | 459 ++++++++++++++++ .../onevpl/engine/preproc/preproc_engine.hpp | 67 +++ .../onevpl/engine/preproc/preproc_session.cpp | 67 +++ .../onevpl/engine/preproc/preproc_session.hpp | 61 +++ .../streaming/onevpl/engine/preproc/utils.cpp | 86 +++ .../streaming/onevpl/engine/preproc/utils.hpp | 32 ++ .../engine/preproc/vpp_preproc_defines.hpp | 29 + .../onevpl/engine/preproc_defines.hpp | 89 ++++ .../engine/preproc_engine_interface.hpp | 35 ++ .../transcode/transcode_engine_legacy.cpp | 67 +-- .../transcode/transcode_engine_legacy.hpp | 2 - .../engine/transcode/transcode_session.cpp | 17 +- .../engine/transcode/transcode_session.hpp | 5 +- .../src/streaming/onevpl/onevpl_export.hpp | 3 + .../gapi/src/streaming/onevpl/source_priv.cpp | 11 +- .../gapi/src/streaming/onevpl/source_priv.hpp | 3 +- modules/gapi/src/streaming/onevpl/utils.cpp | 38 ++ modules/gapi/src/streaming/onevpl/utils.hpp | 4 + .../gapi_streaming_vpl_core_test.cpp | 262 ++++++++- .../gapi_streaming_vpl_data_provider.cpp | 8 +- 
.../gapi_streaming_vpp_preproc_test.cpp | 495 ++++++++++++++++++ 43 files changed, 2260 insertions(+), 398 deletions(-) create mode 100644 modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp create mode 100644 modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp create mode 100644 modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index dd135d1efc..af50af3f8c 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -186,6 +186,7 @@ set(gapi_srcs src/streaming/onevpl/cfg_params_parser.cpp src/streaming/onevpl/utils.cpp src/streaming/onevpl/data_provider_interface_exception.cpp + src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/surface.cpp @@ -200,6 +201,8 @@ set(gapi_srcs src/streaming/onevpl/engine/decode/decode_session.cpp src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp src/streaming/onevpl/engine/transcode/transcode_session.cpp + 
src/streaming/onevpl/engine/preproc/preproc_engine.cpp + src/streaming/onevpl/engine/preproc/preproc_session.cpp src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp src/streaming/onevpl/data_provider_dispatcher.cpp @@ -360,7 +363,9 @@ endif() # perf test dependencies postprocessing if(HAVE_GAPI_ONEVPL) # NB: TARGET opencv_perf_gapi doesn't exist before `ocv_add_perf_tests` + # src/ is specified to include dirs for INTERNAL tests only. if(TARGET opencv_perf_gapi) + target_include_directories(opencv_perf_gapi PRIVATE "${CMAKE_CURRENT_LIST_DIR}/src") ocv_target_compile_definitions(opencv_perf_gapi PRIVATE -DHAVE_ONEVPL) ocv_target_link_libraries(opencv_perf_gapi PRIVATE ${VPL_IMPORTED_TARGETS}) if(HAVE_D3D11 AND HAVE_OPENCL) diff --git a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp index 7d06ad068b..513d4d1f56 100644 --- a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp +++ b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp @@ -11,6 +11,13 @@ #include #include +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/cfg_param_device_selector.hpp" +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" + namespace opencv_test { using namespace perf; @@ -32,10 +39,10 @@ using codec_t = std::string; using accel_mode_t = std::string; using source_description_t = std::tuple; -class OneVPLSourcePerfTest : public TestPerfParams {}; -class VideoCapSourcePerfTest : public TestPerfParams {}; +class OneVPLSourcePerf_Test : public TestPerfParams {}; +class VideoCapSourcePerf_Test : public TestPerfParams {}; -PERF_TEST_P_(OneVPLSourcePerfTest, TestPerformance) +PERF_TEST_P_(OneVPLSourcePerf_Test, TestPerformance) { using 
namespace cv::gapi::wip::onevpl; @@ -67,7 +74,7 @@ PERF_TEST_P_(OneVPLSourcePerfTest, TestPerformance) SANITY_CHECK_NOTHING(); } -PERF_TEST_P_(VideoCapSourcePerfTest, TestPerformance) +PERF_TEST_P_(VideoCapSourcePerf_Test, TestPerformance) { using namespace cv::gapi::wip; @@ -82,7 +89,7 @@ PERF_TEST_P_(VideoCapSourcePerfTest, TestPerformance) SANITY_CHECK_NOTHING(); } -INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerfTest, +INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerf_Test, Values(source_description_t(files[0], codec[0], ""), source_description_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11"), source_description_t(files[1], codec[1], ""), @@ -90,10 +97,202 @@ INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerfTest, source_description_t(files[2], codec[2], ""), source_description_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11"))); -INSTANTIATE_TEST_CASE_P(Streaming, VideoCapSourcePerfTest, +INSTANTIATE_TEST_CASE_P(Streaming, VideoCapSourcePerf_Test, Values(files[0], files[1], files[2])); + +using pp_out_param_t = cv::GFrameDesc; +using source_description_preproc_t = decltype(std::tuple_cat(std::declval(), + std::declval>())); +class OneVPLSourcePerf_PP_Test : public TestPerfParams {}; + +PERF_TEST_P_(OneVPLSourcePerf_PP_Test, TestPerformance) +{ + using namespace cv::gapi::wip::onevpl; + + const auto params = GetParam(); + source_t src = findDataFile(get<0>(params)); + codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); + pp_out_param_t res = get<3>(params); + + std::vector cfg_params { + CfgParam::create_implementation("MFX_IMPL_TYPE_HARDWARE"), + }; + + if (!type.empty()) { + cfg_params.push_back(CfgParam::create_decoder_id(type.c_str())); + } + + if (!mode.empty()) { + cfg_params.push_back(CfgParam::create_acceleration_mode(mode.c_str())); + } + + cfg_params.push_back(CfgParam::create_vpp_out_width(static_cast(res.size.width))); + cfg_params.push_back(CfgParam::create_vpp_out_height(static_cast(res.size.height))); + 
cfg_params.push_back(CfgParam::create_vpp_out_crop_x(0)); + cfg_params.push_back(CfgParam::create_vpp_out_crop_y(0)); + cfg_params.push_back(CfgParam::create_vpp_out_crop_w(static_cast(res.size.width))); + cfg_params.push_back(CfgParam::create_vpp_out_crop_h(static_cast(res.size.height))); + + auto source_ptr = cv::gapi::wip::make_onevpl_src(src, cfg_params); + + cv::gapi::wip::Data out; + TEST_CYCLE() + { + source_ptr->pull(out); + } + + SANITY_CHECK_NOTHING(); +} +static pp_out_param_t full_hd = pp_out_param_t {cv::MediaFormat::NV12, + {1920, 1080}}; + +static pp_out_param_t cif = pp_out_param_t {cv::MediaFormat::NV12, + {352, 288}}; + +INSTANTIATE_TEST_CASE_P(Streaming_Source_PP, OneVPLSourcePerf_PP_Test, + Values(source_description_preproc_t(files[0], codec[0], "", full_hd), + source_description_preproc_t(files[0], codec[0], "", cif), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", cif), + source_description_preproc_t(files[1], codec[1], "", full_hd), + source_description_preproc_t(files[1], codec[1], "", cif), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",full_hd), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",cif), + source_description_preproc_t(files[2], codec[2], "", full_hd), + source_description_preproc_t(files[2], codec[2], "", cif), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", cif))); + +class OneVPLSourcePerf_PP_Engine_Test : public TestPerfParams {}; + +PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Test, TestPerformance) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + + const auto params = GetParam(); + source_t src = findDataFile(get<0>(params)); + codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); + const 
pp_out_param_t &required_frame_param = get<3>(params); + + std::vector cfg_params { + CfgParam::create_implementation("MFX_IMPL_TYPE_HARDWARE"), + }; + + if (!type.empty()) { + cfg_params.push_back(CfgParam::create_decoder_id(type.c_str())); + } + + if (!mode.empty()) { + cfg_params.push_back(CfgParam::create_acceleration_mode(mode.c_str())); + } + + auto device_selector = std::make_shared(cfg_params); + auto source_ptr = cv::gapi::wip::make_onevpl_src(src, cfg_params, device_selector); + + // create VPP preproc engine + std::unique_ptr policy; + if (mode == "MFX_ACCEL_MODE_VIA_D3D11") { + policy.reset(new VPLDX11AccelerationPolicy(device_selector)); + } else if (mode.empty()){ + policy.reset(new VPLCPUAccelerationPolicy(device_selector)); + } else { + ASSERT_TRUE(false && "Unsupported acceleration policy type"); + } + VPPPreprocEngine preproc_engine(std::move(policy)); + cv::gapi::wip::Data out; + TEST_CYCLE() + { + source_ptr->pull(out); + cv::MediaFrame frame = cv::util::get(out); + cv::util::optional param = preproc_engine.is_applicable(frame); + pp_session sess = preproc_engine.initialize_preproc(param.value(), + required_frame_param); + (void)preproc_engine.run_sync(sess, frame); + } + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(Streaming_Engine_PP, OneVPLSourcePerf_PP_Engine_Test, + Values(source_description_preproc_t(files[0], codec[0], "", full_hd), + source_description_preproc_t(files[0], codec[0], "", cif), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", cif), + source_description_preproc_t(files[1], codec[1], "", full_hd), + source_description_preproc_t(files[1], codec[1], "", cif), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",full_hd), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",cif), + source_description_preproc_t(files[2], codec[2], "", full_hd), + 
source_description_preproc_t(files[2], codec[2], "", cif), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", cif))); + +class OneVPLSourcePerf_PP_Engine_Bypass_Test : public TestPerfParams {}; + +PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Bypass_Test, TestPerformance) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + + const auto params = GetParam(); + source_t src = findDataFile(get<0>(params)); + codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); + const pp_out_param_t &required_frame_param = get<3>(params); + + std::vector cfg_params { + CfgParam::create_implementation("MFX_IMPL_TYPE_HARDWARE"), + }; + + if (!type.empty()) { + cfg_params.push_back(CfgParam::create_decoder_id(type.c_str())); + } + + if (!mode.empty()) { + cfg_params.push_back(CfgParam::create_acceleration_mode(mode.c_str())); + } + + auto device_selector = std::make_shared(cfg_params); + auto source_ptr = cv::gapi::wip::make_onevpl_src(src, cfg_params, device_selector); + + // create VPP preproc engine + std::unique_ptr policy; + if (mode == "MFX_ACCEL_MODE_VIA_D3D11") { + policy.reset(new VPLDX11AccelerationPolicy(device_selector)); + } else { + policy.reset(new VPLCPUAccelerationPolicy(device_selector)); + } + VPPPreprocEngine preproc_engine(std::move(policy)); + cv::gapi::wip::Data out; + TEST_CYCLE() + { + source_ptr->pull(out); + cv::MediaFrame frame = cv::util::get(out); + cv::util::optional param = preproc_engine.is_applicable(frame); + pp_session sess = preproc_engine.initialize_preproc(param.value(), + required_frame_param); + (void)preproc_engine.run_sync(sess, frame); + } + + SANITY_CHECK_NOTHING(); +} + +static pp_out_param_t res_672x384 = pp_out_param_t {cv::MediaFormat::NV12, + {672, 384}}; +static pp_out_param_t res_336x256 = pp_out_param_t {cv::MediaFormat::NV12, + {336, 256}}; 
+INSTANTIATE_TEST_CASE_P(Streaming_Engine_PP_Bypass, OneVPLSourcePerf_PP_Engine_Bypass_Test, + Values(source_description_preproc_t(files[0], codec[0], "", res_672x384), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", res_672x384), + source_description_preproc_t(files[1], codec[1], "", res_672x384), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11", res_672x384), + source_description_preproc_t(files[2], codec[2], "", res_336x256), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", res_336x256))); } // namespace opencv_test #endif // HAVE_ONEVPL diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp index 80327e2d59..6935cbb709 100644 --- a/modules/gapi/samples/onevpl_infer_single_roi.cpp +++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -46,8 +46,7 @@ const std::string keys = "{ cfg_params | :;: | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. 
Calculated automaticaly if 0 }" "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" - "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" - "{ source_preproc_enable | 0 | Turn on OneVPL source frame preprocessing using network input description instead of IE plugin preprocessing}"; + "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}"; namespace { bool is_gpu(const std::string &device_name) { @@ -217,7 +216,6 @@ int main(int argc, char *argv[]) { const auto streaming_queue_capacity = cmd.get("streaming_queue_capacity"); const auto source_decode_queue_capacity = cmd.get("frames_pool_size"); const auto source_vpp_queue_capacity = cmd.get("vpp_frames_pool_size"); - const auto vpl_source_preproc_enable = cmd.get("source_preproc_enable"); const auto device_id = cmd.get("faced"); // check ouput file extension @@ -235,12 +233,6 @@ int main(int argc, char *argv[]) { try { std::string line; while (std::getline(params_list, line, ';')) { - if (vpl_source_preproc_enable == 0) { - if (line.find("vpp.") != std::string::npos) { - // skip VPP preprocessing primitives if not requested - continue; - } - } source_cfgs.push_back(cfg::create_from_string(line)); } } catch (const std::exception& ex) { @@ -325,23 +317,11 @@ int main(int argc, char *argv[]) { // set ctx_config for GPU device only - no need in case of CPU device type if (is_gpu(device_id)) { InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"}, - {"VA_DEVICE", accel_device_ptr} }); - + {"VA_DEVICE", accel_device_ptr} }); face_net.cfgContextParams(ctx_config); - face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); - std::cout <<"/*******************************************************/\n" - "ATTENTION: GPU Inference Engine preprocessing is not vital as expected!" 
- " Please consider param \"source_preproc_enable=1\" and specify " - " appropriated media frame transformation using oneVPL::VPP primitives" - " which force onevpl::GSource to produce tranformed media frames." - " For exploring list of supported transformations please find out " - " vpp_* related stuff in" - " gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp" - " Pay attention that to obtain expected result In this case VPP " - " transformation must match network input params." - " Please vote/create issue about exporting network params using GAPI\n" - "/******************************************************/" << std::endl; + // NB: consider NV12 surface because it's one of native GPU image format + face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); } #endif // HAVE_INF_ENGINE @@ -378,7 +358,7 @@ int main(int argc, char *argv[]) { cv::GFrame in; auto size = cv::gapi::streaming::size(in); auto roi = custom::LocateROI::on(size, std::cref(device_id)); - auto blob = cv::gapi::infer(roi, in); + auto blob = cv::gapi::infer(in); cv::GArray rcs = cv::gapi::parseSSD(blob, size, 0.5f, true, true); auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, roi)); auto out = cv::gapi::streaming::BGR(out_frame); diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index b155ff0aea..711827d574 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -619,12 +619,14 @@ static void setBlob(InferenceEngine::InferRequest& req, static void setROIBlob(InferenceEngine::InferRequest& req, const std::string& layer_name, const IE::Blob::Ptr& blob, - const cv::Rect &roi, + const cv::Rect &roi, const IECallContext& ctx) { - if (ctx.uu.params.device_id.find("GPU") != std::string::npos) { - GAPI_LOG_DEBUG(nullptr, "Skip ROI blob creation for device_id: " << - ctx.uu.params.device_id << ", layer: " << layer_name); - setBlob(req, layer_name, blob, ctx); + if 
(ctx.uu.params.device_id.find("GPU") != std::string::npos && + ctx.uu.rctx) { + GAPI_LOG_WARNING(nullptr, "ROI blob creation for device_id: " << + ctx.uu.params.device_id << ", layer: " << layer_name << + "is not supported yet"); + GAPI_Assert(false && "Unsupported ROI blob creation for GPU remote context"); } else { setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx); } @@ -1330,8 +1332,7 @@ struct InferList: public cv::detail::KernelTag { reqPool.execute( cv::gimpl::ie::RequestPool::Task { [ctx, rc, this_blob](InferenceEngine::InferRequest &req) { - IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc)); - setBlob(req, ctx->uu.params.input_names[0u], roi_blob, *ctx); + setROIBlob(req, ctx->uu.params.input_names[0u], this_blob, rc, *ctx); req.StartAsync(); }, std::bind(callback, std::placeholders::_1, pos) @@ -1488,19 +1489,20 @@ struct InferList2: public cv::detail::KernelTag { for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) { const auto &this_vec = ctx->inArg(in_idx+1u); GAPI_Assert(this_vec.size() == list_size); - IE::Blob::Ptr this_blob; if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) { const auto &vec = this_vec.rref(); - this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx])); + setROIBlob(req, ctx->uu.params.input_names[in_idx], + blob_0, vec[list_idx], *ctx); } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) { const auto &vec = this_vec.rref(); const auto &mat = vec[list_idx]; - this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR); + setBlob(req, ctx->uu.params.input_names[in_idx], + wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR), + *ctx); } else { GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); } - setBlob(req, ctx->uu.params.input_names[in_idx], this_blob, *ctx); } req.StartAsync(); }, diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp index 
ad0e5bf667..0a5f8f4a35 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp @@ -273,7 +273,7 @@ size_t VPLCPUAccelerationPolicy::get_surface_count(pool_key_t key) const { } cv::MediaFrame::AdapterPtr VPLCPUAccelerationPolicy::create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) { + const FrameConstructorArgs ¶ms) { auto pool_it = pool_table.find(key); if (pool_it == pool_table.end()) { std::stringstream ss; @@ -284,7 +284,8 @@ cv::MediaFrame::AdapterPtr VPLCPUAccelerationPolicy::create_frame_adapter(pool_k } pool_t& requested_pool = pool_it->second; - return cv::MediaFrame::AdapterPtr{new VPLMediaFrameCPUAdapter(requested_pool.find_by_handle(surface))}; + return cv::MediaFrame::AdapterPtr{new VPLMediaFrameCPUAdapter(requested_pool.find_by_handle(params.assoc_surface), + params.assoc_handle)}; } } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp index 8a2061dce0..57c14ad792 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp @@ -38,7 +38,7 @@ struct GAPI_EXPORTS VPLCPUAccelerationPolicy final : public VPLAccelerationPolic size_t get_surface_count(pool_key_t key) const override; cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) override; + const FrameConstructorArgs& args) override; private: std::map pool_table; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp index 02720f3774..932d7d7842 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp @@ -157,12 +157,21 @@ size_t 
VPLDX11AccelerationPolicy::get_free_surface_count(pool_key_t) const { GAPI_Assert(false && "get_free_surface_count() is not implemented"); } -size_t VPLDX11AccelerationPolicy::get_surface_count(pool_key_t) const { - GAPI_Assert(false && "VPLDX11AccelerationPolicy::get_surface_count() is not implemented"); +size_t VPLDX11AccelerationPolicy::get_surface_count(pool_key_t key) const { + auto pool_it = pool_table.find(key); + if (pool_it == pool_table.end()) { + std::stringstream ss; + ss << "key is not found: " << key << ", table size: " << pool_table.size(); + const std::string& str = ss.str(); + GAPI_LOG_WARNING(nullptr, str); + throw std::runtime_error(std::string(__FUNCTION__) + " - " + str); + } + return pool_it->second.total_size(); } -cv::MediaFrame::AdapterPtr VPLDX11AccelerationPolicy::create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) { +cv::MediaFrame::AdapterPtr +VPLDX11AccelerationPolicy::create_frame_adapter(pool_key_t key, + const FrameConstructorArgs ¶ms) { auto pool_it = pool_table.find(key); if (pool_it == pool_table.end()) { std::stringstream ss; @@ -173,7 +182,8 @@ cv::MediaFrame::AdapterPtr VPLDX11AccelerationPolicy::create_frame_adapter(pool_ } pool_t& requested_pool = pool_it->second; - return cv::MediaFrame::AdapterPtr{new VPLMediaFrameDX11Adapter(requested_pool.find_by_handle(surface))}; + return cv::MediaFrame::AdapterPtr{new VPLMediaFrameDX11Adapter(requested_pool.find_by_handle(params.assoc_surface), + params.assoc_handle)}; } mfxStatus VPLDX11AccelerationPolicy::alloc_cb(mfxHDL pthis, mfxFrameAllocRequest *request, @@ -283,12 +293,28 @@ mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *reques desc.BindFlags = 0; } + /* NB: + * On the one hand current OpenVINO API doesn't support texture array and + * D3D11 API doesn't allow to address specific texture element in array. 
+ * On the other hand using textures array should be more performant case + * in applications (according to community experience) + * So, to be compliant with OV let's turn off textures array feature, but keep + * this code in commented section to consider such "optimization" in future + */ +#if 0 size_t main_textures_count = 1; if (D3D11_BIND_RENDER_TARGET & desc.BindFlags) { GAPI_LOG_DEBUG(nullptr, "Use array of testures instead of texture array"); desc.ArraySize = 1; main_textures_count = request->NumFrameSuggested; } +#else + // enforcement to use array of textures + size_t main_textures_count = request->NumFrameSuggested; + + // enforcement to do not use texture array as subresources as part of a single texture + desc.ArraySize = 1; +#endif // create GPU textures HRESULT err = S_OK; @@ -407,6 +433,8 @@ mfxStatus VPLDX11AccelerationPolicy::on_free(mfxFrameAllocResponse *response) { } allocation_table.erase(table_it); + GAPI_LOG_DEBUG(nullptr, "Allocation by requested id: " << response->AllocId << + " has been erased"); return MFX_ERR_NONE; } } // namespace onevpl diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp index 893698eb36..61513a45af 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp @@ -49,7 +49,7 @@ struct GAPI_EXPORTS VPLDX11AccelerationPolicy final: public VPLAccelerationPolic size_t get_surface_count(pool_key_t key) const override; cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) override; + const FrameConstructorArgs ¶ms) override; private: ID3D11Device *hw_handle; ID3D11DeviceContext* device_context; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp index b1d7c25bb1..a2a4845db2 100644 --- 
a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp @@ -16,13 +16,12 @@ #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" namespace cv { namespace gapi { namespace wip { namespace onevpl { - -class Surface; struct VPLAccelerationPolicy { using device_selector_ptr_t = std::shared_ptr; @@ -40,6 +39,11 @@ struct VPLAccelerationPolicy size_t out_buf_ptr_offset, size_t out_buf_ptr_size)>; + struct FrameConstructorArgs { + surface_t::handle_t *assoc_surface; + session_t assoc_handle; + }; + device_selector_ptr_t get_device_selector() { return device_selector; } @@ -61,8 +65,7 @@ struct VPLAccelerationPolicy virtual size_t get_surface_count(pool_key_t key) const = 0; virtual cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) = 0; -private: + const FrameConstructorArgs ¶ms) = 0; device_selector_ptr_t device_selector; }; } // namespace onevpl diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp new file mode 100644 index 0000000000..82859e474c --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp @@ -0,0 +1,70 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "logger.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +BaseFrameAdapter::BaseFrameAdapter(std::shared_ptr surface, SessionHandle assoc_handle): + parent_surface_ptr(surface), parent_handle(assoc_handle) { + GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); + GAPI_Assert(parent_handle && "mfxSession is nullptr"); + + const Surface::info_t& info = parent_surface_ptr->get_info(); + GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << + ", w: " << info.Width << ", h: " << info.Height << + ", p: " << parent_surface_ptr->get_data().Pitch << + ", frame id: " << reinterpret_cast(this)); + switch(info.FourCC) { + case MFX_FOURCC_I420: + throw std::runtime_error("MediaFrame doesn't support I420 type"); + break; + case MFX_FOURCC_NV12: + frame_desc.fmt = MediaFormat::NV12; + break; + default: + throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); + } + + frame_desc.size = cv::Size{info.Width, info.Height}; + parent_surface_ptr->obtain_lock(); +} + +BaseFrameAdapter::~BaseFrameAdapter() { + // Each BaseFrameAdapter releases mfx surface counter + // The last BaseFrameAdapter releases shared Surface pointer + // The last surface pointer releases workspace memory + GAPI_LOG_DEBUG(nullptr, "destroy frame id: " << reinterpret_cast(this)); + parent_surface_ptr->release_lock(); +} + +const std::shared_ptr& BaseFrameAdapter::get_surface() const { + return parent_surface_ptr; +} + +std::shared_ptr BaseFrameAdapter::surface() { + return parent_surface_ptr; +} + +const BaseFrameAdapter::SessionHandle BaseFrameAdapter::get_session_handle() const { + return parent_handle; +} + +cv::GFrameDesc BaseFrameAdapter::meta() const { + return 
frame_desc; +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp new file mode 100644 index 0000000000..3d8d951535 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp @@ -0,0 +1,43 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_BASE_FRAME_ADAPTER_HPP +#define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_BASE_FRAME_ADAPTER_HPP +#include + +#include +#include "streaming/onevpl/accelerators/surface/surface.hpp" + +#ifdef HAVE_ONEVPL + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +class BaseFrameAdapter : public cv::MediaFrame::IAdapter { +public: + using SessionHandle = mfxSession; + + const std::shared_ptr& get_surface() const; + const SessionHandle get_session_handle() const; + + cv::GFrameDesc meta() const override; +protected: + BaseFrameAdapter(std::shared_ptr assoc_surface, SessionHandle assoc_handle); + ~BaseFrameAdapter(); + std::shared_ptr surface(); + + std::shared_ptr parent_surface_ptr; + SessionHandle parent_handle; + GFrameDesc frame_desc; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_BASE_FRAME_ADAPTER_HPP diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp index 39094c9bc3..58be29f628 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp +++ 
b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp @@ -16,46 +16,16 @@ namespace gapi { namespace wip { namespace onevpl { -VPLMediaFrameCPUAdapter::VPLMediaFrameCPUAdapter(std::shared_ptr surface): - parent_surface_ptr(surface) { - - GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); - GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << - ", w: " << parent_surface_ptr->get_info().Width << - ", h: " << parent_surface_ptr->get_info().Height << - ", p: " << parent_surface_ptr->get_data().Pitch); - const Surface::info_t& info = parent_surface_ptr->get_info(); - switch(info.FourCC) - { - case MFX_FOURCC_I420: - throw std::runtime_error("MediaFrame doesn't support I420 type"); - break; - case MFX_FOURCC_NV12: - frame_desc.fmt = MediaFormat::NV12; - break; - default: - throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); - } - - frame_desc.size = cv::Size{info.Width, info.Height}; - parent_surface_ptr->obtain_lock(); +VPLMediaFrameCPUAdapter::VPLMediaFrameCPUAdapter(std::shared_ptr surface, + SessionHandle assoc_handle): + BaseFrameAdapter(surface, assoc_handle) { } -VPLMediaFrameCPUAdapter::~VPLMediaFrameCPUAdapter() { - - // Each VPLMediaFrameCPUAdapter releases mfx surface counter - // The last VPLMediaFrameCPUAdapter releases shared Surface pointer - // The last surface pointer releases workspace memory - parent_surface_ptr->release_lock(); -} - -cv::GFrameDesc VPLMediaFrameCPUAdapter::meta() const { - return frame_desc; -} +VPLMediaFrameCPUAdapter::~VPLMediaFrameCPUAdapter() = default; MediaFrame::View VPLMediaFrameCPUAdapter::access(MediaFrame::Access) { - const Surface::data_t& data = parent_surface_ptr->get_data(); - const Surface::info_t& info = parent_surface_ptr->get_info(); + const Surface::data_t& data = get_surface()->get_data(); + const Surface::info_t& info = get_surface()->get_info(); using stride_t = typename cv::MediaFrame::View::Strides::value_type; stride_t 
pitch = static_cast(data.Pitch); diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp index 1c51ad7473..849c3a2775 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp @@ -6,10 +6,8 @@ #ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_CPU_FRAME_ADAPTER_HPP #define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_CPU_FRAME_ADAPTER_HPP -#include -#include -#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" #ifdef HAVE_ONEVPL @@ -18,22 +16,20 @@ namespace gapi { namespace wip { namespace onevpl { -class Surface; -class VPLMediaFrameCPUAdapter : public cv::MediaFrame::IAdapter { +class VPLMediaFrameCPUAdapter : public BaseFrameAdapter { public: // GAPI_EXPORTS for tests - GAPI_EXPORTS explicit VPLMediaFrameCPUAdapter(std::shared_ptr assoc_surface); + GAPI_EXPORTS explicit VPLMediaFrameCPUAdapter(std::shared_ptr assoc_surface, + SessionHandle assoc_handle); GAPI_EXPORTS ~VPLMediaFrameCPUAdapter(); - cv::GFrameDesc meta() const override; + MediaFrame::View access(MediaFrame::Access) override; // The default implementation does nothing cv::util::any blobParams() const override; void serialize(cv::gapi::s11n::IOStream&) override; void deserialize(cv::gapi::s11n::IIStream&) override; -private: - std::shared_ptr parent_surface_ptr; - GFrameDesc frame_desc; + }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp index 6afa2cf0b6..db23a3c69f 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp @@ 
-40,117 +40,71 @@ void unlock_mid(mfxMemId mid, mfxFrameData &data, MediaFrame::Access mode) { } } -VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr surface): - parent_surface_ptr(surface) { - GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); - - const Surface::info_t& info = parent_surface_ptr->get_info(); - Surface::data_t& data = parent_surface_ptr->get_data(); - GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << - ", w: " << info.Width << ", h: " << info.Height << - ", p: " << data.Pitch << - ", frame id: " << reinterpret_cast(this)); - switch(info.FourCC) - { - case MFX_FOURCC_I420: - throw std::runtime_error("MediaFrame doesn't support I420 type"); - break; - case MFX_FOURCC_NV12: - frame_desc.fmt = MediaFormat::NV12; - break; - default: - throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); - } - frame_desc.size = cv::Size{info.Width, info.Height}; +VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface, + SessionHandle assoc_handle): + BaseFrameAdapter(assoc_surface, assoc_handle) { + Surface::data_t& data = assoc_surface->get_data(); LockAdapter* alloc_data = reinterpret_cast(data.MemId); alloc_data->set_adaptee(this); - - parent_surface_ptr->obtain_lock(); } VPLMediaFrameDX11Adapter::~VPLMediaFrameDX11Adapter() { - // Each VPLMediaFrameDX11Adapter releases mfx surface counter - // The last VPLMediaFrameDX11Adapter releases shared Surface pointer - // The last surface pointer releases workspace memory - - GAPI_LOG_DEBUG(nullptr, "destroy frame id: " << reinterpret_cast(this)); - - Surface::data_t& data = parent_surface_ptr->get_data(); + Surface::data_t& data = surface()->get_data(); LockAdapter* alloc_data = reinterpret_cast(data.MemId); alloc_data->set_adaptee(nullptr); - - parent_surface_ptr->release_lock(); -} - -cv::GFrameDesc VPLMediaFrameDX11Adapter::meta() const { - return frame_desc; } MediaFrame::View 
VPLMediaFrameDX11Adapter::access(MediaFrame::Access mode) { - Surface::data_t& data = parent_surface_ptr->get_data(); - const Surface::info_t& info = parent_surface_ptr->get_info(); + // NB: make copy for some copyable object, because access release may be happened + // after source/pool destruction, so we need a copy + auto surface_ptr_copy = surface(); + Surface::data_t& data = surface_ptr_copy->get_data(); + const Surface::info_t& info = surface_ptr_copy->get_info(); void* frame_id = reinterpret_cast(this); - GAPI_LOG_DEBUG(nullptr, "START lock frame in surface: " << parent_surface_ptr->get_handle() << + GAPI_LOG_DEBUG(nullptr, "START lock frame in surface: " << surface_ptr_copy->get_handle() << ", frame id: " << frame_id); // lock MT lock_mid(data.MemId, data, mode); - GAPI_LOG_DEBUG(nullptr, "FINISH lock frame in surface: " << parent_surface_ptr->get_handle() << + GAPI_LOG_DEBUG(nullptr, "FINISH lock frame in surface: " << surface_ptr_copy->get_handle() << ", frame id: " << frame_id); using stride_t = typename cv::MediaFrame::View::Strides::value_type; stride_t pitch = static_cast(data.Pitch); - // NB: make copy for some copyable object, because access release may be happened - // after source/pool destruction, so we need a copy - auto parent_surface_ptr_copy = parent_surface_ptr; + auto release_guard = [surface_ptr_copy, frame_id, mode] () { + surface_ptr_copy->obtain_lock(); + + auto& data = surface_ptr_copy->get_data(); + GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + unlock_mid(data.MemId, data, mode); + + GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + surface_ptr_copy->release_lock(); + }; + switch(info.FourCC) { case MFX_FOURCC_I420: { GAPI_Assert(data.Y && data.U && data.V && "MFX_FOURCC_I420 frame data is nullptr"); cv::MediaFrame::View::Ptrs pp = { data.Y, data.U, data.V, nullptr }; 
cv::MediaFrame::View::Strides ss = { pitch, pitch / 2, pitch / 2, 0u }; - return cv::MediaFrame::View(std::move(pp), std::move(ss), - [parent_surface_ptr_copy, - frame_id, mode] () { - parent_surface_ptr_copy->obtain_lock(); - - auto& data = parent_surface_ptr_copy->get_data(); - GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - unlock_mid(data.MemId, data, mode); - - GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - - parent_surface_ptr_copy->release_lock(); - }); + return cv::MediaFrame::View(std::move(pp), std::move(ss), release_guard); } case MFX_FOURCC_NV12: { if (!data.Y || !data.UV) { - GAPI_LOG_WARNING(nullptr, "Empty data detected!!! for surface: " << parent_surface_ptr->get_handle() << + GAPI_LOG_WARNING(nullptr, "Empty data detected!!! for surface: " << surface_ptr_copy->get_handle() << ", frame id: " << frame_id); } GAPI_Assert(data.Y && data.UV && "MFX_FOURCC_NV12 frame data is nullptr"); cv::MediaFrame::View::Ptrs pp = { data.Y, data.UV, nullptr, nullptr }; cv::MediaFrame::View::Strides ss = { pitch, pitch, 0u, 0u }; - return cv::MediaFrame::View(std::move(pp), std::move(ss), - [parent_surface_ptr_copy, - frame_id, mode] () { - parent_surface_ptr_copy->obtain_lock(); - - auto& data = parent_surface_ptr_copy->get_data(); - GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - unlock_mid(data.MemId, data, mode); - - GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - parent_surface_ptr_copy->release_lock(); - }); + return cv::MediaFrame::View(std::move(pp), std::move(ss), release_guard); } break; default: @@ -162,8 +116,9 @@ cv::util::any VPLMediaFrameDX11Adapter::blobParams() const { /*GAPI_Assert(false && 
"VPLMediaFrameDX11Adapter::blobParams() is not fully integrated" "in OpenVINO InferenceEngine and would be temporary disable.");*/ #ifdef HAVE_INF_ENGINE - Surface::data_t& data = parent_surface_ptr->get_data(); - const Surface::info_t& info = parent_surface_ptr->get_info(); + auto surface_ptr_copy = get_surface(); + Surface::data_t& data = surface_ptr_copy->get_data(); + const Surface::info_t& info = surface_ptr_copy->get_info(); NativeHandleAdapter* native_handle_getter = reinterpret_cast(data.MemId); mfxHDLPair handle{}; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp index ca6602353b..39528ca6a5 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp @@ -8,9 +8,7 @@ #define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_DX11_FRAME_ADAPTER_HPP #include -#include -#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS - +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" #include "streaming/onevpl/accelerators/utils/shared_lock.hpp" #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" @@ -30,15 +28,13 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { - -class Surface; -class VPLMediaFrameDX11Adapter final: public cv::MediaFrame::IAdapter, +class VPLMediaFrameDX11Adapter final: public BaseFrameAdapter, public SharedLock { public: // GAPI_EXPORTS for tests - GAPI_EXPORTS VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface); + GAPI_EXPORTS VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface, + SessionHandle assoc_handle); GAPI_EXPORTS ~VPLMediaFrameDX11Adapter(); - cv::GFrameDesc meta() const override; MediaFrame::View access(MediaFrame::Access) override; // The default implementation does nothing @@ -48,9 +44,7 @@ public: static DXGI_FORMAT get_dx11_color_format(uint32_t 
mfx_fourcc); private: - std::shared_ptr parent_surface_ptr; mfxFrameAllocator allocator; - GFrameDesc frame_desc; }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp index d748825b1c..afb92f4443 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp @@ -94,75 +94,22 @@ std::vector get_params_from_string(const std::string& str) { ret.push_back(creator.create(name, cstr_to_mfx_accel_mode(value.c_str()))); } else if (name == "mfxImplDescription.ApiVersion.Version") { ret.push_back(creator.create(name, cstr_to_mfx_version(value.c_str()))); - } else if (name == CfgParam::frames_pool_size_name()) { + } else if ((name == CfgParam::frames_pool_size_name()) || (name == CfgParam::vpp_frames_pool_size_name())) { ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false)); - } else if (name == CfgParam::vpp_frames_pool_size_name()) { - ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false)); - } else if (name == CfgParam::vpp_in_width_name()) { + } else if ((name == CfgParam::vpp_in_width_name()) || (name == CfgParam::vpp_in_height_name()) || + (name == CfgParam::vpp_in_crop_w_name()) || (name == CfgParam::vpp_in_crop_h_name()) || + (name == CfgParam::vpp_in_crop_x_name()) || (name == CfgParam::vpp_in_crop_y_name()) || + (name == CfgParam::vpp_out_chroma_format_name()) || + (name == CfgParam::vpp_out_width_name()) || (name == CfgParam::vpp_out_height_name()) || + (name == CfgParam::vpp_out_crop_w_name()) || (name == CfgParam::vpp_out_crop_h_name()) || + (name == CfgParam::vpp_out_crop_x_name()) || (name == CfgParam::vpp_out_crop_y_name()) || + (name == CfgParam::vpp_out_pic_struct_name())) { ret.push_back(creator.create(name, static_cast(strtoul_or_throw(value.c_str())), false)); - } else if (name == CfgParam::vpp_in_height_name()) { - 
ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_w_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_h_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_x_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_y_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_fourcc_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_chroma_format_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_width_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_height_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_w_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_h_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_x_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_y_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_pic_struct_name()) { - 
ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_framerate_n_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_framerate_d_name()) { + } else if ((name == CfgParam::vpp_out_fourcc_name()) || + (name == CfgParam::vpp_out_framerate_n_name()) || + (name == CfgParam::vpp_out_framerate_d_name())) { ret.push_back(creator.create(name, static_cast(strtoul_or_throw(value.c_str())), false)); diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp index d8af94f939..e6afbb92fd 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp @@ -83,11 +83,8 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptrDataLength)) - ? 
my_sess.stream.get() - - : nullptr, /* No more data to read, start decode draining mode*/ - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -98,12 +95,12 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptrget_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -282,22 +279,19 @@ VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session, sess_ptr->init_surface_pool(param.decode_pool_key); // prepare working decode surface - sess_ptr->swap_surface(*this); + sess_ptr->swap_decode_surface(*this); return sess_ptr; } -ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::execute_op(operation_t& op, EngineSession& sess) { - return op(sess); -} - void VPLLegacyDecodeEngine::on_frame_ready(LegacyDecodeSession& sess, mfxFrameSurface1* ready_surface) { GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "], frame ready"); // manage memory ownership rely on acceleration policy + VPLAccelerationPolicy::FrameConstructorArgs args{ready_surface, sess.session}; auto frame_adapter = acceleration_policy->create_frame_adapter(sess.decoder_pool_id, - ready_surface); + args); ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); // pop away synced out object @@ -313,7 +307,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt { // prepare sync object for new surface try { - sess.swap_surface(*this); + sess.swap_decode_surface(*this); return ExecutionStatus::Continue; } catch (const std::runtime_error& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); @@ -334,7 +328,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt // This applies to external memory allocations and should not be expected for // a simple internal 
allocation case like this try { - sess.swap_surface(*this); + sess.swap_decode_surface(*this); return ExecutionStatus::Continue; } catch (const std::runtime_error& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); @@ -358,9 +352,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt // The decoder detected a new sequence header in the bitstream. // Video parameters may have changed. // In external memory allocation case, might need to reallocate the output surface - /*GAPI_DbgAssert(false && "VPLLegacyDecodeEngine::process_error - " - "MFX_WRN_VIDEO_PARAM_CHANGED is not processed"); - */ + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] got MFX_WRN_VIDEO_PARAM_CHANGED"); return ExecutionStatus::Continue; break; case MFX_ERR_INCOMPATIBLE_VIDEO_PARAM: @@ -380,7 +372,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt break; case MFX_WRN_IN_EXECUTION: try { - sess.swap_surface(*this); + sess.swap_decode_surface(*this); return ExecutionStatus::Continue; } catch (const std::runtime_error& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp index 1b7bee6a82..5b4142b693 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp @@ -41,7 +41,6 @@ protected: const std::vector& cfg_params, std::shared_ptr provider); - ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; ExecutionStatus process_error(mfxStatus status, LegacyDecodeSession& sess); void on_frame_ready(LegacyDecodeSession& sess, diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp index 
56e51ffd9f..4d522e6db6 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp @@ -22,10 +22,11 @@ namespace onevpl { LegacyDecodeSession::LegacyDecodeSession(mfxSession sess, DecoderParams&& decoder_param, std::shared_ptr provider) : - EngineSession(sess, std::move(decoder_param.stream)), + EngineSession(sess), mfx_decoder_param(std::move(decoder_param.param)), data_provider(std::move(provider)), - procesing_surface_ptr(), + stream(std::move(decoder_param.stream)), + processing_surface_ptr(), sync_queue(), decoded_frames_count() { @@ -37,25 +38,10 @@ LegacyDecodeSession::~LegacyDecodeSession() MFXVideoDECODE_Close(session); } -void LegacyDecodeSession::swap_surface(VPLLegacyDecodeEngine& engine) { +void LegacyDecodeSession::swap_decode_surface(VPLLegacyDecodeEngine& engine) { VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); - try { - auto cand = acceleration_policy->get_free_surface(decoder_pool_id).lock(); - - GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" - ", old: " << (!procesing_surface_ptr.expired() - ? 
procesing_surface_ptr.lock()->get_handle() - : nullptr) << - ", new: "<< cand->get_handle()); - - procesing_surface_ptr = cand; - } catch (const std::runtime_error& ex) { - GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); - - // Delegate exception processing on caller - throw; - } + request_free_surface(session, decoder_pool_id, *acceleration_policy, processing_surface_ptr); } void LegacyDecodeSession::init_surface_pool(VPLAccelerationPolicy::pool_key_t key) { @@ -77,6 +63,11 @@ Data::Meta LegacyDecodeSession::generate_frame_meta() { const mfxFrameInfo& LegacyDecodeSession::get_video_param() const { return mfx_decoder_param.mfx.FrameInfo; } + +IDataProvider::mfx_bitstream *LegacyDecodeSession::get_mfx_bitstream_ptr() { + return (data_provider || (stream && stream->DataLength)) ? + stream.get() : nullptr; +} } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp index 356f9851cd..e87ce6cb65 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp @@ -13,7 +13,6 @@ #include #include "streaming/onevpl/engine/engine_session.hpp" -#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" @@ -21,11 +20,7 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { - -struct IDataProvider; class Surface; -struct VPLAccelerationPolicy; - class GAPI_EXPORTS LegacyDecodeSession : public EngineSession { public: friend class VPLLegacyDecodeEngine; @@ -35,19 +30,22 @@ public: ~LegacyDecodeSession(); using EngineSession::EngineSession; - void swap_surface(VPLLegacyDecodeEngine& engine); + void swap_decode_surface(VPLLegacyDecodeEngine& engine); void init_surface_pool(VPLAccelerationPolicy::pool_key_t key); Data::Meta 
generate_frame_meta(); virtual const mfxFrameInfo& get_video_param() const override; + + IDataProvider::mfx_bitstream *get_mfx_bitstream_ptr(); private: mfxVideoParam mfx_decoder_param; - std::shared_ptr data_provider; VPLAccelerationPolicy::pool_key_t decoder_pool_id; - mfxFrameAllocRequest request; + + std::shared_ptr data_provider; + std::shared_ptr stream; protected: - std::weak_ptr procesing_surface_ptr; + std::weak_ptr processing_surface_ptr; using op_handle_t = std::pair; std::queue sync_queue; diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp index 4915b51e34..9a2b812db3 100644 --- a/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp @@ -14,8 +14,10 @@ namespace gapi { namespace wip { namespace onevpl { -EngineSession::EngineSession(mfxSession sess, std::shared_ptr&& str) : - session(sess), stream(std::move(str)) {} +EngineSession::EngineSession(mfxSession sess) : + session(sess) { +} + EngineSession::~EngineSession() { GAPI_LOG_INFO(nullptr, "Close session: " << session); @@ -26,6 +28,31 @@ std::string EngineSession::error_code_to_str() const { return mfxstatus_to_string(last_status); } + +void EngineSession::request_free_surface(mfxSession session, + VPLAccelerationPolicy::pool_key_t key, + VPLAccelerationPolicy &acceleration_policy, + std::weak_ptr &surface_to_exchange, + bool reset_if_not_found) { + try { + auto cand = acceleration_policy.get_free_surface(key).lock(); + + GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" + ", old: " << (!surface_to_exchange.expired() + ? 
surface_to_exchange.lock()->get_handle() + : nullptr) << + ", new: "<< cand->get_handle()); + + surface_to_exchange = cand; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); + if (reset_if_not_found) { + surface_to_exchange.reset(); + } + // Delegate exception processing on caller side + throw; + } +} } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp index 8a1f4383eb..d38dad3812 100644 --- a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp @@ -16,6 +16,8 @@ #include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS #include +#include "streaming/onevpl/data_provider_defines.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" @@ -38,15 +40,19 @@ struct GAPI_EXPORTS TranscoderParams { struct GAPI_EXPORTS EngineSession { mfxSession session; - std::shared_ptr stream; - mfxSyncPoint sync; mfxStatus last_status; - EngineSession(mfxSession sess, std::shared_ptr&& str); + EngineSession(mfxSession sess); std::string error_code_to_str() const; virtual ~EngineSession(); virtual const mfxFrameInfo& get_video_param() const = 0; + + static void request_free_surface(mfxSession session, + VPLAccelerationPolicy::pool_key_t key, + VPLAccelerationPolicy &acceleration_policy, + std::weak_ptr &surface_to_exchange, + bool reset_if_not_found = false); }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp new file mode 100644 index 0000000000..1fb9bd4195 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp @@ -0,0 +1,459 @@ +// This file is part of 
OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include + +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" + +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" +#include "logger.hpp" + +#define ALIGN16(value) (((value + 15) >> 4) << 4) + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +bool FrameInfoComparator::operator()(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs) const { + return lhs < rhs; +} + +bool FrameInfoComparator::equal_to(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs) { + return lhs == rhs; +} + +VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& accel) : + ProcessingEngineBase(std::move(accel)) { + GAPI_LOG_INFO(nullptr, "Create VPP preprocessing engine"); + preprocessed_frames_count = 0; + create_pipeline( + // 0) preproc decoded surface with VPP params + [this] (EngineSession& sess) -> ExecutionStatus + { + session_type &my_sess = static_cast(sess); + while (!my_sess.sync_in_queue.empty()) { + do { + if (!my_sess.processing_surface_ptr.expired()) { + session_type::incoming_task pending_op = my_sess.sync_in_queue.front(); + GAPI_LOG_DEBUG(nullptr, "pending IN operations count: " << + my_sess.sync_in_queue.size() << + ", sync id: " << + pending_op.sync_handle << + ", surface: " << + pending_op.decoded_surface_ptr); + + my_sess.sync_in_queue.pop(); + auto *vpp_suface = my_sess.processing_surface_ptr.lock()->get_handle(); + + /* TODO: consider CROP/ROI here + static int x_offset = 0; + static int y_offset = 0; + dec_surface->Info.CropX = x_offset; + dec_surface->Info.CropY = y_offset; + 
dec_surface->Info.CropW = 100 + x_offset++; + dec_surface->Info.CropH = 100 + y_offset++; + */ + session_type::outgoing_task vpp_pending_op {pending_op.sync_handle, nullptr}; + my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session, + pending_op.decoded_surface_ptr, + vpp_suface, + nullptr, &vpp_pending_op.sync_handle); + vpp_pending_op.vpp_surface_ptr = vpp_suface; + + GAPI_LOG_DEBUG(nullptr, "Got VPP async operation" << + ", sync id: " << + vpp_pending_op.sync_handle << + ", dec surface: " << + pending_op.decoded_surface_ptr << + ", trans surface: " << + vpp_pending_op.vpp_surface_ptr << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + // NB: process status + if (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_ERR_NONE) { + vpp_pending_op.vpp_surface_ptr->Data.Locked++; // TODO -S- workaround + my_sess.vpp_out_queue.emplace(vpp_pending_op); + } + } + + try { + my_sess.swap_surface(*this); + } catch (const std::runtime_error& ex) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. 
+ // But print WARNING to notify user about pipeline stuck + GAPI_LOG_WARNING(nullptr, "[" << my_sess.session << + "] has no VPP surface, reason: " << + ex.what()); + my_sess.processing_surface_ptr.reset(); + break; + } + } while(my_sess.last_status == MFX_ERR_MORE_SURFACE); + + if (my_sess.processing_surface_ptr.expired()) { + // TODO break main loop + break; + } + } + return ExecutionStatus::Continue; + }, + // 1) Wait for ASYNC decode result + [this] (EngineSession& sess) -> ExecutionStatus + { + session_type& my_sess = static_cast(sess); + do { + if (!my_sess.vpp_out_queue.empty()) { // FIFO: check the oldest async operation complete + session_type::outgoing_task& pending_op = my_sess.vpp_out_queue.front(); + sess.last_status = MFXVideoCORE_SyncOperation(sess.session, pending_op.sync_handle, 0); + + GAPI_LOG_DEBUG(nullptr, "pending VPP operations count: " << + my_sess.vpp_out_queue.size() << + ", sync id: " << + pending_op.sync_handle << + ", surface: " << + pending_op.vpp_surface_ptr << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + // put frames in ready queue on success + if (MFX_ERR_NONE == sess.last_status) { + on_frame_ready(my_sess, pending_op.vpp_surface_ptr); + } + } + } while (MFX_ERR_NONE == sess.last_status && !my_sess.vpp_out_queue.empty()); + return ExecutionStatus::Continue; + }, + // 2) Falls back on generic status procesing + [this] (EngineSession& sess) -> ExecutionStatus + { + return this->process_error(sess.last_status, static_cast(sess)); + } + ); +} + +cv::util::optional VPPPreprocEngine::is_applicable(const cv::MediaFrame& in_frame) { + // TODO consider something smarter than RTI + cv::util::optional ret; + BaseFrameAdapter *vpl_adapter = in_frame.get(); + GAPI_LOG_DEBUG(nullptr, "validate VPP preprocessing is applicable for frame"); + if (vpl_adapter) { + ret = cv::util::make_optional( + pp_params::create(vpl_adapter->get_session_handle(), + vpl_adapter->get_surface()->get_info())); + GAPI_LOG_DEBUG(nullptr, "VPP 
preprocessing applicable, session [" << + vpl_adapter->get_session_handle() << "]"); + } + return ret; +} + +pp_session VPPPreprocEngine::initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) { + const vpp_pp_params ¶ms = initial_frame_param.get(); + + // adjust preprocessing settings + mfxVideoParam mfxVPPParams{0}; + // NB: IN params for VPP session must be equal to decoded surface params + mfxVPPParams.vpp.In = params.info; + + // NB: OUT params must refer to IN params of a network + GAPI_LOG_DEBUG(nullptr, "network input size: " << required_frame_descr.size.width << + "x" << required_frame_descr.size.height); + mfxVPPParams.vpp.Out = mfxVPPParams.vpp.In; + switch (required_frame_descr.fmt) { + case MediaFormat::NV12: + mfxVPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; + break; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported MediaFormat in preprocessing: " << + static_cast(required_frame_descr.fmt) << + ". Frame will be rejected"); + throw std::runtime_error("unsupported MediaFormat value in VPP preprocessing"); + } + + mfxVPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + mfxVPPParams.vpp.Out.Width = static_cast(required_frame_descr.size.width); + mfxVPPParams.vpp.Out.Height = static_cast(required_frame_descr.size.height); + mfxVPPParams.vpp.Out.CropW = mfxVPPParams.vpp.Out.Width; + mfxVPPParams.vpp.Out.CropH = mfxVPPParams.vpp.Out.Height; + + // check In & Out equally to bypass preproc + if (mfxVPPParams.vpp.Out == mfxVPPParams.vpp.In) { + GAPI_LOG_DEBUG(nullptr, "no preproc required"); + return pp_session::create(nullptr); + } + + // recalculate size param according to VPP alignment + mfxVPPParams.vpp.Out.Width = ALIGN16(mfxVPPParams.vpp.Out.Width); + mfxVPPParams.vpp.Out.Height = ALIGN16(mfxVPPParams.vpp.Out.Height); + mfxVPPParams.vpp.Out.CropW = mfxVPPParams.vpp.Out.Width; + mfxVPPParams.vpp.Out.CropH = mfxVPPParams.vpp.Out.Height; + + GAPI_LOG_DEBUG(nullptr, "\nFrom:\n{\n" << 
mfx_frame_info_to_string(mfxVPPParams.vpp.In) << + "}\nTo:\n{\n" << mfx_frame_info_to_string(mfxVPPParams.vpp.Out) << "}"); + + // find existing session + GAPI_LOG_DEBUG(nullptr, "Find existing VPPPreprocSession for requested frame params" + ", total sessions: " << preproc_session_map.size()); + auto it = preproc_session_map.find(mfxVPPParams.vpp.In); + if (it != preproc_session_map.end()) { + GAPI_LOG_DEBUG(nullptr, "[" << it->second->session << "] found"); + return pp_session::create(std::static_pointer_cast(it->second)); + } + + // NB: make some sanity checks + IDeviceSelector::DeviceScoreTable devices = acceleration_policy->get_device_selector()->select_devices(); + GAPI_Assert(devices.size() == 1 && "Multiple(or zero) acceleration devices case is unsupported"); + AccelType accel_type = devices.begin()->second.get_type(); + // assign acceleration + if (accel_type == AccelType::DX11) { + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; + } else { + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; + } + + // clone existing VPL session to inherit VPL loader configuration + // and avoid refer to any global state + // TODO no clone due to clone issue + + mfxSession mfx_vpp_session = params.handle; + mfxStatus sts = MFX_ERR_NONE; + + // TODO: simply use clone after VPL bug fixing + //sts = MFXCloneSession(params.handle, &mfx_vpp_session); + sts = MFXCreateSession(mfx_handle, impl_number, &mfx_vpp_session); + if (sts != MFX_ERR_NONE) { + GAPI_LOG_WARNING(nullptr, "Cannot clone VPP session, error: " << mfxstatus_to_string(sts)); + GAPI_Assert(false && "Cannot continue VPP preprocessing"); + } + + sts = MFXJoinSession(params.handle, mfx_vpp_session); + if (sts != MFX_ERR_NONE) { + GAPI_LOG_WARNING(nullptr, "Cannot join VPP sessions, error: " << mfxstatus_to_string(sts)); + GAPI_Assert(false && "Cannot continue VPP preprocessing"); + } + + GAPI_LOG_INFO(nullptr, "[" << mfx_vpp_session << "] 
starting pool allocation"); + VPLAccelerationPolicy::pool_key_t vpp_out_pool_key {}; + try { + // assign HW acceleration processor + acceleration_policy->init(mfx_vpp_session); + try { + // ask to allocate external memory pool + mfxFrameAllocRequest vppRequests[2]; + memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2); + sts = MFXVideoVPP_QueryIOSurf(mfx_vpp_session, &mfxVPPParams, vppRequests); + if (MFX_ERR_NONE != sts) { + GAPI_LOG_WARNING(nullptr, "cannot execute MFXVideoVPP_QueryIOSurf, error: " << + mfxstatus_to_string(sts)); + throw std::runtime_error("Cannot execute MFXVideoVPP_QueryIOSurf"); + } + + // NB: Assing ID as upper limit descendant to distinguish specific VPP allocation + // from decode allocations witch started from 0: by local module convention + + static uint16_t request_id = 0; + vppRequests[1].AllocId = std::numeric_limits::max() - request_id++; + GAPI_Assert(request_id != std::numeric_limits::max() && "Something wrong"); + + vppRequests[1].Type |= MFX_MEMTYPE_FROM_VPPIN; + vpp_out_pool_key = acceleration_policy->create_surface_pool(vppRequests[1], + mfxVPPParams.vpp.Out); + + sts = MFXVideoVPP_Init(mfx_vpp_session, &mfxVPPParams); + if (MFX_ERR_NONE != sts) { + GAPI_LOG_WARNING(nullptr, "cannot Init VPP, error: " << + mfxstatus_to_string(sts)); + // TODO consider deallocate pool + // but not necessary now cause every fail processed as GAPI_Assert + throw std::runtime_error("Cannot init VPP, error: " + + mfxstatus_to_string(sts)); + } + } catch (const std::exception&) { + GAPI_LOG_WARNING(nullptr, "[" << mfx_vpp_session << "] allocation failed, rollback"); + acceleration_policy->deinit(mfx_vpp_session); + throw; + } + } catch (const std::exception&) { + MFXClose(mfx_vpp_session); + GAPI_Assert(false && "Cannot init preproc resources"); + } + + // create engine session after all + session_ptr_type sess_ptr = register_session(mfx_vpp_session, + mfxVPPParams); + sess_ptr->init_surface_pool(vpp_out_pool_key); + 
sess_ptr->swap_surface(*this); + + bool inserted = preproc_session_map.emplace(mfxVPPParams.vpp.In, sess_ptr).second; + GAPI_Assert(inserted && "preproc session is exist"); + GAPI_LOG_INFO(nullptr, "VPPPreprocSession created, total sessions: " << preproc_session_map.size()); + return pp_session::create(std::static_pointer_cast(sess_ptr)); +} + +void VPPPreprocEngine::on_frame_ready(session_type& sess, + mfxFrameSurface1* ready_surface) +{ + GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "], frame ready"); + + // manage memory ownership rely on acceleration policy + ready_surface->Data.Locked--; // TODO -S- workaround + VPLAccelerationPolicy::FrameConstructorArgs args{ready_surface, sess.session}; + auto frame_adapter = acceleration_policy->create_frame_adapter(sess.vpp_pool_id, + args); + ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); + + // pop away synced out object + sess.vpp_out_queue.pop(); +} + +VPPPreprocEngine::session_ptr +VPPPreprocEngine::initialize_session(mfxSession, + const std::vector&, + std::shared_ptr) { + return {}; +} + +cv::MediaFrame VPPPreprocEngine::run_sync(const pp_session& sess, const cv::MediaFrame& in_frame) { + + std::shared_ptr pp_sess_impl = sess.get(); + if (!pp_sess_impl) { + // bypass case + return in_frame; + } + session_ptr_type s = std::static_pointer_cast(pp_sess_impl); + GAPI_DbgAssert(s && "Session is nullptr"); + GAPI_DbgAssert(is_applicable(in_frame) && + "VPP preproc is not applicable for the given frame"); + BaseFrameAdapter *vpl_adapter = in_frame.get(); + if (!vpl_adapter) { + GAPI_LOG_WARNING(nullptr, "VPP preproc is inapplicable for a given frame. 
" + "Make sure the frame is collected using onevpl::GSource"); + throw std::runtime_error("VPP preproc is inapplicable for given frame"); + } + + // schedule decoded surface into preproc queue + session_type::incoming_task in_preproc_request {nullptr, + vpl_adapter->get_surface()->get_handle(), + in_frame}; + s->sync_in_queue.emplace(in_preproc_request); + + // invoke pipeline to transform decoded surface into preprocessed surface + try + { + ExecutionStatus status = ExecutionStatus::Continue; + while (0 == get_ready_frames_count() && + status == ExecutionStatus::Continue) { + status = process(s->session); + } + + if (get_ready_frames_count() == 0) { + GAPI_LOG_WARNING(nullptr, "failed: cannot obtain preprocessed frames, last status: " << + ProcessingEngineBase::status_to_string(status)); + throw std::runtime_error("cannot finalize VPP preprocessing operation"); + } + } catch(const std::exception&) { + throw; + } + // obtain new frame is available + cv::gapi::wip::Data data; + get_frame(data); + preprocessed_frames_count++; + GAPI_LOG_DEBUG(nullptr, "processed frames count: " << preprocessed_frames_count); + return cv::util::get(data); +} + +ProcessingEngineBase::ExecutionStatus VPPPreprocEngine::process_error(mfxStatus status, session_type& sess) { + GAPI_LOG_DEBUG(nullptr, "status: " << mfxstatus_to_string(status)); + + switch (status) { + case MFX_ERR_NONE: + { + // prepare sync object for new surface + try { + sess.swap_surface(*this); + return ExecutionStatus::Continue; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); + return ExecutionStatus::Continue; // read more data + } + } + case MFX_ERR_MORE_DATA: // The function requires more bitstream at input before decoding can proceed + return ExecutionStatus::Processed; + case MFX_ERR_MORE_SURFACE: + { + // The function requires more frame surface at output before decoding can proceed. 
+ // This applies to external memory allocations and should not be expected for + // a simple internal allocation case like this + try { + sess.swap_surface(*this); + return ExecutionStatus::Continue; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); + return ExecutionStatus::Continue; // read more data + } + break; + } + case MFX_ERR_DEVICE_LOST: + // For non-CPU implementations, + // Cleanup if device is lost + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_ERR_DEVICE_LOST is not processed"); + break; + case MFX_WRN_DEVICE_BUSY: + // For non-CPU implementations, + // Wait a few milliseconds then try again + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_WRN_DEVICE_BUSY is not processed"); + break; + case MFX_WRN_VIDEO_PARAM_CHANGED: + // The decoder detected a new sequence header in the bitstream. + // Video parameters may have changed. + // In external memory allocation case, might need to reallocate the output surface + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] got MFX_WRN_VIDEO_PARAM_CHANGED"); + return ExecutionStatus::Continue; + break; + case MFX_ERR_INCOMPATIBLE_VIDEO_PARAM: + // The function detected that video parameters provided by the application + // are incompatible with initialization parameters. + // The application should close the component and then reinitialize it + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is not processed"); + break; + case MFX_ERR_REALLOC_SURFACE: + // Bigger surface_work required. May be returned only if + // mfxInfoMFX::EnableReallocRequest was set to ON during initialization. 
+ // This applies to external memory allocations and should not be expected for + // a simple internal allocation case like this + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_ERR_REALLOC_SURFACE is not processed"); + break; + case MFX_WRN_IN_EXECUTION: + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] got MFX_WRN_IN_EXECUTION"); + return ExecutionStatus::Continue; + default: + GAPI_LOG_WARNING(nullptr, "Unknown status code: " << mfxstatus_to_string(status) << + ", decoded frames: " << sess.preprocessed_frames_count); + break; + } + + return ExecutionStatus::Failed; +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp new file mode 100644 index 0000000000..c4be48708a --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONVPL_PREPROC_ENGINE_HPP +#define GAPI_STREAMING_ONVPL_PREPROC_ENGINE_HPP +#include +#include +#include + +#include "streaming/onevpl/engine/processing_engine_base.hpp" +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" + +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +// GAPI_EXPORTS for tests +struct GAPI_EXPORTS FrameInfoComparator { + bool operator()(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs) const; + static bool equal_to(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs); +}; + +class VPPPreprocSession; +struct IDataProvider; +struct VPLAccelerationPolicy; + +// GAPI_EXPORTS for tests +class GAPI_EXPORTS VPPPreprocEngine final : public ProcessingEngineBase, + public cv::gapi::wip::IPreprocEngine { +public: + using session_type = VPPPreprocSession; + using session_ptr_type = std::shared_ptr; + + VPPPreprocEngine(std::unique_ptr&& accel); + + cv::util::optional is_applicable(const cv::MediaFrame& in_frame) override; + + pp_session initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) override; + + cv::MediaFrame run_sync(const pp_session &session_handle, + const cv::MediaFrame& in_frame) override; + +private: + std::map preproc_session_map; + void on_frame_ready(session_type& sess, + mfxFrameSurface1* ready_surface); + ExecutionStatus process_error(mfxStatus status, session_type& sess); + session_ptr initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) override; + size_t preprocessed_frames_count; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONVPL_PREPROC_ENGINE_HPP diff --git 
a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp new file mode 100644 index 0000000000..059b7caea7 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/utils.hpp" +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +VPPPreprocSession::VPPPreprocSession(mfxSession sess, const mfxVideoParam& vpp_out_param) : + EngineSession(sess), + mfx_vpp_out_param(vpp_out_param), + processing_surface_ptr(), + sync_in_queue(), + vpp_out_queue(), + preprocessed_frames_count() +{ +} + +VPPPreprocSession::~VPPPreprocSession() { + GAPI_LOG_INFO(nullptr, "Close VPP for session: " << session); + MFXVideoVPP_Close(session); +} + +Data::Meta VPPPreprocSession::generate_frame_meta() { + const auto now = std::chrono::system_clock::now(); + const auto dur = std::chrono::duration_cast + (now.time_since_epoch()); + Data::Meta meta { + {cv::gapi::streaming::meta_tag::timestamp, int64_t{dur.count()} }, + {cv::gapi::streaming::meta_tag::seq_id, int64_t{preprocessed_frames_count++}} + }; + return meta; +} + +void VPPPreprocSession::swap_surface(VPPPreprocEngine& engine) { + VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); + GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); + request_free_surface(session, vpp_pool_id, *acceleration_policy, + processing_surface_ptr, true); +} + +void 
VPPPreprocSession::init_surface_pool(VPLAccelerationPolicy::pool_key_t key) { + GAPI_Assert(key && "Init preproc pull with empty key"); + vpp_pool_id = key; +} + +const mfxFrameInfo& VPPPreprocSession::get_video_param() const { + return mfx_vpp_out_param.vpp.Out; +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp new file mode 100644 index 0000000000..1f873fda56 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp @@ -0,0 +1,61 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONVPL_PREPROC_SESSION_HPP +#define GAPI_STREAMING_ONVPL_PREPROC_SESSION_HPP +#include +#include + +#include +#include "streaming/onevpl/engine/engine_session.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp" + +#ifdef HAVE_ONEVPL + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +class VPPPreprocEngine; + +class VPPPreprocSession : public EngineSession { +public: + friend class VPPPreprocEngine; + VPPPreprocSession(mfxSession sess, const mfxVideoParam &vpp_out_param); + ~VPPPreprocSession(); + + Data::Meta generate_frame_meta(); + void swap_surface(VPPPreprocEngine& engine); + void init_surface_pool(VPLAccelerationPolicy::pool_key_t key); + + virtual const mfxFrameInfo& get_video_param() const override; +private: + mfxVideoParam mfx_vpp_out_param; + VPLAccelerationPolicy::pool_key_t vpp_pool_id; + std::weak_ptr processing_surface_ptr; + + struct incoming_task { + mfxSyncPoint sync_handle; + mfxFrameSurface1* 
decoded_surface_ptr; + cv::MediaFrame decoded_frame_copy; + }; + + struct outgoing_task { + mfxSyncPoint sync_handle; + mfxFrameSurface1* vpp_surface_ptr; + }; + + std::queue sync_in_queue; + std::queue vpp_out_queue; + int64_t preprocessed_frames_count; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONVPL_PREPROC_SESSION_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp new file mode 100644 index 0000000000..6cf7212f3e --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp @@ -0,0 +1,86 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#include + +#include "streaming/onevpl/engine/preproc/utils.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +namespace utils { + +cv::MediaFormat fourcc_to_MediaFormat(int value) { + switch (value) + { + case MFX_FOURCC_BGRP: + return cv::MediaFormat::BGR; + case MFX_FOURCC_NV12: + return cv::MediaFormat::NV12; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported FourCC format requested: " << value << + ". Cannot cast to cv::MediaFrame"); + GAPI_Assert(false && "Unsupported FOURCC"); + + } +} + +int MediaFormat_to_fourcc(cv::MediaFormat value) { + switch (value) + { + case cv::MediaFormat::BGR: + return MFX_FOURCC_BGRP; + case cv::MediaFormat::NV12: + return MFX_FOURCC_NV12; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported cv::MediaFormat format requested: " << + static_cast::type>(value) << + ". 
Cannot cast to FourCC"); + GAPI_Assert(false && "Unsupported cv::MediaFormat"); + } +} +int MediaFormat_to_chroma(cv::MediaFormat value) { + switch (value) + { + case cv::MediaFormat::BGR: + return MFX_CHROMAFORMAT_MONOCHROME; + case cv::MediaFormat::NV12: + return MFX_CHROMAFORMAT_YUV420; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported cv::MediaFormat format requested: " << + static_cast::type>(value) << + ". Cannot cast to ChromaFormateIdc"); + GAPI_Assert(false && "Unsupported cv::MediaFormat"); + } +} + +mfxFrameInfo to_mfxFrameInfo(const cv::GFrameDesc& frame_info) { + mfxFrameInfo ret {0}; + ret.FourCC = MediaFormat_to_fourcc(frame_info.fmt); + ret.ChromaFormat = MediaFormat_to_chroma(frame_info.fmt); + ret.Width = frame_info.size.width; + ret.Height = frame_info.size.height; + ret.CropX = 0; + ret.CropY = 0; + ret.CropW = 0; + ret.CropH = 0; + ret.PicStruct = MFX_PICSTRUCT_UNKNOWN; + ret.FrameRateExtN = 0; + ret.FrameRateExtD = 0; + return ret; +} +} // namespace utils +} // namespace cv +} // namespace gapi +} // namespace wip +} // namespace onevpl + +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp new file mode 100644 index 0000000000..b52a8ad1e8 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp @@ -0,0 +1,32 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_PREPROC_UTILS_HPP +#define GAPI_STREAMING_ONEVPL_PREPROC_UTILS_HPP + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +#include + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +namespace utils { + +cv::MediaFormat fourcc_to_MediaFormat(int value); +int MediaFormat_to_fourcc(cv::MediaFormat value); +int MediaFormat_to_chroma(cv::MediaFormat value); + +mfxFrameInfo to_mfxFrameInfo(const cv::GFrameDesc& frame_info); +} // namespace utils +} // namespace cv +} // namespace gapi +} // namespace wip +} // namespace onevpl +#endif // #ifdef HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_PREPROC_UTILS_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp new file mode 100644 index 0000000000..820510a55d --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp @@ -0,0 +1,29 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#ifndef VPP_PREPROC_ENGINE +#define VPP_PREPROC_ENGINE +#include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/engine/engine_session.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +struct vpp_pp_params { + mfxSession handle; + mfxFrameInfo info; +}; + +using vpp_pp_session_ptr = std::shared_ptr; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // VPP_PREPROC_ENGINE +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp new file mode 100644 index 0000000000..2c72d7c547 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp @@ -0,0 +1,89 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ENGINE_PREPROC_DEFINES_HPP +#define GAPI_STREAMING_ONEVPL_ENGINE_PREPROC_DEFINES_HPP + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/utils.hpp" +#include "streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp" +#endif // HAVE_ONEVPL + + +namespace cv { +namespace gapi { +namespace wip { + +#ifdef VPP_PREPROC_ENGINE +#define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::onevpl::vpp_pp_params +#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::onevpl::vpp_pp_session_ptr +#else // VPP_PREPROC_ENGINE +struct empty_pp_params {}; +struct empty_pp_session {}; +#define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::empty_pp_params; +#define GAPI_BACKEND_PP_SESSIONS std::shared_ptr; +#endif // VPP_PREPROC_ENGINE + +struct pp_params { + using value_type = cv::util::variant; + + template + static pp_params create(Args&& ...args) { + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificParamType requested"); + pp_params ret; + ret.value = BackendSpecificParamType{std::forward(args)...}; + return ret; + } + + template + BackendSpecificParamType& get() { + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificParamType requested"); + return cv::util::get(value); + } + + template + const BackendSpecificParamType& get() const { + return static_cast(const_cast(this)->get()); + } +private: + value_type value; +}; + +struct pp_session { + using value_type = cv::util::variant; + + template + static pp_session create(std::shared_ptr session) { + static_assert(cv::detail::contains, + GAPI_BACKEND_PP_SESSIONS>::value, + "Invalid BackendSpecificSesionType requested"); + pp_session ret; + ret.value = session; + return ret; + } + + template + std::shared_ptr get() { + using ptr_type = std::shared_ptr; + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificSesionType requested"); + return cv::util::get(value); + } + + template + std::shared_ptr get() const { + return 
const_cast(this)->get(); + } +private: + value_type value; +}; +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // GAPI_STREAMING_ONEVPL_ENGINE_PREPROC_DEFINES_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp new file mode 100644 index 0000000000..4997a04562 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp @@ -0,0 +1,35 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ENGINE_PROCESSING_ENGINE_INTERFACE_HPP +#define GAPI_STREAMING_ONEVPL_ENGINE_PROCESSING_ENGINE_INTERFACE_HPP + +#include "precomp.hpp" +#include +#include + +#include "streaming/onevpl/engine/preproc_defines.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +struct IPreprocEngine { + virtual ~IPreprocEngine() = default; + + virtual cv::util::optional + is_applicable(const cv::MediaFrame& in_frame) = 0; + + virtual pp_session + initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) = 0; + virtual cv::MediaFrame + run_sync(const pp_session &sess, const cv::MediaFrame& in_frame) = 0; +}; +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // GAPI_STREAMING_ONEVPL_ENGINE_PROCESSING_ENGINE_INTERFACE_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp index 36d36d5ec0..7cb2c2e5f1 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp @@ -26,34 +26,29 @@ namespace cv { namespace gapi { namespace wip { 
namespace onevpl { +using vpp_param_storage = const std::map; +using vpp_param_storage_cit = typename vpp_param_storage::const_iterator; template -bool set_vpp_param(const char* name, Type& out_vpp_param, - const std::map ¶ms_storage, - mfxSession session); +Type get_mfx_value(const vpp_param_storage_cit &cit); template<> -bool set_vpp_param(const char* name, uint32_t& out_vpp_param, - const std::map ¶ms_storage, - mfxSession session) { - auto it = params_storage.find(name); - if (it != params_storage.end()) { - auto value = it->second.Data.U32; - GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name << - "\": " << value); - out_vpp_param = value; - return true; - } - return false; +uint16_t get_mfx_value(const vpp_param_storage_cit& cit) { + return cit->second.Data.U16; } template<> -bool set_vpp_param(const char* name, uint16_t& out_vpp_param, - const std::map ¶ms_storage, - mfxSession session) { +uint32_t get_mfx_value(const vpp_param_storage_cit& cit) { + return cit->second.Data.U32; +} + +template +bool set_vpp_param(const char* name, Type& out_vpp_param, + const vpp_param_storage ¶ms_storage, + mfxSession session) { auto it = params_storage.find(name); if (it != params_storage.end()) { - auto value = it->second.Data.U16; + auto value = get_mfx_value(it); GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name << "\": " << value); out_vpp_param = value; @@ -81,7 +76,6 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptr ExecutionStatus @@ -110,11 +104,8 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptrDataLength)) - ? 
my_sess.stream.get() - - : nullptr, /* No more data to read, start decode draining mode*/ - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -122,7 +113,7 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptrget_handle() << + my_sess.processing_surface_ptr.lock()->get_handle() << ", dec out surface: " << sync_pair.second << ", status: " << mfxstatus_to_string(my_sess.last_status)); @@ -134,12 +125,12 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptrget_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -287,11 +278,11 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, // override some in-params if (set_vpp_param(CfgParam::vpp_in_width_name(), mfxVPPParams.vpp.In.Width, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.In.Width = ALIGN16(mfxVPPParams.vpp.In.Width); } if (set_vpp_param(CfgParam::vpp_in_height_name(), mfxVPPParams.vpp.In.Height, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.In.Height = ALIGN16(mfxVPPParams.vpp.In.Height); } set_vpp_param(CfgParam::vpp_in_crop_x_name(), mfxVPPParams.vpp.In.CropX, @@ -309,11 +300,11 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, set_vpp_param(CfgParam::vpp_out_chroma_format_name(), mfxVPPParams.vpp.Out.ChromaFormat, cfg_vpp_params, mfx_session); if (set_vpp_param(CfgParam::vpp_out_width_name(), mfxVPPParams.vpp.Out.Width, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.Out.Width = ALIGN16(mfxVPPParams.vpp.Out.Width); } if (set_vpp_param(CfgParam::vpp_out_height_name(), mfxVPPParams.vpp.Out.Height, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.Out.Height = 
ALIGN16(mfxVPPParams.vpp.Out.Height); } set_vpp_param(CfgParam::vpp_out_crop_x_name(), mfxVPPParams.vpp.Out.CropX, @@ -394,7 +385,7 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, sess_ptr->init_transcode_surface_pool(vpp_out_pool_key); // prepare working surfaces - sess_ptr->swap_surface(*this); + sess_ptr->swap_decode_surface(*this); sess_ptr->swap_transcode_surface(*this); return sess_ptr; } @@ -452,10 +443,6 @@ void VPLLegacyTranscodeEngine::validate_vpp_param(const mfxVideoParam& mfxVPPPar GAPI_LOG_INFO(nullptr, "Finished VPP param validation"); } -ProcessingEngineBase::ExecutionStatus VPLLegacyTranscodeEngine::execute_op(operation_t& op, EngineSession& sess) { - return op(sess); -} - void VPLLegacyTranscodeEngine::on_frame_ready(LegacyTranscodeSession& sess, mfxFrameSurface1* ready_surface) { @@ -463,8 +450,10 @@ void VPLLegacyTranscodeEngine::on_frame_ready(LegacyTranscodeSession& sess, // manage memory ownership rely on acceleration policy ready_surface->Data.Locked--; // TODO -S- workaround + + VPLAccelerationPolicy::FrameConstructorArgs args{ready_surface, sess.session}; auto frame_adapter = acceleration_policy->create_frame_adapter(sess.vpp_out_pool_id, - ready_surface); + args); ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); // pop away synced out object diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp index cf0621dd93..d06b76a13d 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp @@ -33,8 +33,6 @@ public: static std::map get_vpp_params(const std::vector &cfg_params); private: - ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; - void on_frame_ready(LegacyTranscodeSession& sess, mfxFrameSurface1* ready_surface); 
void validate_vpp_param(const mfxVideoParam& mfxVPPParams); diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp index 9fcabc7e10..8672a000ad 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp @@ -42,22 +42,7 @@ void LegacyTranscodeSession::init_transcode_surface_pool(VPLAccelerationPolicy:: void LegacyTranscodeSession::swap_transcode_surface(VPLLegacyTranscodeEngine& engine) { VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); - try { - auto cand = acceleration_policy->get_free_surface(vpp_out_pool_id).lock(); - - GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" - ", old: " << (!vpp_surface_ptr.expired() - ? vpp_surface_ptr.lock()->get_handle() - : nullptr) << - ", new: "<< cand->get_handle()); - - vpp_surface_ptr = cand; - } catch (const std::runtime_error& ex) { - GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); - - // Delegate exception processing on caller - throw; - } + request_free_surface(session, vpp_out_pool_id, *acceleration_policy, vpp_surface_ptr); } const mfxFrameInfo& LegacyTranscodeSession::get_video_param() const { diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp index aa6f70c587..ad9ed8e60b 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp @@ -14,10 +14,7 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { - -struct IDataProvider; class Surface; -struct VPLAccelerationPolicy; class GAPI_EXPORTS LegacyTranscodeSession : public LegacyDecodeSession { public: @@ -33,8 +30,8 @@ public: 
const mfxFrameInfo& get_video_param() const override; private: mfxVideoParam mfx_transcoder_param; - VPLAccelerationPolicy::pool_key_t vpp_out_pool_id; + std::weak_ptr vpp_surface_ptr; std::queue vpp_queue; }; diff --git a/modules/gapi/src/streaming/onevpl/onevpl_export.hpp b/modules/gapi/src/streaming/onevpl/onevpl_export.hpp index 44970ee7be..e93a30d869 100644 --- a/modules/gapi/src/streaming/onevpl/onevpl_export.hpp +++ b/modules/gapi/src/streaming/onevpl/onevpl_export.hpp @@ -16,6 +16,9 @@ #include #include + +extern mfxLoader mfx_handle; +extern int impl_number; #endif // HAVE_ONEVPL #if defined(_MSC_VER) diff --git a/modules/gapi/src/streaming/onevpl/source_priv.cpp b/modules/gapi/src/streaming/onevpl/source_priv.cpp index d1ff15b06d..792110aaf9 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.cpp +++ b/modules/gapi/src/streaming/onevpl/source_priv.cpp @@ -36,6 +36,10 @@ GMetaArg GSource::Priv::descr_of() const { #else // HAVE_ONEVPL +// TODO global variable move it into Source after CloneSession issue resolving +mfxLoader mfx_handle = MFXLoad(); +int impl_number = 0; + namespace cv { namespace gapi { namespace wip { @@ -47,7 +51,7 @@ enum { }; GSource::Priv::Priv() : - mfx_handle(MFXLoad()), +// mfx_handle(MFXLoad()), mfx_impl_description(), mfx_handle_configs(), cfg_params(), @@ -187,7 +191,8 @@ GSource::Priv::Priv(std::shared_ptr provider, GAPI_Assert(max_match_it != matches_count.rend() && "Cannot find matched MFX implementation for requested configuration"); - int impl_number = max_match_it->second; + // TODO impl_number is global for now + impl_number = max_match_it->second; GAPI_LOG_INFO(nullptr, "Chosen implementation index: " << impl_number); // release unusable impl available_impl_descriptions @@ -261,7 +266,7 @@ GSource::Priv::~Priv() { GAPI_LOG_INFO(nullptr, "Unload MFX implementation description: " << mfx_impl_description); MFXDispReleaseImplDescription(mfx_handle, mfx_impl_description); GAPI_LOG_INFO(nullptr, "Unload MFX handle: " << 
mfx_handle); - MFXUnload(mfx_handle); + //MFXUnload(mfx_handle); } std::unique_ptr GSource::Priv::initializeHWAccel(std::shared_ptr selector) diff --git a/modules/gapi/src/streaming/onevpl/source_priv.hpp b/modules/gapi/src/streaming/onevpl/source_priv.hpp index b835850d35..21248df7fb 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.hpp +++ b/modules/gapi/src/streaming/onevpl/source_priv.hpp @@ -44,7 +44,8 @@ private: Priv(); std::unique_ptr initializeHWAccel(std::shared_ptr selector); - mfxLoader mfx_handle; + // TODO not it is global variable. Waiting for FIX issue with CloneSession + // mfxLoader mfx_handle; mfxImplDescription *mfx_impl_description; std::vector mfx_handle_configs; std::vector cfg_params; diff --git a/modules/gapi/src/streaming/onevpl/utils.cpp b/modules/gapi/src/streaming/onevpl/utils.cpp index 3ec0dea8ae..37b4074209 100644 --- a/modules/gapi/src/streaming/onevpl/utils.cpp +++ b/modules/gapi/src/streaming/onevpl/utils.cpp @@ -25,6 +25,8 @@ #define APPEND_STRINGIFY_MASK_N_ERASE(value, pref, mask) \ if (value & mask) { ss << pref << #mask; value ^= mask; } +#define DUMP_MEMBER(stream, object, member) \ + stream << #member << ": " << object.member << "\n"; namespace cv { namespace gapi { @@ -359,6 +361,42 @@ std::string mfxstatus_to_string(mfxStatus err) { return ret; } +std::string mfx_frame_info_to_string(const mfxFrameInfo &info) { + std::stringstream ss; + DUMP_MEMBER(ss, info, FrameRateExtN) + DUMP_MEMBER(ss, info, FrameRateExtD) + DUMP_MEMBER(ss, info, AspectRatioW) + DUMP_MEMBER(ss, info, AspectRatioH) + DUMP_MEMBER(ss, info, CropX) + DUMP_MEMBER(ss, info, CropY) + DUMP_MEMBER(ss, info, CropW) + DUMP_MEMBER(ss, info, CropH) + DUMP_MEMBER(ss, info, ChannelId) + DUMP_MEMBER(ss, info, BitDepthLuma) + DUMP_MEMBER(ss, info, BitDepthChroma) + DUMP_MEMBER(ss, info, Shift) + DUMP_MEMBER(ss, info, FourCC) + DUMP_MEMBER(ss, info, Width) + DUMP_MEMBER(ss, info, Height) + DUMP_MEMBER(ss, info, BufferSize) + DUMP_MEMBER(ss, info, PicStruct) + 
DUMP_MEMBER(ss, info, ChromaFormat); + return ss.str(); +} + +int compare(const mfxFrameInfo &lhs, const mfxFrameInfo &rhs) { + //NB: mfxFrameInfo is a `packed` struct declared in VPL + return memcmp(&lhs, &rhs, sizeof(mfxFrameInfo)); +} + +bool operator< (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs) { + return (compare(lhs, rhs) < 0); +} + +bool operator== (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs) { + return (compare(lhs, rhs) == 0); +} + std::string ext_mem_frame_type_to_cstr(int type) { std::stringstream ss; APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_DECODER_TARGET); diff --git a/modules/gapi/src/streaming/onevpl/utils.hpp b/modules/gapi/src/streaming/onevpl/utils.hpp index 76a66a63f4..459f612743 100644 --- a/modules/gapi/src/streaming/onevpl/utils.hpp +++ b/modules/gapi/src/streaming/onevpl/utils.hpp @@ -76,6 +76,10 @@ mfxU32 cstr_to_mfx_version(const char* cstr); std::string GAPI_EXPORTS mfxstatus_to_string(int64_t err); std::string GAPI_EXPORTS mfxstatus_to_string(mfxStatus err); +std::string mfx_frame_info_to_string(const mfxFrameInfo &info); +bool operator< (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs); +bool operator== (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs); + std::ostream& operator<< (std::ostream& out, const mfxImplDescription& idesc); std::string ext_mem_frame_type_to_cstr(int type); diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp index 51fb9f276a..59723dbd5e 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp @@ -68,7 +68,7 @@ struct EmptyDataProvider : public cv::gapi::wip::onevpl::IDataProvider { struct TestProcessingSession : public cv::gapi::wip::onevpl::EngineSession { TestProcessingSession(mfxSession mfx_session) : - EngineSession(mfx_session, {}) { + EngineSession(mfx_session) { } const mfxFrameInfo& get_video_param() const 
override { @@ -319,7 +319,8 @@ TEST(OneVPL_Source_CPU_FrameAdapter, InitFrameAdapter) EXPECT_EQ(0, surf->get_locks_count()); { - VPLMediaFrameCPUAdapter adapter(surf); + mfxSession stub_session = reinterpret_cast(0x1); + VPLMediaFrameCPUAdapter adapter(surf, stub_session); EXPECT_EQ(1, surf->get_locks_count()); } EXPECT_EQ(0, surf->get_locks_count()); @@ -528,9 +529,9 @@ TEST(OneVPL_Source_DX11_Accel, Init) cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); VPLDX11AccelerationPolicy accel(std::make_shared(cfg_params_w_dx11)); - mfxLoader mfx_handle = MFXLoad(); + mfxLoader test_mfx_handle = MFXLoad(); - mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_0 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_0); mfxVariant mfx_param_0; mfx_param_0.Type = MFX_VARIANT_TYPE_U32; @@ -538,7 +539,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), mfx_param_0), MFX_ERR_NONE); - mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_1 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_1); mfxVariant mfx_param_1; mfx_param_1.Type = MFX_VARIANT_TYPE_U32; @@ -546,7 +547,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), mfx_param_1), MFX_ERR_NONE); - mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_2 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_2); mfxVariant mfx_param_2; mfx_param_2.Type = MFX_VARIANT_TYPE_U32; @@ -556,7 +557,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) // create session mfxSession mfx_session{}; - mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + mfxStatus sts = MFXCreateSession(test_mfx_handle, 0, &mfx_session); EXPECT_EQ(MFX_ERR_NONE, sts); // assign acceleration @@ -600,7 +601,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) 
EXPECT_NO_THROW(accel.deinit(mfx_session)); MFXClose(mfx_session); - MFXUnload(mfx_handle); + MFXUnload(test_mfx_handle); } TEST(OneVPL_Source_DX11_Accel_VPL, Init) @@ -611,9 +612,9 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); std::unique_ptr acceleration_policy (new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); - mfxLoader mfx_handle = MFXLoad(); + mfxLoader test_mfx_handle = MFXLoad(); - mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_0 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_0); mfxVariant mfx_param_0; mfx_param_0.Type = MFX_VARIANT_TYPE_U32; @@ -621,7 +622,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), mfx_param_0), MFX_ERR_NONE); - mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_1 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_1); mfxVariant mfx_param_1; mfx_param_1.Type = MFX_VARIANT_TYPE_U32; @@ -629,7 +630,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), mfx_param_1), MFX_ERR_NONE); - mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_2 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_2); mfxVariant mfx_param_2; mfx_param_2.Type = MFX_VARIANT_TYPE_U32; @@ -637,7 +638,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), mfx_param_2), MFX_ERR_NONE); - mfxConfig cfg_inst_3 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_3 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_3); mfxVariant mfx_param_3; mfx_param_3.Type = MFX_VARIANT_TYPE_U32; @@ -647,7 +648,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) mfx_param_3), MFX_ERR_NONE); // create session mfxSession mfx_session{}; - mfxStatus 
sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + mfxStatus sts = MFXCreateSession(test_mfx_handle, 0, &mfx_session); EXPECT_EQ(MFX_ERR_NONE, sts); // assign acceleration @@ -732,7 +733,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) sess_ptr->init_transcode_surface_pool(vpp_out_pool_key); // prepare working surfaces - sess_ptr->swap_surface(engine); + sess_ptr->swap_decode_surface(engine); sess_ptr->swap_transcode_surface(engine); // launch pipeline @@ -756,11 +757,8 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) { my_sess.last_status = MFXVideoDECODE_DecodeFrameAsync(my_sess.session, - (my_sess.data_provider || (my_sess.stream && my_sess.stream->DataLength)) - ? my_sess.stream.get() - - : nullptr, /* No more data to read, start decode draining mode*/ - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -771,12 +769,12 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) my_sess.last_status == MFX_WRN_DEVICE_BUSY) { try { if (my_sess.last_status == MFX_ERR_MORE_SURFACE) { - my_sess.swap_surface(engine); + my_sess.swap_decode_surface(engine); } my_sess.last_status = MFXVideoDECODE_DecodeFrameAsync(my_sess.session, - my_sess.stream.get(), - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -808,6 +806,224 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) } } } + +TEST(OneVPL_Source_DX11_Accel_VPL, preproc) +{ + using namespace cv::gapi::wip::onevpl; + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + std::unique_ptr acceleration_policy (new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + mfxLoader test_mfx_handle = MFXLoad(); + + mfxConfig cfg_inst_0 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_0); + 
mfxVariant mfx_param_0; + mfx_param_0.Type = MFX_VARIANT_TYPE_U32; + mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), + mfx_param_0), MFX_ERR_NONE); + + mfxConfig cfg_inst_1 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_1); + mfxVariant mfx_param_1; + mfx_param_1.Type = MFX_VARIANT_TYPE_U32; + mfx_param_1.Data.U32 = MFX_ACCEL_MODE_VIA_D3D11; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), + mfx_param_1), MFX_ERR_NONE); + + mfxConfig cfg_inst_2 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_2); + mfxVariant mfx_param_2; + mfx_param_2.Type = MFX_VARIANT_TYPE_U32; + mfx_param_2.Data.U32 = MFX_CODEC_HEVC; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), + mfx_param_2), MFX_ERR_NONE); + + mfxConfig cfg_inst_3 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_3); + mfxVariant mfx_param_3; + mfx_param_3.Type = MFX_VARIANT_TYPE_U32; + mfx_param_3.Data.U32 = MFX_EXTBUFF_VPP_SCALING; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_3, + (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC", + mfx_param_3), MFX_ERR_NONE); + // create session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(test_mfx_handle, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // assign acceleration + EXPECT_NO_THROW(acceleration_policy->init(mfx_decode_session)); + + // create proper bitstream + std::string file_path = findDataFile("highgui/video/big_buck_bunny.h265"); + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(MFX_CODEC_HEVC)})); + IDataProvider::mfx_codec_id_type decoder_id_name = data_provider->get_mfx_codec_id(); + + // Prepare video param + mfxVideoParam mfxDecParams {}; + mfxDecParams.mfx.CodecId = decoder_id_name; + mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + // try fetch 
& decode input data + sts = MFX_ERR_NONE; + std::shared_ptr bitstream{}; + do { + EXPECT_TRUE(data_provider->fetch_bitstream_data(bitstream)); + sts = MFXVideoDECODE_DecodeHeader(mfx_decode_session, bitstream.get(), &mfxDecParams); + EXPECT_TRUE(MFX_ERR_NONE == sts || MFX_ERR_MORE_DATA == sts); + } while (sts == MFX_ERR_MORE_DATA && !data_provider->empty()); + + EXPECT_EQ(MFX_ERR_NONE, sts); + + mfxFrameAllocRequest request{}; + memset(&request, 0, sizeof(request)); + sts = MFXVideoDECODE_QueryIOSurf(mfx_decode_session, &mfxDecParams, &request); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // Allocate surfaces for decoder + request.Type |= MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE | MFX_MEMTYPE_FROM_VPPIN; + VPLAccelerationPolicy::pool_key_t decode_pool_key = acceleration_policy->create_surface_pool(request, + mfxDecParams.mfx.FrameInfo); + sts = MFXVideoDECODE_Init(mfx_decode_session, &mfxDecParams); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // initialize VPL session + mfxSession mfx_vpl_session{}; + sts = MFXCreateSession(test_mfx_handle, 0, &mfx_vpl_session); + // assign acceleration + EXPECT_NO_THROW(acceleration_policy->init(mfx_vpl_session)); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // request VPL surface + mfxU16 vppOutImgWidth = 672; + mfxU16 vppOutImgHeight = 382; + + mfxVideoParam mfxVPPParams{0}; + mfxVPPParams.vpp.In = mfxDecParams.mfx.FrameInfo; + + mfxVPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; + mfxVPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + mfxVPPParams.vpp.Out.Width = ALIGN16(vppOutImgWidth); + mfxVPPParams.vpp.Out.Height = ALIGN16(vppOutImgHeight); + mfxVPPParams.vpp.Out.CropX = 0; + mfxVPPParams.vpp.Out.CropY = 0; + mfxVPPParams.vpp.Out.CropW = vppOutImgWidth; + mfxVPPParams.vpp.Out.CropH = vppOutImgHeight; + mfxVPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; + mfxVPPParams.vpp.Out.FrameRateExtN = 30; + mfxVPPParams.vpp.Out.FrameRateExtD = 1; + + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | 
MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + mfxFrameAllocRequest vppRequests[2]; + memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2); + EXPECT_EQ(MFXVideoVPP_QueryIOSurf(mfx_vpl_session, &mfxVPPParams, vppRequests), MFX_ERR_NONE); + + vppRequests[1].AllocId = 666; + VPLAccelerationPolicy::pool_key_t vpp_out_pool_key = + acceleration_policy->create_surface_pool(vppRequests[1], mfxVPPParams.vpp.Out); + EXPECT_EQ(MFXVideoVPP_Init(mfx_vpl_session, &mfxVPPParams), MFX_ERR_NONE); + + // finalize session creation + DecoderParams d_param{bitstream, mfxDecParams}; + TranscoderParams t_param{mfxVPPParams}; + VPLLegacyDecodeEngine engine(std::move(acceleration_policy)); + std::shared_ptr sess_ptr = + engine.register_session( + mfx_decode_session, + std::move(d_param), + data_provider); + + sess_ptr->init_surface_pool(decode_pool_key); + + // prepare working surfaces + sess_ptr->swap_decode_surface(engine); + + // launch pipeline + LegacyDecodeSession &my_sess = *sess_ptr; + + size_t min_available_frames_count = + std::min(engine.get_accel()->get_surface_count(decode_pool_key), + engine.get_accel()->get_surface_count(vpp_out_pool_key)); + size_t frame_num = 0; + do { + if (!my_sess.data_provider) { + my_sess.last_status = MFX_ERR_MORE_DATA; + } else { + my_sess.last_status = MFX_ERR_NONE; + if (!my_sess.data_provider->fetch_bitstream_data(my_sess.stream)) { + my_sess.last_status = MFX_ERR_MORE_DATA; + my_sess.data_provider.reset(); //close source + } + } + + // 2) enqueue ASYNC decode operation + // prepare sync object for new surface + LegacyTranscodeSession::op_handle_t sync_pair{}; + + // enqueue decode operation with current session surface + { + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + // process wait-like statuses in-place: + // It had better to use up all VPL decoding resources in pipeline + // as soon as 
possible. So waiting more free-surface or device free + while (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_WRN_DEVICE_BUSY) { + try { + if (my_sess.last_status == MFX_ERR_MORE_SURFACE) { + my_sess.swap_decode_surface(engine); + } + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + } catch (const std::runtime_error&) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. + EXPECT_TRUE(false); + } + } + } + { + do { + my_sess.last_status = MFXVideoCORE_SyncOperation(my_sess.session, sync_pair.first, 0); + // put frames in ready queue on success + if (MFX_ERR_NONE == my_sess.last_status) { + break; + } + } while (MFX_WRN_IN_EXECUTION == my_sess.last_status); + EXPECT_EQ(my_sess.last_status, MFX_ERR_NONE); + } + + // perform VPP operation on decoder synchronized surface + + auto vpp_out = engine.get_accel()->get_free_surface(vpp_out_pool_key).lock(); + EXPECT_TRUE(vpp_out.get()); + my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(mfx_vpl_session, + sync_pair.second, + vpp_out->get_handle(), + nullptr, &sync_pair.first); + if (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_ERR_NONE) { + my_sess.last_status = MFXVideoCORE_SyncOperation(mfx_vpl_session, sync_pair.first, INFINITE); + EXPECT_EQ(my_sess.last_status, MFX_ERR_NONE); + frame_num++; + } + } while(frame_num < min_available_frames_count); +} #endif // HAVE_DIRECTX #endif // HAVE_D3D11 diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp index c8c27fa6a4..ebafb79695 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp @@ -73,9 +73,9 @@ 
TEST_P(OneVPL_Source_MFPAsyncDispatcherTest, open_and_decode_file) EXPECT_TRUE(dd_result); // initialize MFX - mfxLoader mfx_handle = MFXLoad(); + mfxLoader mfx = MFXLoad(); - mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_0 = MFXCreateConfig(mfx); EXPECT_TRUE(cfg_inst_0); mfxVariant mfx_param_0; mfx_param_0.Type = MFX_VARIANT_TYPE_U32; @@ -85,7 +85,7 @@ TEST_P(OneVPL_Source_MFPAsyncDispatcherTest, open_and_decode_file) // create MFX session mfxSession mfx_session{}; - mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_session); EXPECT_EQ(MFX_ERR_NONE, sts); // create proper bitstream @@ -112,7 +112,7 @@ TEST_P(OneVPL_Source_MFPAsyncDispatcherTest, open_and_decode_file) MFXVideoDECODE_Close(mfx_session); MFXClose(mfx_session); - MFXUnload(mfx_handle); + MFXUnload(mfx); } diff --git a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp new file mode 100644 index 0000000000..c43dfa9496 --- /dev/null +++ b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp @@ -0,0 +1,495 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + + +#include "../test_precomp.hpp" + +#include "../common/gapi_tests_common.hpp" +#include "../common/gapi_streaming_tests_common.hpp" + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#ifdef HAVE_ONEVPL + +#include +#include "streaming/onevpl/file_data_provider.hpp" +#include "streaming/onevpl/cfg_param_device_selector.hpp" + +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" +#define private public +#define protected public +#include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp" +#include "streaming/onevpl/engine/decode/decode_session.hpp" + +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" + +#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" +#include "streaming/onevpl/engine/transcode/transcode_session.hpp" +#undef protected +#undef private +#include "logger.hpp" + +#define ALIGN16(value) (((value + 15) >> 4) << 4) + +namespace opencv_test +{ +namespace +{ +template +cv::MediaFrame extract_decoded_frame(mfxSession sessId, ProcessingEngine& engine) { + using namespace cv::gapi::wip::onevpl; + ProcessingEngineBase::ExecutionStatus status = ProcessingEngineBase::ExecutionStatus::Continue; + while (0 == engine.get_ready_frames_count() && + status == ProcessingEngineBase::ExecutionStatus::Continue) { + status = engine.process(sessId); + } + + if (engine.get_ready_frames_count() == 0) { + 
GAPI_LOG_WARNING(nullptr, "failed: cannot obtain preprocessed frames, last status: " << + ProcessingEngineBase::status_to_string(status)); + throw std::runtime_error("cannot finalize VPP preprocessing operation"); + } + cv::gapi::wip::Data data; + engine.get_frame(data); + return cv::util::get(data); +} + +std::tuple prepare_mfx(int mfx_codec, int mfx_accel_mode) { + using namespace cv::gapi::wip::onevpl; + mfxLoader mfx = MFXLoad(); + mfxConfig cfg_inst_0 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_0); + mfxVariant mfx_param_0; + mfx_param_0.Type = MFX_VARIANT_TYPE_U32; + mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), + mfx_param_0), MFX_ERR_NONE); + + mfxConfig cfg_inst_1 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_1); + mfxVariant mfx_param_1; + mfx_param_1.Type = MFX_VARIANT_TYPE_U32; + mfx_param_1.Data.U32 = mfx_accel_mode; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), + mfx_param_1), MFX_ERR_NONE); + + mfxConfig cfg_inst_2 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_2); + mfxVariant mfx_param_2; + mfx_param_2.Type = MFX_VARIANT_TYPE_U32; + mfx_param_2.Data.U32 = mfx_codec; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), + mfx_param_2), MFX_ERR_NONE); + + mfxConfig cfg_inst_3 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_3); + mfxVariant mfx_param_3; + mfx_param_3.Type = MFX_VARIANT_TYPE_U32; + mfx_param_3.Data.U32 = MFX_EXTBUFF_VPP_SCALING; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_3, + (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC", + mfx_param_3), MFX_ERR_NONE); + return std::make_tuple(mfx, cfg_inst_3); +} + +class SafeQueue { +public: + void push(cv::MediaFrame&& f) { + std::unique_lock lock(mutex); + queue.push(std::move(f)); + cv.notify_all(); + } + + cv::MediaFrame pop() { + cv::MediaFrame ret; + std::unique_lock lock(mutex); + 
cv.wait(lock, [this] () { + return !queue.empty(); + }); + ret = queue.front(); + queue.pop(); + return ret; + } + + void push_stop() { + push(cv::MediaFrame::Create()); + } + + static bool is_stop(const cv::MediaFrame &f) { + try { + return f.get(); + } catch(...) {} + return false; + } + +private: + struct IStopAdapter final : public cv::MediaFrame::IAdapter { + ~IStopAdapter() {} + cv::GFrameDesc meta() const { return {}; }; + MediaFrame::View access(MediaFrame::Access) { return {{}, {}}; }; + }; +private: + std::condition_variable cv; + std::mutex mutex; + std::queue queue; +}; + +struct EmptyDataProvider : public cv::gapi::wip::onevpl::IDataProvider { + + bool empty() const override { + return true; + } + mfx_codec_id_type get_mfx_codec_id() const override { + return std::numeric_limits::max(); + } + bool fetch_bitstream_data(std::shared_ptr &) override { + return false; + } +}; +} + +using source_t = std::string; +using decoder_t = int; +using acceleration_t = int; +using out_frame_info_t = cv::GFrameDesc; +using preproc_args_t = std::tuple; + +class VPPPreprocParams : public ::testing::TestWithParam {}; + +preproc_args_t files[] = { + preproc_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + preproc_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}} +}; + +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 +TEST(OneVPL_Source_PreprocEngine, functional_single_thread) +{ + using namespace cv::gapi::wip::onevpl; + using namespace cv::gapi::wip; + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + std::unique_ptr decode_accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + // create file data provider + std::string file_path = 
findDataFile("highgui/video/big_buck_bunny.h265"); + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(MFX_CODEC_HEVC)})); + + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11); + + // create decode session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // create decode engine + auto device_selector = decode_accel_policy->get_device_selector(); + VPLLegacyDecodeEngine decode_engine(std::move(decode_accel_policy)); + auto sess_ptr = decode_engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11, + data_provider); + + // simulate net info + cv::GFrameDesc required_frame_param {cv::MediaFormat::NV12, + {1920, 1080}}; + + // create VPP preproc engine + VPPPreprocEngine preproc_engine(std::unique_ptr{ + new VPLDX11AccelerationPolicy(device_selector)}); + + // launch pipeline + // 1) decode frame + cv::MediaFrame first_decoded_frame; + ASSERT_NO_THROW(first_decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine)); + cv::GFrameDesc first_frame_decoded_desc = first_decoded_frame.desc(); + + // 1.5) create preproc session based on frame description & network info + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = preproc_engine.initialize_preproc(first_pp_params.value(), + required_frame_param); + + // 2) make preproc using incoming decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, first_decoded_frame); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + ASSERT_FALSE(first_frame_decoded_desc == first_outcome_pp_desc); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = 
cv::MediaFrame(); + + // make test in loop + bool in_progress = false; + size_t frames_processed_count = 1; + const auto &first_pp_param_value_impl = + cv::util::get(first_pp_params.value().value); + try { + while(true) { + cv::MediaFrame decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine); + in_progress = true; + ASSERT_EQ(decoded_frame.desc(), first_frame_decoded_desc); + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + const auto &cur_pp_param_value_impl = + cv::util::get(params.value().value); + + ASSERT_EQ(first_pp_param_value_impl.handle, cur_pp_param_value_impl.handle); + ASSERT_TRUE(FrameInfoComparator::equal_to(first_pp_param_value_impl.info, cur_pp_param_value_impl.info)); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + ASSERT_EQ(pp_sess.get().get(), + first_pp_sess.get().get()); + + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame); + cv::GFrameDesc pp_desc = pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + frames_processed_count++; + } + } catch (...) 
{} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(frames_processed_count, 1); +} + + +TEST_P(VPPPreprocParams, functional_different_threads) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + source_t file_path; + decoder_t decoder_id; + acceleration_t accel; + out_frame_info_t required_frame_param; + std::tie(file_path, decoder_id, accel, required_frame_param) = GetParam(); + + file_path = findDataFile(file_path); + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(accel)); + std::unique_ptr decode_accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + // create file data provider + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(decoder_id, accel); + + // create decode session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // create decode engine + auto device_selector = decode_accel_policy->get_device_selector(); + VPLLegacyDecodeEngine decode_engine(std::move(decode_accel_policy)); + auto sess_ptr = decode_engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11, + data_provider); + + // create VPP preproc engine + VPPPreprocEngine preproc_engine(std::unique_ptr{ + new VPLDX11AccelerationPolicy(device_selector)}); + + // launch threads + SafeQueue queue; + size_t decoded_number = 1; + size_t preproc_number = 0; + + std::thread decode_thread([&decode_engine, sess_ptr, + &queue, &decoded_number] () { + // decode first frame + { + cv::MediaFrame decoded_frame; + ASSERT_NO_THROW(decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine)); + queue.push(std::move(decoded_frame)); + } + + // launch pipeline + try { + while(true) { + 
queue.push(extract_decoded_frame(sess_ptr->session, decode_engine)); + decoded_number++; + } + } catch (...) {} + + // send stop + queue.push_stop(); + }); + + std::thread preproc_thread([&preproc_engine, &queue, &preproc_number, required_frame_param] () { + // create preproc session based on frame description & network info + cv::MediaFrame first_decoded_frame = queue.pop(); + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = + preproc_engine.initialize_preproc(first_pp_params.value(), required_frame_param); + + // make preproc using incoming decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, first_decoded_frame); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = cv::MediaFrame(); + + // launch pipeline + bool in_progress = false; + // let's allow counting of preprocessed frames to check this value later: + // Currently, it looks redundant to implement any kind of gracefull shutdown logic + // in this test - so let's apply agreement that media source is processed + // succesfully when preproc_number != 1 in result + preproc_number = 1; + try { + while(true) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + break; + } + in_progress = true; + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + ASSERT_TRUE(0 == memcmp(¶ms.value(), &first_pp_params.value(), sizeof(pp_params))); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + ASSERT_EQ(pp_sess.get().get(), + first_pp_sess.get().get()); + + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame); + cv::GFrameDesc pp_desc = 
pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + preproc_number++; + } + } catch (...) {} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(preproc_number, 1); + }); + + decode_thread.join(); + preproc_thread.join(); + ASSERT_EQ(preproc_number, decoded_number); +} + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocEngine, VPPPreprocParams, + testing::ValuesIn(files)); + +using VPPInnerPreprocParams = VPPPreprocParams; +TEST_P(VPPInnerPreprocParams, functional_inner_preproc_size) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + source_t file_path; + decoder_t decoder_id; + acceleration_t accel; + out_frame_info_t required_frame_param; + std::tie(file_path, decoder_id, accel, required_frame_param) = GetParam(); + + file_path = findDataFile(file_path); + + std::vector cfg_params_w_dx11_vpp; + + // create accel policy + cfg_params_w_dx11_vpp.push_back(CfgParam::create_acceleration_mode(accel)); + std::unique_ptr accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11_vpp))); + + // create file data provider + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + + // create decode session + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(decoder_id, accel); + + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // fill vpp params beforehand: resolution + cfg_params_w_dx11_vpp.push_back(CfgParam::create_vpp_out_width( + static_cast(required_frame_param.size.width))); + cfg_params_w_dx11_vpp.push_back(CfgParam::create_vpp_out_height( + static_cast(required_frame_param.size.height))); + + // create transcode engine + auto device_selector = accel_policy->get_device_selector(); + VPLLegacyTranscodeEngine engine(std::move(accel_policy)); + auto sess_ptr = 
engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11_vpp, + data_provider); + // make test in loop + bool in_progress = false; + size_t frames_processed_count = 1; + try { + while(true) { + cv::MediaFrame decoded_frame = extract_decoded_frame(sess_ptr->session, engine); + in_progress = true; + ASSERT_EQ(decoded_frame.desc().size.width, + ALIGN16(required_frame_param.size.width)); + ASSERT_EQ(decoded_frame.desc().size.height, + ALIGN16(required_frame_param.size.height)); + ASSERT_EQ(decoded_frame.desc().fmt, required_frame_param.fmt); + frames_processed_count++; + in_progress = false; + } + } catch (...) {} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(frames_processed_count, 1); +} + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocInner, VPPInnerPreprocParams, + testing::ValuesIn(files)); +#endif // HAVE_DIRECTX +#endif // HAVE_D3D11 +} // namespace opencv_test +#endif // HAVE_ONEVPL From 119d8b3acadf8e12047db365da3770fc8a753ab2 Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Fri, 25 Feb 2022 01:17:43 +0300 Subject: [PATCH 04/84] Merge pull request #21553 from VadimLevin:dev/vlevin/scope-for-classes-4x-port 4.x: submodule or a class scope for exported classes * feature: submodule or a class scope for exported classes All classes are registered in the scope that corresponds to C++ namespace or exported class. Example: `cv::ml::Boost` is exported as `cv.ml.Boost` `cv::SimpleBlobDetector::Params` is exported as `cv.SimpleBlobDetector.Params` For backward compatibility all classes are registered in the global module with their mangling name containing scope information. 
Example: `cv::ml::Boost` has `cv.ml_Boost` alias to `cv.ml.Boost` type * refactor: remove redundant GAPI aliases * fix: use explicit string literals in CVPY_TYPE macro * fix: add handling for class aliases --- .../include/opencv2/core/bindings_utils.hpp | 47 +++++ .../gapi/misc/python/package/gapi/__init__.py | 11 - modules/python/src2/cv2.cpp | 191 +++++++++++++++++- modules/python/src2/gen2.py | 112 +++++++--- modules/python/src2/pycompat.hpp | 144 +++++++------ modules/python/test/test_misc.py | 68 +++++++ 6 files changed, 466 insertions(+), 107 deletions(-) diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index 7a50390aed..4f7eb532b9 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -223,6 +223,53 @@ namespace nested { CV_WRAP static inline bool testEchoBooleanFunction(bool flag) { return flag; } + +class CV_EXPORTS_W CV_WRAP_AS(ExportClassName) OriginalClassName +{ +public: + struct CV_EXPORTS_W_SIMPLE Params + { + CV_PROP_RW int int_value; + CV_PROP_RW float float_value; + + CV_WRAP explicit Params(int int_param = 123, float float_param = 3.5f) + { + int_value = int_param; + float_value = float_param; + } + }; + + explicit OriginalClassName(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + params_ = params; + } + + CV_WRAP int getIntParam() const + { + return params_.int_value; + } + + CV_WRAP float getFloatParam() const + { + return params_.float_value; + } + + CV_WRAP static std::string originalName() + { + return "OriginalClassName"; + } + + CV_WRAP static Ptr + create(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + return makePtr(params); + } + +private: + OriginalClassName::Params params_; +}; + +typedef OriginalClassName::Params OriginalClassName_Params; } // namespace nested namespace fs { diff --git 
a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index 6323582f5b..a2983e30ff 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -287,15 +287,4 @@ def kernel(op_cls): return kernel_with_params -# FIXME: On the c++ side every class is placed in cv2 module. -cv.gapi.wip.draw.Rect = cv.gapi_wip_draw_Rect -cv.gapi.wip.draw.Text = cv.gapi_wip_draw_Text -cv.gapi.wip.draw.Circle = cv.gapi_wip_draw_Circle -cv.gapi.wip.draw.Line = cv.gapi_wip_draw_Line -cv.gapi.wip.draw.Mosaic = cv.gapi_wip_draw_Mosaic -cv.gapi.wip.draw.Image = cv.gapi_wip_draw_Image -cv.gapi.wip.draw.Poly = cv.gapi_wip_draw_Poly - -cv.gapi.streaming.queue_capacity = cv.gapi_streaming_queue_capacity - cv.gapi.wip.GStreamerPipeline = cv.gapi_wip_gst_GStreamerPipeline diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 294905c783..5d952412f3 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -79,9 +79,9 @@ static int convert_to_char(PyObject *o, char *dst, const ArgInfo& info) #include "pyopencv_generated_enums.h" #ifdef CVPY_DYNAMIC_INIT -#define CVPY_TYPE(WNAME, NAME, STORAGE, SNAME, _1, _2) CVPY_TYPE_DECLARE_DYNAMIC(WNAME, NAME, STORAGE, SNAME) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, _1, _2, SCOPE) CVPY_TYPE_DECLARE_DYNAMIC(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) #else -#define CVPY_TYPE(WNAME, NAME, STORAGE, SNAME, _1, _2) CVPY_TYPE_DECLARE(WNAME, NAME, STORAGE, SNAME) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, _1, _2, SCOPE) CVPY_TYPE_DECLARE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) #endif #include "pyopencv_generated_types.h" #undef CVPY_TYPE @@ -281,6 +281,189 @@ static bool init_submodule(PyObject * root, const char * name, PyMethodDef * met return true; } +static inline +bool registerTypeInModuleScope(PyObject* module, const char* type_name, PyObject* type_obj) +{ + if 
(PyModule_AddObject(module, type_name, type_obj) < 0) + { + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in module scope '%s'", + type_name, PyModule_GetName(module) + ); + Py_DECREF(type_obj); + return false; + } + return true; +} + +static inline +bool registerTypeInClassScope(PyObject* cls, const char* type_name, PyObject* type_obj) +{ + if (!PyType_CheckExact(cls)) { + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in class scope. " + "Scope class object has a wrong type", type_name + ); + return false; + } + if (PyObject_SetAttrString(cls, type_name, type_obj) < 0) + { + #ifndef Py_LIMITED_API + PyObject* cls_dict = reinterpret_cast(cls)->tp_dict; + if (PyDict_SetItemString(cls_dict, type_name, type_obj) >= 0) { + /// Clearing the error set by PyObject_SetAttrString: + /// TypeError: can't set attributes of built-in/extension type NAME + PyErr_Clear(); + return true; + } + #endif + const std::string cls_name = getPyObjectNameAttr(cls); + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in '%s' class scope. 
Can't update scope dictionary", + type_name, cls_name.c_str() + ); + return false; + } + return true; +} + +static inline +PyObject* getScopeFromTypeObject(PyObject* obj, const std::string& scope_name) +{ + if (!PyType_CheckExact(obj)) { + const std::string type_name = getPyObjectNameAttr(obj); + return PyErr_Format(PyExc_ImportError, + "Failed to get scope from type '%s' " + "Scope class object has a wrong type", type_name.c_str() + ); + } + /// When using LIMITED API all classes are registered in the heap +#if defined(Py_LIMITED_API) + return PyObject_GetAttrString(obj, scope_name.c_str()); +#else + /// Otherwise classes may be registed on the stack or heap + PyObject* type_dict = reinterpret_cast(obj)->tp_dict; + if (!type_dict) { + const std::string type_name = getPyObjectNameAttr(obj); + return PyErr_Format(PyExc_ImportError, + "Failed to get scope from type '%s' " + "Type dictionary is not available", type_name.c_str() + ); + } + return PyDict_GetItemString(type_dict, scope_name.c_str()); +#endif // Py_LIMITED_API +} + +static inline +PyObject* findTypeScope(PyObject* root_module, const std::string& scope_name) +{ + PyObject* scope = root_module; + if (scope_name.empty()) + { + return scope; + } + /// Starting with 1 to omit leading dot in the scope name + size_t name_end = scope_name.find('.', 1); + if (name_end == std::string::npos) + { + name_end = scope_name.size(); + } + for (size_t name_start = 1; name_start < scope_name.size() && scope; ) + { + const std::string current_scope_name = scope_name.substr(name_start, + name_end - name_start); + + if (PyModule_CheckExact(scope)) + { + PyObject* scope_dict = PyModule_GetDict(scope); + if (!scope_dict) + { + return PyErr_Format(PyExc_ImportError, + "Scope '%s' dictionary is not available during the search for " + " the '%s' scope object", current_scope_name.c_str(), + scope_name.c_str() + ); + } + + scope = PyDict_GetItemString(scope_dict, current_scope_name.c_str()); + } + else if (PyType_CheckExact(scope)) + 
{ + scope = getScopeFromTypeObject(scope, current_scope_name); + } + else + { + return PyErr_Format(PyExc_ImportError, + "Can't find scope '%s'. '%s' doesn't reference a module or a class", + scope_name.c_str(), current_scope_name.c_str() + ); + } + + + name_start = name_end + 1; + name_end = scope_name.find('.', name_start); + if (name_end == std::string::npos) + { + name_end = scope_name.size(); + } + } + if (!scope) + { + return PyErr_Format(PyExc_ImportError, + "Module or class with name '%s' can't be found in '%s' module", + scope_name.c_str(), PyModule_GetName(root_module) + ); + } + return scope; +} + +static bool registerNewType(PyObject* root_module, const char* type_name, + PyObject* type_obj, const std::string& scope_name) +{ + PyObject* scope = findTypeScope(root_module, scope_name); + + /// If scope can't be found it means that there is an error during + /// bindings generation + if (!scope) { + return false; + } + + if (PyModule_CheckExact(scope)) + { + if (!registerTypeInModuleScope(scope, type_name, type_obj)) + { + return false; + } + } + else + { + /// In Python 2 it is disallowed to register an inner classes + /// via modifing dictionary of the built-in type. 
+ if (!registerTypeInClassScope(scope, type_name, type_obj)) + { + return false; + } + } + + /// Expose all classes that are defined in the submodules as aliases in the + /// root module for backward compatibility + /// If submodule and root module are same than no aliases registration are + /// required + if (scope != root_module) + { + std::string type_name_str(type_name); + + std::string alias_name; + alias_name.reserve(scope_name.size() + type_name_str.size()); + std::replace_copy(scope_name.begin() + 1, scope_name.end(), std::back_inserter(alias_name), '.', '_'); + alias_name += '_'; + alias_name += type_name_str; + + return registerTypeInModuleScope(root_module, alias_name.c_str(), type_obj); + } + return true; +} + #include "pyopencv_generated_modules_content.h" static bool init_body(PyObject * m) @@ -294,10 +477,10 @@ static bool init_body(PyObject * m) #undef CVPY_MODULE #ifdef CVPY_DYNAMIC_INIT -#define CVPY_TYPE(WNAME, NAME, _1, _2, BASE, CONSTRUCTOR) CVPY_TYPE_INIT_DYNAMIC(WNAME, NAME, return false, BASE, CONSTRUCTOR) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, _1, _2, BASE, CONSTRUCTOR, SCOPE) CVPY_TYPE_INIT_DYNAMIC(EXPORT_NAME, CLASS_ID, return false, BASE, CONSTRUCTOR, SCOPE) PyObject * pyopencv_NoBase_TypePtr = NULL; #else -#define CVPY_TYPE(WNAME, NAME, _1, _2, BASE, CONSTRUCTOR) CVPY_TYPE_INIT_STATIC(WNAME, NAME, return false, BASE, CONSTRUCTOR) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, _1, _2, BASE, CONSTRUCTOR, SCOPE) CVPY_TYPE_INIT_STATIC(EXPORT_NAME, CLASS_ID, return false, BASE, CONSTRUCTOR, SCOPE) PyTypeObject * pyopencv_NoBase_TypePtr = NULL; #endif #include "pyopencv_generated_types.h" diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index fbdf5677c4..10161e2ea0 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -245,10 +245,20 @@ class ClassProp(object): self.readonly = False class ClassInfo(object): - def __init__(self, name, decl=None): + def __init__(self, name, decl=None, codegen=None): + 
# Scope name can be a module or other class e.g. cv::SimpleBlobDetector::Params + scope_name, self.original_name = name.rsplit(".", 1) + + # In case scope refer the outer class exported with different name + if codegen: + scope_name = codegen.get_export_scope_name(scope_name) + self.scope_name = re.sub(r"^cv\.?", "", scope_name) + + self.export_name = self.original_name + + self.class_id = normalize_class_name(name) + self.cname = name.replace(".", "::") - self.name = self.wname = normalize_class_name(name) - self.sname = name[name.rfind('.') + 1:] self.ismap = False self.issimple = False self.isalgorithm = False @@ -258,12 +268,11 @@ class ClassInfo(object): self.consts = {} self.base = None self.constructor = None - customname = False if decl: bases = decl[1].split()[1:] if len(bases) > 1: - print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,)) + print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.cname,)) print(" Bases: ", " ".join(bases)) print(" Only the first base class will be used") #return sys.exit(-1) @@ -277,21 +286,43 @@ class ClassInfo(object): for m in decl[2]: if m.startswith("="): - wname = m[1:] - npos = name.rfind('.') - if npos >= 0: - self.wname = normalize_class_name(name[:npos] + '.' 
+ wname) - else: - self.wname = wname - customname = True + # Aliasing only affects the exported class name, not class identifier + self.export_name = m[1:] elif m == "/Map": self.ismap = True elif m == "/Simple": self.issimple = True self.props = [ClassProp(p) for p in decl[3]] - if not customname and self.wname.startswith("Cv"): - self.wname = self.wname[2:] + if not self.has_export_alias and self.original_name.startswith("Cv"): + self.export_name = self.export_name[2:] + + @property + def wname(self): + if len(self.scope_name) > 0: + return self.scope_name.replace(".", "_") + "_" + self.export_name + + return self.export_name + + @property + def name(self): + return self.class_id + + @property + def full_scope_name(self): + return "cv." + self.scope_name if len(self.scope_name) else "cv" + + @property + def full_export_name(self): + return self.full_scope_name + "." + self.export_name + + @property + def full_original_name(self): + return self.full_scope_name + "." + self.original_name + + @property + def has_export_alias(self): + return self.export_name != self.original_name def gen_map_code(self, codegen): all_classes = codegen.classes @@ -345,9 +376,11 @@ class ClassInfo(object): methods_code.write(m.gen_code(codegen)) methods_inits.write(m.get_tab_entry()) - code = gen_template_type_impl.substitute(name=self.name, wname=self.wname, cname=self.cname, - getset_code=getset_code.getvalue(), getset_inits=getset_inits.getvalue(), - methods_code=methods_code.getvalue(), methods_inits=methods_inits.getvalue()) + code = gen_template_type_impl.substitute(name=self.name, + getset_code=getset_code.getvalue(), + getset_inits=getset_inits.getvalue(), + methods_code=methods_code.getvalue(), + methods_inits=methods_inits.getvalue()) return code @@ -361,13 +394,15 @@ class ClassInfo(object): if self.constructor is not None: constructor_name = self.constructor.get_wrapper_name() - return "CVPY_TYPE({}, {}, {}, {}, {}, {});\n".format( - self.wname, - self.name, + return 
'CVPY_TYPE({}, {}, {}, {}, {}, {}, "{}");\n'.format( + self.export_name, + self.class_id, self.cname if self.issimple else "Ptr<{}>".format(self.cname), - self.sname if self.issimple else "Ptr", + self.original_name if self.issimple else "Ptr", baseptr, - constructor_name + constructor_name, + # Leading dot is required to provide correct class naming + "." + self.scope_name if len(self.scope_name) > 0 else self.scope_name ) @@ -823,12 +858,12 @@ class FuncInfo(object): classinfo = all_classes[self.classname] #if dump: pprint(vars(classinfo)) if self.isconstructor: - py_name = 'cv.' + classinfo.wname - elif self.is_static: - py_name = '.'.join([self.namespace, classinfo.sname + '_' + self.variants[0].wname]) + py_name = classinfo.full_export_name else: + py_name = classinfo.full_export_name + "." + self.variants[0].wname + + if not self.is_static: cname = classinfo.cname + '::' + cname - py_name = 'cv.' + classinfo.wname + '.' + self.variants[0].wname else: py_name = '.'.join([self.namespace, self.variants[0].wname]) #if dump: print(cname + " => " + py_name) @@ -870,7 +905,7 @@ class PythonWrapperGenerator(object): self.class_idx = 0 def add_class(self, stype, name, decl): - classinfo = ClassInfo(name, decl) + classinfo = ClassInfo(name, decl, self) classinfo.decl_idx = self.class_idx self.class_idx += 1 @@ -880,16 +915,30 @@ class PythonWrapperGenerator(object): sys.exit(-1) self.classes[classinfo.name] = classinfo - # Add Class to json file. - namespace, classes, name = self.split_decl_name(name) + namespace, _, _ = self.split_decl_name(name) namespace = '.'.join(namespace) - name = '_'.join(classes+[name]) + # Registering a namespace if it is not already handled or + # doesn't have anything except classes defined in it + self.namespaces.setdefault(namespace, Namespace()) - py_name = 'cv.' + classinfo.wname # use wrapper name + # Add Class to json file. 
+ py_name = classinfo.full_export_name # use wrapper name py_signatures = self.py_signatures.setdefault(classinfo.cname, []) py_signatures.append(dict(name=py_name)) #print('class: ' + classinfo.cname + " => " + py_name) + def get_export_scope_name(self, original_scope_name): + # Outer classes should be registered before their content - inner classes in this case + class_scope = self.classes.get(normalize_class_name(original_scope_name), None) + + if class_scope: + return class_scope.full_export_name + + # Otherwise it is a namespace. + # If something is messed up at this point - it will be revelead during + # library import + return original_scope_name + def split_decl_name(self, name): chunks = name.split('.') namespace = chunks[:-1] @@ -979,6 +1028,7 @@ class PythonWrapperGenerator(object): w_classes.append(w_classname) g_wname = "_".join(w_classes+[name]) func_map = self.namespaces.setdefault(namespace_str, Namespace()).funcs + # Exports static function with internal name (backward compatibility) func = func_map.setdefault(g_name, FuncInfo("", g_name, cname, isconstructor, namespace_str, False)) func.add_variant(decl, isphantom) if g_wname != g_name: # TODO OpenCV 5.0 diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp index 8b44726d5f..18336d4295 100644 --- a/modules/python/src2/pycompat.hpp +++ b/modules/python/src2/pycompat.hpp @@ -62,6 +62,10 @@ #endif // PY_MAJOR >=3 +#ifndef PyType_CheckExact +#define PyType_CheckExact(obj) (Py_TYPE(op) == &PyType_Type) +#endif // !PyType_CheckExact + static inline bool getUnicodeString(PyObject * obj, std::string &str) { bool res = false; @@ -93,6 +97,26 @@ static inline bool getUnicodeString(PyObject * obj, std::string &str) return res; } +static inline +std::string getPyObjectNameAttr(PyObject* obj) +{ + std::string obj_name; + PyObject* cls_name_obj = PyObject_GetAttrString(obj, "__name__"); + if (cls_name_obj && !getUnicodeString(cls_name_obj, obj_name)) { + obj_name.clear(); + } + 
#ifndef Py_LIMITED_API + if (PyType_CheckExact(obj) && obj_name.empty()) + { + obj_name = reinterpret_cast(obj)->tp_name; + } + #endif + if (obj_name.empty()) { + obj_name = ""; + } + return obj_name; +} + //================================================================================================== #define CV_PY_FN_WITH_KW_(fn, flags) (PyCFunction)(void*)(PyCFunctionWithKeywords)(fn), (flags) | METH_VARARGS | METH_KEYWORDS @@ -174,107 +198,106 @@ PyObject* pyopencv_from(const TYPE& src) #endif -#define CVPY_TYPE_DECLARE(WNAME, NAME, STORAGE, SNAME) \ - struct pyopencv_##NAME##_t \ +#define CVPY_TYPE_DECLARE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) \ + struct pyopencv_##CLASS_ID##_t \ { \ PyObject_HEAD \ STORAGE v; \ }; \ - static PyTypeObject pyopencv_##NAME##_TypeXXX = \ + static PyTypeObject pyopencv_##CLASS_ID##_TypeXXX = \ { \ CVPY_TYPE_HEAD \ - MODULESTR"."#WNAME, \ - sizeof(pyopencv_##NAME##_t), \ + MODULESTR SCOPE"."#EXPORT_NAME, \ + sizeof(pyopencv_##CLASS_ID##_t), \ }; \ - static PyTypeObject * pyopencv_##NAME##_TypePtr = &pyopencv_##NAME##_TypeXXX; \ - static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \ + static PyTypeObject * pyopencv_##CLASS_ID##_TypePtr = &pyopencv_##CLASS_ID##_TypeXXX; \ + static bool pyopencv_##CLASS_ID##_getp(PyObject * self, STORAGE * & dst) \ { \ - if (PyObject_TypeCheck(self, pyopencv_##NAME##_TypePtr)) \ + if (PyObject_TypeCheck(self, pyopencv_##CLASS_ID##_TypePtr)) \ { \ - dst = &(((pyopencv_##NAME##_t*)self)->v); \ + dst = &(((pyopencv_##CLASS_ID##_t*)self)->v); \ return true; \ } \ return false; \ } \ - static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \ + static PyObject * pyopencv_##CLASS_ID##_Instance(const STORAGE &r) \ { \ - pyopencv_##NAME##_t *m = PyObject_NEW(pyopencv_##NAME##_t, pyopencv_##NAME##_TypePtr); \ + pyopencv_##CLASS_ID##_t *m = PyObject_NEW(pyopencv_##CLASS_ID##_t, pyopencv_##CLASS_ID##_TypePtr); \ new (&(m->v)) STORAGE(r); \ return (PyObject*)m; \ } \ - static 
void pyopencv_##NAME##_dealloc(PyObject* self) \ + static void pyopencv_##CLASS_ID##_dealloc(PyObject* self) \ { \ - ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \ + ((pyopencv_##CLASS_ID##_t*)self)->v.STORAGE::~SNAME(); \ PyObject_Del(self); \ } \ - static PyObject* pyopencv_##NAME##_repr(PyObject* self) \ + static PyObject* pyopencv_##CLASS_ID##_repr(PyObject* self) \ { \ char str[1000]; \ - sprintf(str, "<"#WNAME" %p>", self); \ + sprintf(str, "< " MODULESTR SCOPE"."#EXPORT_NAME" %p>", self); \ return PyString_FromString(str); \ } -#define CVPY_TYPE_INIT_STATIC(WNAME, NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \ +#define CVPY_TYPE_INIT_STATIC(EXPORT_NAME, CLASS_ID, ERROR_HANDLER, BASE, CONSTRUCTOR, SCOPE) \ { \ - pyopencv_##NAME##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \ - pyopencv_##NAME##_TypePtr->tp_dealloc = pyopencv_##NAME##_dealloc; \ - pyopencv_##NAME##_TypePtr->tp_repr = pyopencv_##NAME##_repr; \ - pyopencv_##NAME##_TypePtr->tp_getset = pyopencv_##NAME##_getseters; \ - pyopencv_##NAME##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \ - pyopencv_##NAME##_TypePtr->tp_methods = pyopencv_##NAME##_methods; \ - pyopencv_##NAME##_TypePtr->tp_alloc = PyType_GenericAlloc; \ - pyopencv_##NAME##_TypePtr->tp_new = PyType_GenericNew; \ - pyopencv_##NAME##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ - if (PyType_Ready(pyopencv_##NAME##_TypePtr) != 0) \ + pyopencv_##CLASS_ID##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \ + pyopencv_##CLASS_ID##_TypePtr->tp_dealloc = pyopencv_##CLASS_ID##_dealloc; \ + pyopencv_##CLASS_ID##_TypePtr->tp_repr = pyopencv_##CLASS_ID##_repr; \ + pyopencv_##CLASS_ID##_TypePtr->tp_getset = pyopencv_##CLASS_ID##_getseters; \ + pyopencv_##CLASS_ID##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \ + pyopencv_##CLASS_ID##_TypePtr->tp_methods = pyopencv_##CLASS_ID##_methods; \ + pyopencv_##CLASS_ID##_TypePtr->tp_alloc = PyType_GenericAlloc; \ + pyopencv_##CLASS_ID##_TypePtr->tp_new = PyType_GenericNew; \ + 
pyopencv_##CLASS_ID##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ + if (PyType_Ready(pyopencv_##CLASS_ID##_TypePtr) != 0) \ { \ ERROR_HANDLER; \ } \ - CVPY_TYPE_INCREF(pyopencv_##NAME##_TypePtr); \ - if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + CVPY_TYPE_INCREF(pyopencv_##CLASS_ID##_TypePtr); \ + if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ - printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ - Py_DECREF(pyopencv_##NAME##_TypePtr); \ + printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ ERROR_HANDLER; \ } \ } //================================================================================================== -#define CVPY_TYPE_DECLARE_DYNAMIC(WNAME, NAME, STORAGE, SNAME) \ - struct pyopencv_##NAME##_t \ +#define CVPY_TYPE_DECLARE_DYNAMIC(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) \ + struct pyopencv_##CLASS_ID##_t \ { \ PyObject_HEAD \ STORAGE v; \ }; \ - static PyObject * pyopencv_##NAME##_TypePtr = 0; \ - static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \ + static PyObject * pyopencv_##CLASS_ID##_TypePtr = 0; \ + static bool pyopencv_##CLASS_ID##_getp(PyObject * self, STORAGE * & dst) \ { \ - if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##NAME##_TypePtr)) \ + if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##CLASS_ID##_TypePtr)) \ { \ - dst = &(((pyopencv_##NAME##_t*)self)->v); \ + dst = &(((pyopencv_##CLASS_ID##_t*)self)->v); \ return true; \ } \ return false; \ } \ - static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \ + static PyObject * pyopencv_##CLASS_ID##_Instance(const STORAGE &r) \ { \ - pyopencv_##NAME##_t *m = PyObject_New(pyopencv_##NAME##_t, (PyTypeObject*)pyopencv_##NAME##_TypePtr); \ + pyopencv_##CLASS_ID##_t *m = PyObject_New(pyopencv_##CLASS_ID##_t, (PyTypeObject*)pyopencv_##CLASS_ID##_TypePtr); \ new (&(m->v)) STORAGE(r); \ 
return (PyObject*)m; \ } \ - static void pyopencv_##NAME##_dealloc(PyObject* self) \ + static void pyopencv_##CLASS_ID##_dealloc(PyObject* self) \ { \ - ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \ + ((pyopencv_##CLASS_ID##_t*)self)->v.STORAGE::~SNAME(); \ PyObject_Del(self); \ } \ - static PyObject* pyopencv_##NAME##_repr(PyObject* self) \ + static PyObject* pyopencv_##CLASS_ID##_repr(PyObject* self) \ { \ char str[1000]; \ - sprintf(str, "<"#WNAME" %p>", self); \ + sprintf(str, "< " MODULESTR SCOPE"."#EXPORT_NAME" %p>", self); \ return PyString_FromString(str); \ } \ - static PyType_Slot pyopencv_##NAME##_Slots[] = \ + static PyType_Slot pyopencv_##CLASS_ID##_Slots[] = \ { \ {Py_tp_dealloc, 0}, \ {Py_tp_repr, 0}, \ @@ -285,37 +308,36 @@ PyObject* pyopencv_from(const TYPE& src) {Py_tp_new, 0}, \ {0, 0} \ }; \ - static PyType_Spec pyopencv_##NAME##_Spec = \ + static PyType_Spec pyopencv_##CLASS_ID##_Spec = \ { \ - MODULESTR"."#WNAME, \ - sizeof(pyopencv_##NAME##_t), \ + MODULESTR SCOPE"."#EXPORT_NAME, \ + sizeof(pyopencv_##CLASS_ID##_t), \ 0, \ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, \ - pyopencv_##NAME##_Slots \ + pyopencv_##CLASS_ID##_Slots \ }; -#define CVPY_TYPE_INIT_DYNAMIC(WNAME, NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \ +#define CVPY_TYPE_INIT_DYNAMIC(EXPORT_NAME, CLASS_ID, ERROR_HANDLER, BASE, CONSTRUCTOR, SCOPE) \ { \ - pyopencv_##NAME##_Slots[0].pfunc /*tp_dealloc*/ = (void*)pyopencv_##NAME##_dealloc; \ - pyopencv_##NAME##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##NAME##_repr; \ - pyopencv_##NAME##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##NAME##_getseters; \ - pyopencv_##NAME##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \ - pyopencv_##NAME##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##NAME##_methods; \ - pyopencv_##NAME##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \ - pyopencv_##NAME##_Slots[6].pfunc /*tp_new*/ = (void*)PyType_GenericNew; \ + pyopencv_##CLASS_ID##_Slots[0].pfunc /*tp_dealloc*/ = 
(void*)pyopencv_##CLASS_ID##_dealloc; \ + pyopencv_##CLASS_ID##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##CLASS_ID##_repr; \ + pyopencv_##CLASS_ID##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##CLASS_ID##_getseters; \ + pyopencv_##CLASS_ID##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \ + pyopencv_##CLASS_ID##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##CLASS_ID##_methods; \ + pyopencv_##CLASS_ID##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \ + pyopencv_##CLASS_ID##_Slots[6].pfunc /*tp_new*/ = (void*)PyType_GenericNew; \ PyObject * bases = 0; \ if (pyopencv_##BASE##_TypePtr) \ bases = PyTuple_Pack(1, pyopencv_##BASE##_TypePtr); \ - pyopencv_##NAME##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##NAME##_Spec, bases); \ - if (!pyopencv_##NAME##_TypePtr) \ + pyopencv_##CLASS_ID##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##CLASS_ID##_Spec, bases); \ + if (!pyopencv_##CLASS_ID##_TypePtr) \ { \ - printf("Failed to create type from spec: " #WNAME ", base (" #BASE ")\n"); \ + printf("Failed to create type from spec: " #CLASS_ID ", base (" #BASE ")\n"); \ ERROR_HANDLER; \ } \ - if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ - printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ - Py_DECREF(pyopencv_##NAME##_TypePtr); \ + printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ ERROR_HANDLER; \ } \ } diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 48657d595c..9e83cd6856 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -633,6 +633,74 @@ class Arguments(NewOpenCVTests): self.assertEqual(flag, cv.utils.nested.testEchoBooleanFunction(flag), msg="Function in nested module returns wrong result") + def test_class_from_submodule_has_global_alias(self): + self.assertTrue(hasattr(cv.ml, 
"Boost"), + msg="Class is not registered in the submodule") + self.assertTrue(hasattr(cv, "ml_Boost"), + msg="Class from submodule doesn't have alias in the " + "global module") + self.assertEqual(cv.ml.Boost, cv.ml_Boost, + msg="Classes from submodules and global module don't refer " + "to the same type") + + def test_inner_class_has_global_alias(self): + self.assertTrue(hasattr(cv.SimpleBlobDetector, "Params"), + msg="Class is not registered as inner class") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + self.assertEqual(cv.SimpleBlobDetector.Params, cv.SimpleBlobDetector_Params, + msg="Inner class and class in global module don't refer " + "to the same type") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + + def test_export_class_with_different_name(self): + self.assertTrue(hasattr(cv.utils.nested, "ExportClassName"), + msg="Class with export alias is not registered in the submodule") + self.assertTrue(hasattr(cv, "utils_nested_ExportClassName"), + msg="Class with export alias doesn't have alias in the " + "global module") + self.assertEqual(cv.utils.nested.ExportClassName.originalName(), "OriginalClassName") + + instance = cv.utils.nested.ExportClassName.create() + self.assertTrue(isinstance(instance, cv.utils.nested.ExportClassName), + msg="Factory function returns wrong class instance: {}".format(type(instance))) + self.assertTrue(hasattr(cv.utils.nested, "ExportClassName_create"), + msg="Factory function should have alias in the same module as the class") + # self.assertFalse(hasattr(cv.utils.nested, "OriginalClassName_create"), + # msg="Factory function should not be registered with original class name, "\ + # "when class has different export name") + + def test_export_inner_class_of_class_exported_with_different_name(self): + if not hasattr(cv.utils.nested, "ExportClassName"): + raise 
unittest.SkipTest("Outer class with export alias is not registered in the submodule") + + self.assertTrue(hasattr(cv.utils.nested.ExportClassName, "Params"), + msg="Inner class with export alias is not registered in " + "the outer class") + self.assertTrue(hasattr(cv, "utils_nested_ExportClassName_Params"), + msg="Inner class with export alias is not registered in " + "global module") + params = cv.utils.nested.ExportClassName.Params() + params.int_value = 45 + params.float_value = 4.5 + + instance = cv.utils.nested.ExportClassName.create(params) + self.assertTrue(isinstance(instance, cv.utils.nested.ExportClassName), + msg="Factory function returns wrong class instance: {}".format(type(instance))) + self.assertEqual( + params.int_value, instance.getIntParam(), + msg="Class initialized with wrong integer parameter. Expected: {}. Actual: {}".format( + params.int_value, instance.getIntParam() + )) + self.assertEqual( + params.float_value, instance.getFloatParam(), + msg="Class initialized with wrong integer parameter. Expected: {}. 
Actual: {}".format( + params.float_value, instance.getFloatParam() + )) + + + class CanUsePurePythonModuleFunction(NewOpenCVTests): def test_can_get_ocv_version(self): From d354ad1c34c3b83427ca67bedc89b6c7b8784ce7 Mon Sep 17 00:00:00 2001 From: Tatsuro Shibamura Date: Sun, 27 Feb 2022 02:35:03 +0900 Subject: [PATCH 05/84] Merge pull request #21630 from shibayan:arm64-msvc-neon * Added NEON support in builds for Windows on ARM * Fixed `HAVE_CPU_NEON_SUPPORT` display broken during compiler test * Fixed a build error prior to Visual Studio 2022 --- cmake/OpenCVCompilerOptions.cmake | 4 +++ cmake/checks/cpu_neon.cpp | 1 + .../include/opencv2/core/hal/intrin_neon.hpp | 30 ++++++++++--------- modules/core/src/system.cpp | 3 ++ 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index bcb8a3e203..4f5c353980 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -314,6 +314,10 @@ if(MSVC) set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS") set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS") endif() + + if(AARCH64 AND NOT MSVC_VERSION LESS 1930) + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES") + endif() endif() if(PROJECT_NAME STREQUAL "OpenCV") diff --git a/cmake/checks/cpu_neon.cpp b/cmake/checks/cpu_neon.cpp index c309e85049..bb103ec366 100644 --- a/cmake/checks/cpu_neon.cpp +++ b/cmake/checks/cpu_neon.cpp @@ -1,6 +1,7 @@ #include #if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) +# define _ARM64_DISTINCT_NEON_TYPES # include # include # define CV_NEON 1 diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index e17972a3fc..28cf813379 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const 
v_uint32x4& b, inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { +#if CV_NEON_AARCH64 + int32x4_t c = vmull_high_s16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); +#endif // #if CV_NEON_AARCH64 return v_int16x8(vcombine_s16( vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16), - vshrn_n_s32( -#if CV_NEON_AARCH64 - vmull_high_s16(a.val, b.val) -#else // #if CV_NEON_AARCH64 - vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)) -#endif // #if CV_NEON_AARCH64 - , 16) + vshrn_n_s32(c, 16) )); } inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { +#if CV_NEON_AARCH64 + uint32x4_t c = vmull_high_u16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); +#endif // #if CV_NEON_AARCH64 return v_uint16x8(vcombine_u16( vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16), - vshrn_n_u32( -#if CV_NEON_AARCH64 - vmull_high_u16(a.val, b.val) -#else // #if CV_NEON_AARCH64 - vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)) -#endif // #if CV_NEON_AARCH64 - , 16) + vshrn_n_u32(c, 16) )); } @@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a) { float32x4_t a_ = a.val; int32x4_t result; +#if defined _MSC_VER + result = vcvtnq_s32_f32(a_); +#else __asm__ ("fcvtns %0.4s, %1.4s" : "=w"(result) : "w"(a_) : /* No clobbers */); +#endif return v_int32x4(result); } #else diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index ebafee59e0..d2231fe952 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -615,6 +615,9 @@ struct HWFeatures #if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800) have[CV_CPU_NEON] = true; #endif + #if defined _M_ARM64 + have[CV_CPU_NEON] = true; + #endif #ifdef __riscv_vector have[CV_CPU_RISCVV] = true; #endif From 375fe81311171349f4d1572286eabdeb126b69b0 Mon Sep 17 00:00:00 2001 From: 
Egor Smirnov Date: Thu, 17 Feb 2022 20:30:44 +0300 Subject: [PATCH 06/84] fix slice and expand --- modules/dnn/src/darknet/darknet_io.cpp | 2 +- modules/dnn/src/layers/slice_layer.cpp | 52 +++++++++++++++------- modules/dnn/src/onnx/onnx_importer.cpp | 23 +++++----- modules/dnn/src/tensorflow/tf_importer.cpp | 13 +++--- modules/dnn/src/torch/torch_importer.cpp | 2 +- modules/dnn/test/test_layers.cpp | 2 +- 6 files changed, 58 insertions(+), 36 deletions(-) diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 99715df829..11aad453e3 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -376,7 +376,7 @@ namespace cv { int begin[] = {0, split_size * group_id, 0, 0}; cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin, 4); - int end[] = {-1, begin[1] + split_size, -1, -1}; + int end[] = {INT_MAX, begin[1] + split_size, INT_MAX, INT_MAX}; cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end, 4); darknet::LayerParameter lp; diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index a470772813..71de70e93f 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -58,12 +58,32 @@ namespace cv namespace dnn { -void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector >& sliceRanges) +Range normalizeRange(const Range& input_range, int n) { + Range range = input_range; + + range.start = std::min(std::max(range.start, -n), n - 1); + if (range.start < 0) + { + range.start += n; + } + + range.end = std::min(std::max(range.end, -n), n); + if (range.end < 0) + { + range.end += n; + } + + return range; +} + +std::vector > finalizeSliceRange(const MatShape& inpShape, int& axis, + const std::vector >& inputSliceRanges) +{ + std::vector > sliceRanges = inputSliceRanges; CV_Assert(inpShape.size() > 0); bool axisNeg = (axis < 0); axis = (axis + static_cast(inpShape.size())) % 
inpShape.size(); - int n = inpShape[axis]; for (size_t i = 0; i < sliceRanges.size(); ++i){ std::vector& ranges = sliceRanges[i]; @@ -71,16 +91,20 @@ void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector= 0) + for (size_t j = 0; j < ranges.size(); ++j) { - continue; - } + int n = inpShape[j]; + if (n <= 0) + { + continue; + } - CV_Assert(n != 0); - range.start = (n + range.start) % n; + ranges[j] = normalizeRange(ranges[j], n); + } } + + return sliceRanges; } class SliceLayerImpl : public SliceLayer @@ -130,7 +154,7 @@ public: { int size = sizeOrEnd; CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size). - sliceRanges[0][i].end = size > 0 ? (start + size) : -1; // We'll finalize a negative value later. + sliceRanges[0][i].end = size > 0 ? (start + size) : INT_MAX; // We'll finalize a negative value later. } else { @@ -181,8 +205,7 @@ public: MatShape inpShape = inputs[0]; int axis_rw = axis; - std::vector > sliceRanges_rw = sliceRanges; - sliceRangesFromShape(inpShape, axis_rw, sliceRanges_rw); + std::vector > sliceRanges_rw = finalizeSliceRange(inpShape, axis_rw, sliceRanges); if (!sliceRanges_rw.empty()) { @@ -193,7 +216,7 @@ public: for (int j = 0; j < sliceRanges_rw[i].size(); ++j) { if (shapesInitialized || inpShape[j] > 0) - outputs[i][j] = normalize_axis_range(sliceRanges_rw[i][j], inpShape[j]).size(); + outputs[i][j] = normalizeRange(sliceRanges_rw[i][j], inpShape[j]).size(); if (!sliceSteps.empty() && (i < sliceSteps.size()) && (j < sliceSteps[i].size()) && (sliceSteps[i][j] > 1)) outputs[i][j] = (outputs[i][j] + sliceSteps[i][j] - 1) / sliceSteps[i][j]; @@ -230,8 +253,7 @@ public: CV_Assert(inputs.size() == 1); const MatSize& inpShape = inputs[0].size; - sliceRangesFromShape(shape(inputs[0]), axis, sliceRanges); - finalSliceRanges = sliceRanges; + finalSliceRanges = finalizeSliceRange(shape(inputs[0]), axis, sliceRanges); if (sliceRanges.empty()) { @@ -261,7 +283,7 @@ public: // Clamp. 
for (int j = 0; j < finalSliceRanges[i].size(); ++j) { - finalSliceRanges[i][j] = normalize_axis_range(finalSliceRanges[i][j], inpShape[j]); + finalSliceRanges[i][j] = normalizeRange(finalSliceRanges[i][j], inpShape[j]); } } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 736f3a27de..e753fbb103 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1013,13 +1013,12 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP if (axis > 0) { begin.resize(axis, 0); - end.resize(axis, -1); + end.resize(axis, INT_MAX); } for (int i = 0; i < starts.size(); ++i) { begin.push_back(starts.get(i)); - int finish = ends.get(i); - end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim + end.push_back(ends.get(i)); } } else { // inp_size > 1 CV_Assert(inp_size >= 3); @@ -1043,14 +1042,10 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP const int* ends = end_blob.ptr(); if (axis > 0) { begin.resize(axis, 0); - end.resize(axis, -1); + end.resize(axis, INT_MAX); } std::copy(starts, starts + start_blob.total(), std::back_inserter(begin)); - for (int i = 0; i < end_blob.total(); ++i) - { - int finish = ends[i]; - end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim - } + std::copy(ends, ends + end_blob.total(), std::back_inserter(end)); if (inp_size == 5) { CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); @@ -2133,9 +2128,15 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node if (!haveVariables) { - if (broadcast_axes.size() != 1) + if (broadcast_axes.size() > 1) CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); + if (broadcast_axes.empty()) + { + addConstant(output_name, getBlob(node_proto, 0)); + return; + } + Mat input = getBlob(node_proto, 0); input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); @@ -2354,7 +2355,7 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node sliceLp.type = "Slice"; sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name; std::vector begin(inpShape.size(), 0); - std::vector end(inpShape.size(), -1); + std::vector end(inpShape.size(), INT_MAX); begin[axis] = index; end[axis] = index + 1; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 8cbe1c4b23..f9e129622c 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1490,10 +1490,8 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow:: int end_mask = getLayerAttr(layer, "end_mask").i(); for (int i = 0; i < num; ++i) { - if (ends.at(i) < 0) - ends.at(i) -= 1; if (end_mask & (1 << i)) - ends.at(i) = -1; + ends.at(i) = INT_MAX; if (strides.at(i) != 1) CV_Error(Error::StsNotImplemented, format("StridedSlice with stride %d", strides.at(i))); @@ -1791,15 +1789,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso int64_t pads[8]; bool explicit_pads = getExplicitPadding(layerParams, layer, pads); 
int64_t begs[4] = {}; - int64_t ends[4] = {-1, -1, -1, -1}; + int64_t ends[4] = {}; if (explicit_pads) { name += "/deconv"; layerParams.set("pad_mode", "VALID"); + ends[0] = ends[1] = INT_MAX; for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d] { begs[i] = pads[2*i]; - ends[i] = -1 - pads[2*i + 1]; + ends[i] = -pads[2*i + 1]; } } @@ -1819,8 +1818,8 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso const int strideX = layerParams.get("stride_w"); Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW); - const int outH = outShape.at(1 + shift) + begs[2] - 1 - ends[2]; - const int outW = outShape.at(2 + shift) + begs[3] - 1 - ends[3]; + const int outH = outShape.at(1 + shift) + begs[2] - ends[2]; + const int outW = outShape.at(2 + shift) + begs[3] - ends[3]; if (layerParams.get("pad_mode") == "SAME") { layerParams.set("adj_w", (outW - 1) % strideX); diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 5dd9e3e290..e595e993ef 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -949,7 +949,7 @@ struct TorchImporter int size = scalarParams.get("size"); int begins[] = {0, 0, size, size}; - int ends[] = {-1, -1, -size - 1, -size - 1}; + int ends[] = {INT_MAX, INT_MAX, -size, -size}; newModule->apiType = "Slice"; layerParams.set("begin", DictValue::arrayInt(&begins[0], 4)); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 2d4f78c88c..2b17e6fa24 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -2028,7 +2028,7 @@ TEST_P(Layer_Test_Slice, variable_input_shape) int targetId = get<1>(GetParam()); int begin[] = {0, 0, 0, 0}; - int end[] = {-1, -1, -1, -1}; + int end[] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX}; Net net; LayerParams lp; From a332509e02bdd00c1f8b8ffdb4ba47fa7aade053 Mon Sep 
17 00:00:00 2001 From: Sinitsina Maria <49319156+SinM9@users.noreply.github.com> Date: Mon, 28 Feb 2022 18:23:00 +0300 Subject: [PATCH 07/84] Merge pull request #21458 from SinM9:speech_recognition_cpp AudioIO: add dnn speech recognition sample on C++ * add speech recognition cpp * fix warnings * fixes * fix warning * microphone fix --- samples/dnn/speech_recognition.cpp | 587 +++++++++++++++++++++++++++++ 1 file changed, 587 insertions(+) create mode 100644 samples/dnn/speech_recognition.cpp diff --git a/samples/dnn/speech_recognition.cpp b/samples/dnn/speech_recognition.cpp new file mode 100644 index 0000000000..7e9ee1f54d --- /dev/null +++ b/samples/dnn/speech_recognition.cpp @@ -0,0 +1,587 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace cv; +using namespace std; + +class FilterbankFeatures { + +// Initializes pre-processing class. Default values are the values used by the Jasper +// architecture for pre-processing. 
For more details, refer to the paper here: +// https://arxiv.org/abs/1904.03288 + +private: + int sample_rate = 16000; + double window_size = 0.02; + double window_stride = 0.01; + int win_length = static_cast(sample_rate * window_size); // Number of samples in window + int hop_length = static_cast(sample_rate * window_stride); // Number of steps to advance between frames + int n_fft = 512; // Size of window for STFT + + // Parameters for filterbanks calculation + int n_filt = 64; + double lowfreq = 0.; + double highfreq = sample_rate / 2; + +public: + // Mel filterbanks preperation + double hz_to_mel(double frequencies) + { + //Converts frequencies from hz to mel scale + // Fill in the linear scale + double f_min = 0.0; + double f_sp = 200.0 / 3; + double mels = (frequencies - f_min) / f_sp; + // Fill in the log-scale part + double min_log_hz = 1000.0; // beginning of log region (Hz) + double min_log_mel = (min_log_hz - f_min) / f_sp; // same (Mels) + double logstep = std::log(6.4) / 27.0; // step size for log region + + if (frequencies >= min_log_hz) + { + mels = min_log_mel + std::log(frequencies / min_log_hz) / logstep; + } + return mels; + } + + vector mel_to_hz(vector& mels) + { + // Converts frequencies from mel to hz scale + + // Fill in the linear scale + double f_min = 0.0; + double f_sp = 200.0 / 3; + vector freqs; + for (size_t i = 0; i < mels.size(); i++) + { + freqs.push_back(f_min + f_sp * mels[i]); + } + + // And now the nonlinear scale + double min_log_hz = 1000.0; // beginning of log region (Hz) + double min_log_mel = (min_log_hz - f_min) / f_sp; // same (Mels) + double logstep = std::log(6.4) / 27.0; // step size for log region + + for(size_t i = 0; i < mels.size(); i++) + { + if (mels[i] >= min_log_mel) + { + freqs[i] = min_log_hz * exp(logstep * (mels[i] - min_log_mel)); + } + } + return freqs; + } + + vector mel_frequencies(int n_mels, double fmin, double fmax) + { + // Calculates n mel frequencies between 2 frequencies + double min_mel = 
hz_to_mel(fmin); + double max_mel = hz_to_mel(fmax); + + vector mels; + double step = (max_mel - min_mel) / (n_mels - 1); + for(double i = min_mel; i < max_mel; i += step) + { + mels.push_back(i); + } + mels.push_back(max_mel); + + vector res = mel_to_hz(mels); + return res; + } + + vector> mel(int n_mels, double fmin, double fmax) + { + // Generates mel filterbank matrix + + double num = 1 + n_fft / 2; + vector> weights(n_mels, vector(static_cast(num), 0.)); + + // Center freqs of each FFT bin + vector fftfreqs; + double step = (sample_rate / 2) / (num - 1); + for(double i = 0; i <= sample_rate / 2; i += step) + { + fftfreqs.push_back(i); + } + // 'Center freqs' of mel bands - uniformly spaced between limits + vector mel_f = mel_frequencies(n_mels + 2, fmin, fmax); + + vector fdiff; + for(size_t i = 1; i < mel_f.size(); ++i) + { + fdiff.push_back(mel_f[i]- mel_f[i - 1]); + } + + vector> ramps(mel_f.size(), vector(fftfreqs.size())); + for (size_t i = 0; i < mel_f.size(); ++i) + { + for (size_t j = 0; j < fftfreqs.size(); ++j) + { + ramps[i][j] = mel_f[i] - fftfreqs[j]; + } + } + + double lower, upper, enorm; + for (int i = 0; i < n_mels; ++i) + { + // using Slaney-style mel which is scaled to be approx constant energy per channel + enorm = 2./(mel_f[i + 2] - mel_f[i]); + + for (int j = 0; j < static_cast(num); ++j) + { + // lower and upper slopes for all bins + lower = (-1) * ramps[i][j] / fdiff[i]; + upper = ramps[i + 2][j] / fdiff[i + 1]; + + weights[i][j] = max(0., min(lower, upper)) * enorm; + } + } + return weights; + } + + // STFT preperation + vector pad_window_center(vector&data, int size) + { + // Pad the window out to n_fft size + int n = static_cast(data.size()); + int lpad = static_cast((size - n) / 2); + vector pad_array; + + for(int i = 0; i < lpad; ++i) + { + pad_array.push_back(0.); + } + + for(size_t i = 0; i < data.size(); ++i) + { + pad_array.push_back(data[i]); + } + + for(int i = 0; i < lpad; ++i) + { + pad_array.push_back(0.); + } + return 
pad_array; + } + + vector> frame(vector& x) + { + // Slices a data array into overlapping frames. + int n_frames = static_cast(1 + (x.size() - n_fft) / hop_length); + vector> new_x(n_fft, vector(n_frames)); + + for (int i = 0; i < n_fft; ++i) + { + for (int j = 0; j < n_frames; ++j) + { + new_x[i][j] = x[i + j * hop_length]; + } + } + return new_x; + } + + vector hanning() + { + // https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows + vector window_tensor; + for (int j = 1 - win_length; j < win_length; j+=2) + { + window_tensor.push_back(1 - (0.5 * (1 - cos(CV_PI * j / (win_length - 1))))); + } + return window_tensor; + } + + vector> stft_power(vector& y) + { + // Short Time Fourier Transform. The STFT represents a signal in the time-frequency + // domain by computing discrete Fourier transforms (DFT) over short overlapping windows. + // https://en.wikipedia.org/wiki/Short-time_Fourier_transform + + // Pad the time series so that frames are centered + vector new_y; + int num = int(n_fft / 2); + + for (int i = 0; i < num; ++i) + { + new_y.push_back(y[num - i]); + } + for (size_t i = 0; i < y.size(); ++i) + { + new_y.push_back(y[i]); + } + for (size_t i = y.size() - 2; i >= y.size() - num - 1; --i) + { + new_y.push_back(y[i]); + } + + // Compute a window function + vector window_tensor = hanning(); + + // Pad the window out to n_fft size + vector fft_window = pad_window_center(window_tensor, n_fft); + + // Window the time series + vector> y_frames = frame(new_y); + + // Multiply on fft_window + for (size_t i = 0; i < y_frames.size(); ++i) + { + for (size_t j = 0; j < y_frames[0].size(); ++j) + { + y_frames[i][j] *= fft_window[i]; + } + } + + // Transpose frames for computing stft + vector> y_frames_transpose(y_frames[0].size(), vector(y_frames.size())); + for (size_t i = 0; i < y_frames[0].size(); ++i) + { + for (size_t j = 0; j < y_frames.size(); ++j) + { + y_frames_transpose[i][j] = y_frames[j][i]; + } + } + + // Short Time Fourier Transform + // 
and get power of spectrum + vector> spectrum_power(y_frames_transpose[0].size() / 2 + 1 ); + for (size_t i = 0; i < y_frames_transpose.size(); ++i) + { + Mat dstMat; + dft(y_frames_transpose[i], dstMat, DFT_COMPLEX_OUTPUT); + + // we need only the first part of the spectrum, the second part is symmetrical + for (int j = 0; j < static_cast(y_frames_transpose[0].size()) / 2 + 1; ++j) + { + double power_re = dstMat.at(2 * j) * dstMat.at(2 * j); + double power_im = dstMat.at(2 * j + 1) * dstMat.at(2 * j + 1); + spectrum_power[j].push_back(power_re + power_im); + } + } + return spectrum_power; + } + + Mat calculate_features(vector& x) + { + // Calculates filterbank features matrix. + + // Do preemphasis + std::default_random_engine generator; + std::normal_distribution normal_distr(0, 1); + double dither = 1e-5; + for(size_t i = 0; i < x.size(); ++i) + { + x[i] += dither * static_cast(normal_distr(generator)); + } + double preemph = 0.97; + for (size_t i = x.size() - 1; i > 0; --i) + { + x[i] -= preemph * x[i-1]; + } + + // Calculate Short Time Fourier Transform and get power of spectrum + auto spectrum_power = stft_power(x); + + vector> filterbanks = mel(n_filt, lowfreq, highfreq); + + // Calculate log of multiplication of filterbanks matrix on spectrum_power matrix + vector> x_stft(filterbanks.size(), vector(spectrum_power[0].size(), 0)); + + for (size_t i = 0; i < filterbanks.size(); ++i) + { + for (size_t j = 0; j < filterbanks[0].size(); ++j) + { + for (size_t k = 0; k < spectrum_power[0].size(); ++k) + { + x_stft[i][k] += filterbanks[i][j] * spectrum_power[j][k]; + } + } + for (size_t k = 0; k < spectrum_power[0].size(); ++k) + { + x_stft[i][k] = std::log(x_stft[i][k] + 1e-20); + } + } + + // normalize data + auto elments_num = x_stft[0].size(); + for(size_t i = 0; i < x_stft.size(); ++i) + { + double x_mean = std::accumulate(x_stft[i].begin(), x_stft[i].end(), 0.) 
/ elments_num; // arithmetic mean + double x_std = 0; // standard deviation + for(size_t j = 0; j < elments_num; ++j) + { + double subtract = x_stft[i][j] - x_mean; + x_std += subtract * subtract; + } + x_std /= elments_num; + x_std = sqrt(x_std) + 1e-10; // make sure x_std is not zero + + for(size_t j = 0; j < elments_num; ++j) + { + x_stft[i][j] = (x_stft[i][j] - x_mean) / x_std; // standard score + } + } + + Mat calculate_features(static_cast(x_stft.size()), static_cast(x_stft[0].size()), CV_32F); + for(int i = 0; i < calculate_features.size[0]; ++i) + { + for(int j = 0; j < calculate_features.size[1]; ++j) + { + calculate_features.at(i, j) = static_cast(x_stft[i][j]); + } + } + return calculate_features; + } +}; + +class Decoder { + // Used for decoding the output of jasper model +private: + unordered_map labels_map = fillMap(); + int blank_id = 28; + +public: + unordered_map fillMap() + { + vector labels={' ','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p' + ,'q','r','s','t','u','v','w','x','y','z','\''}; + unordered_map map; + for(int i = 0; i < static_cast(labels.size()); ++i) + { + map[i] = labels[i]; + } + return map; + } + + string decode(Mat& x) + { + // Takes output of Jasper model and performs ctc decoding algorithm to + // remove duplicates and special symbol. 
Returns prediction + + vector prediction; + for(int i = 0; i < x.size[1]; ++i) + { + double maxEl = -1e10; + int ind = 0; + for(int j = 0; j < x.size[2]; ++j) + { + if (maxEl <= x.at(0, i, j)) + { + maxEl = x.at(0, i, j); + ind = j; + } + } + prediction.push_back(ind); + } + // CTC decoding procedure + vector decoded_prediction = {}; + int previous = blank_id; + + for(int i = 0; i < static_cast(prediction.size()); ++i) + { + if (( prediction[i] != previous || previous == blank_id) && prediction[i] != blank_id) + { + decoded_prediction.push_back(prediction[i]); + } + previous = prediction[i]; + } + + string hypotheses = {}; + for(size_t i = 0; i < decoded_prediction.size(); ++i) + { + auto it = labels_map.find(static_cast(decoded_prediction[i])); + if (it != labels_map.end()) + hypotheses.push_back(it->second); + } + return hypotheses; + } + +}; + +static string predict(Mat& features, dnn::Net net, Decoder decoder) +{ + // Passes the features through the Jasper model and decodes the output to english transcripts. 
+ + // expand 2d features matrix to 3d + vector sizes = {1, static_cast(features.size[0]), + static_cast(features.size[1])}; + features = features.reshape(0, sizes); + + // make prediction + net.setInput(features); + Mat output = net.forward(); + + // decode output to transcript + auto prediction = decoder.decode(output); + return prediction; +} + +static int readAudioFile(vector& inputAudio, string file, int audioStream) +{ + VideoCapture cap; + int samplingRate = 16000; + vector params { CAP_PROP_AUDIO_STREAM, audioStream, + CAP_PROP_VIDEO_STREAM, -1, + CAP_PROP_AUDIO_DATA_DEPTH, CV_32F, + CAP_PROP_AUDIO_SAMPLES_PER_SECOND, samplingRate + }; + cap.open(file, CAP_ANY, params); + if (!cap.isOpened()) + { + cerr << "Error : Can't read audio file: '" << file << "' with audioStream = " << audioStream << endl; + return -1; + } + const int audioBaseIndex = (int)cap.get(CAP_PROP_AUDIO_BASE_INDEX); + vector frameVec; + Mat frame; + for (;;) + { + if (cap.grab()) + { + cap.retrieve(frame, audioBaseIndex); + frameVec = frame; + inputAudio.insert(inputAudio.end(), frameVec.begin(), frameVec.end()); + } + else + { + break; + } + } + return samplingRate; +} + +static int readAudioMicrophone(vector& inputAudio, int microTime) +{ + VideoCapture cap; + int samplingRate = 16000; + vector params { CAP_PROP_AUDIO_STREAM, 0, + CAP_PROP_VIDEO_STREAM, -1, + CAP_PROP_AUDIO_DATA_DEPTH, CV_32F, + CAP_PROP_AUDIO_SAMPLES_PER_SECOND, samplingRate + }; + cap.open(0, CAP_ANY, params); + if (!cap.isOpened()) + { + cerr << "Error: Can't open microphone" << endl; + return -1; + } + + const int audioBaseIndex = (int)cap.get(CAP_PROP_AUDIO_BASE_INDEX); + vector frameVec; + Mat frame; + if (microTime <= 0) + { + cerr << "Error: Duration of audio chunk must be > 0" << endl; + return -1; + } + size_t sizeOfData = static_cast(microTime * samplingRate); + while (inputAudio.size() < sizeOfData) + { + if (cap.grab()) + { + cap.retrieve(frame, audioBaseIndex); + frameVec = frame; + 
inputAudio.insert(inputAudio.end(), frameVec.begin(), frameVec.end()); + } + else + { + cerr << "Error: Grab error" << endl; + break; + } + } + return samplingRate; +} + +int main(int argc, char** argv) +{ + const String keys = + "{help h usage ? | | This script runs Jasper Speech recognition model }" + "{input_file i | | Path to input audio file. If not specified, microphone input will be used }" + "{audio_duration t | 15 | Duration of audio chunk to be captured from microphone }" + "{audio_stream a | 0 | CAP_PROP_AUDIO_STREAM value }" + "{show_spectrogram s | false | Show a spectrogram of the input audio: true / false / 1 / 0 }" + "{model m | jasper.onnx | Path to the onnx file of Jasper. You can download the converted onnx model " + "from https://drive.google.com/drive/folders/1wLtxyao4ItAg8tt4Sb63zt6qXzhcQoR6?usp=sharing}" + "{backend b | dnn::DNN_BACKEND_DEFAULT | Select a computation backend: " + "dnn::DNN_BACKEND_DEFAULT, " + "dnn::DNN_BACKEND_INFERENCE_ENGINE, " + "dnn::DNN_BACKEND_OPENCV }" + "{target t | dnn::DNN_TARGET_CPU | Select a target device: " + "dnn::DNN_TARGET_CPU, " + "dnn::DNN_TARGET_OPENCL, " + "dnn::DNN_TARGET_OPENCL_FP16 }" + ; + CommandLineParser parser(argc, argv, keys); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + // Load Network + dnn::Net net = dnn::readNetFromONNX(parser.get("model")); + net.setPreferableBackend(parser.get("backend")); + net.setPreferableTarget(parser.get("target")); + + // Get audio + vectorinputAudio = {}; + int samplingRate = 0; + if (parser.has("input_file")) + { + string audio = samples::findFile(parser.get("input_file")); + samplingRate = readAudioFile(inputAudio, audio, parser.get("audio_stream")); + } + else + { + samplingRate = readAudioMicrophone(inputAudio, parser.get("audio_duration")); + } + + if ((inputAudio.size() == 0) || samplingRate <= 0) + { + cerr << "Error: problems with audio reading, check input arguments" << endl; + return -1; + } + + if (inputAudio.size() / 
samplingRate < 6) + { + cout << "Warning: For predictable network performance duration of audio must exceed 6 sec." + " Audio will be extended with zero samples" << endl; + for(int i = static_cast(inputAudio.size()) - 1; i < samplingRate * 6; ++i) + { + inputAudio.push_back(0); + } + } + + // Calculate features + FilterbankFeatures filter; + auto calculated_features = filter.calculate_features(inputAudio); + + // Show spectogram if required + if (parser.get("show_spectrogram") == true) + { + Mat spectogram; + normalize(calculated_features, spectogram, 0, 255, NORM_MINMAX, CV_8U); + applyColorMap(spectogram, spectogram, COLORMAP_INFERNO); + imshow("spectogram", spectogram); + waitKey(0); + } + + Decoder decoder; + string prediction = predict(calculated_features, net, decoder); + for( auto &transcript: prediction) + { + cout << transcript; + } + + return 0; +} From ebb6915e588fcee1e6664cce670f0253bac0e67b Mon Sep 17 00:00:00 2001 From: Pierre Chatelier Date: Tue, 1 Mar 2022 17:55:00 +0100 Subject: [PATCH 08/84] Merge pull request #21645 from chacha21:applyColorMap_8UC1_optimized Optimize cv::applyColorMap() for simple case * Optimize cv::applyColorMap() for simple case PR for 21640 For regular cv::Mat CV_8UC1 src, applying the colormap is simpler than calling the cv::LUT() mechanism. 
* add support for src as CV_8UC3 src as CV_8UC3 is handled with a BGR2GRAY conversion, the same optimized code being used afterwards * code style rely on cv::Mat.ptr() to index data * Move new implementation to ColorMap::operator() Changes as suggested by reviewer * style improvements suggsted by reviewer * typo * tune parallel work * better usage of parallel_for_ use nstripes parameter of parallel_for_ assume _lut is continuous to bring faster pixel indexing optimize src/dst access by contiguous rows of pixels do not locally copy the LUT any more, it is no more relevant with the new optimizations --- modules/imgproc/src/colormap.cpp | 58 ++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/modules/imgproc/src/colormap.cpp b/modules/imgproc/src/colormap.cpp index 26371edad6..966e39eaab 100644 --- a/modules/imgproc/src/colormap.cpp +++ b/modules/imgproc/src/colormap.cpp @@ -734,12 +734,57 @@ namespace colormap Mat src = _src.getMat(); if(src.type() != CV_8UC1 && src.type() != CV_8UC3) CV_Error(Error::StsBadArg, "cv::ColorMap only supports source images of type CV_8UC1 or CV_8UC3"); - // Turn into a BGR matrix into its grayscale representation. - if(src.type() == CV_8UC3) - cvtColor(src.clone(), src, COLOR_BGR2GRAY); - cvtColor(src.clone(), src, COLOR_GRAY2BGR); - // Apply the ColorMap. 
- LUT(src, _lut, _dst); + + CV_CheckEQ(src.dims, 2, "Not supported"); + + CV_Assert(_lut.isContinuous()); + const int lut_type = _lut.type(); + CV_CheckType(lut_type, (lut_type == CV_8UC1) || (lut_type == CV_8UC3), + "Only CV_8UC1 and CV_8UC3 LUT are supported"); + + Mat srcGray; + if (src.channels() == 1) + srcGray = src; + else + cv::cvtColor(src, srcGray, cv::COLOR_BGR2GRAY);//BGR because of historical cv::LUT() usage + + _dst.create(src.size(), lut_type); + Mat dstMat = _dst.getMat(); + + //we do not use cv::LUT() which requires src.channels() == dst.channels() + const int rows = srcGray.rows; + const int cols = srcGray.cols; + const int minimalPixelsPerPacket = 1<<12; + const int rowsPerPacket = std::max(1, minimalPixelsPerPacket/cols); + const int rowsPacketsCount = (rows+rowsPerPacket-1)/rowsPerPacket; + const Range all(0, rows); + + if (lut_type == CV_8UC1) { + typedef unsigned char lut_pixel_t; + const lut_pixel_t* srcLUT = _lut.ptr(0); + auto body = [&, cols](const Range& range) -> void { + for(int row = range.start ; row(row); + lut_pixel_t* dstRow = dstMat.ptr(row); + for(int col = 0 ; col(0); + auto body = [&, cols](const Range& range) -> void { + for(int row = range.start ; row(row); + lut_pixel_t* dstRow = dstMat.ptr(row); + for(int col = 0 ; col Date: Thu, 20 Jan 2022 15:21:47 +0300 Subject: [PATCH 09/84] feature: submodule or a class scope for exported classes All classes are registered in the scope that corresponds to C++ namespace or exported class. Example: `cv::ml::Boost` is exported as `cv.ml.Boost` `cv::SimpleBlobDetector::Params` is exported as `cv.SimpleBlobDetector.Params` For backward compatibility all classes are registered in the global module with their mangling name containing scope information. 
Example: `cv::ml::Boost` has `cv.ml_Boost` alias to `cv.ml.Boost` type --- .../include/opencv2/core/bindings_utils.hpp | 47 +++++ modules/python/src2/cv2.cpp | 191 +++++++++++++++++- modules/python/src2/gen2.py | 110 +++++++--- modules/python/src2/pycompat.hpp | 144 +++++++------ modules/python/test/test_misc.py | 85 ++++++++ 5 files changed, 484 insertions(+), 93 deletions(-) diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index f091606c4a..6e825ec816 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -217,6 +217,53 @@ namespace nested { CV_WRAP static inline bool testEchoBooleanFunction(bool flag) { return flag; } + +class CV_EXPORTS_W CV_WRAP_AS(ExportClassName) OriginalClassName +{ +public: + struct CV_EXPORTS_W_SIMPLE Params + { + CV_PROP_RW int int_value; + CV_PROP_RW float float_value; + + CV_WRAP explicit Params(int int_param = 123, float float_param = 3.5f) + { + int_value = int_param; + float_value = float_param; + } + }; + + explicit OriginalClassName(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + params_ = params; + } + + CV_WRAP int getIntParam() const + { + return params_.int_value; + } + + CV_WRAP float getFloatParam() const + { + return params_.float_value; + } + + CV_WRAP static std::string originalName() + { + return "OriginalClassName"; + } + + CV_WRAP static Ptr + create(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + return makePtr(params); + } + +private: + OriginalClassName::Params params_; +}; + +typedef OriginalClassName::Params OriginalClassName_Params; } // namespace nested //! 
@} // core_utils diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index a82086f315..f41cd6f389 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -2066,9 +2066,9 @@ static int convert_to_char(PyObject *o, char *dst, const ArgInfo& info) #include "pyopencv_custom_headers.h" #ifdef CVPY_DYNAMIC_INIT -#define CVPY_TYPE(WNAME, NAME, STORAGE, SNAME, _1, _2) CVPY_TYPE_DECLARE_DYNAMIC(WNAME, NAME, STORAGE, SNAME) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, _1, _2, SCOPE) CVPY_TYPE_DECLARE_DYNAMIC(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) #else -#define CVPY_TYPE(WNAME, NAME, STORAGE, SNAME, _1, _2) CVPY_TYPE_DECLARE(WNAME, NAME, STORAGE, SNAME) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, _1, _2, SCOPE) CVPY_TYPE_DECLARE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) #endif #include "pyopencv_generated_types.h" #undef CVPY_TYPE @@ -2251,6 +2251,189 @@ static bool init_submodule(PyObject * root, const char * name, PyMethodDef * met return true; } +static inline +bool registerTypeInModuleScope(PyObject* module, const char* type_name, PyObject* type_obj) +{ + if (PyModule_AddObject(module, type_name, type_obj) < 0) + { + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in module scope '%s'", + type_name, PyModule_GetName(module) + ); + Py_DECREF(type_obj); + return false; + } + return true; +} + +static inline +bool registerTypeInClassScope(PyObject* cls, const char* type_name, PyObject* type_obj) +{ + if (!PyType_CheckExact(cls)) { + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in class scope. 
" + "Scope class object has a wrong type", type_name + ); + return false; + } + if (PyObject_SetAttrString(cls, type_name, type_obj) < 0) + { + #ifndef Py_LIMITED_API + PyObject* cls_dict = reinterpret_cast(cls)->tp_dict; + if (PyDict_SetItemString(cls_dict, type_name, type_obj) >= 0) { + /// Clearing the error set by PyObject_SetAttrString: + /// TypeError: can't set attributes of built-in/extension type NAME + PyErr_Clear(); + return true; + } + #endif + const std::string cls_name = getPyObjectNameAttr(cls); + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in '%s' class scope. Can't update scope dictionary", + type_name, cls_name.c_str() + ); + return false; + } + return true; +} + +static inline +PyObject* getScopeFromTypeObject(PyObject* obj, const std::string& scope_name) +{ + if (!PyType_CheckExact(obj)) { + const std::string type_name = getPyObjectNameAttr(obj); + return PyErr_Format(PyExc_ImportError, + "Failed to get scope from type '%s' " + "Scope class object has a wrong type", type_name.c_str() + ); + } + /// When using LIMITED API all classes are registered in the heap +#if defined(Py_LIMITED_API) + return PyObject_GetAttrString(obj, scope_name.c_str()); +#else + /// Otherwise classes may be registed on the stack or heap + PyObject* type_dict = reinterpret_cast(obj)->tp_dict; + if (!type_dict) { + const std::string type_name = getPyObjectNameAttr(obj); + return PyErr_Format(PyExc_ImportError, + "Failed to get scope from type '%s' " + "Type dictionary is not available", type_name.c_str() + ); + } + return PyDict_GetItemString(type_dict, scope_name.c_str()); +#endif // Py_LIMITED_API +} + +static inline +PyObject* findTypeScope(PyObject* root_module, const std::string& scope_name) +{ + PyObject* scope = root_module; + if (scope_name.empty()) + { + return scope; + } + /// Starting with 1 to omit leading dot in the scope name + size_t name_end = scope_name.find('.', 1); + if (name_end == std::string::npos) + { + name_end = 
scope_name.size(); + } + for (size_t name_start = 1; name_start < scope_name.size() && scope; ) + { + const std::string current_scope_name = scope_name.substr(name_start, + name_end - name_start); + + if (PyModule_CheckExact(scope)) + { + PyObject* scope_dict = PyModule_GetDict(scope); + if (!scope_dict) + { + return PyErr_Format(PyExc_ImportError, + "Scope '%s' dictionary is not available during the search for " + " the '%s' scope object", current_scope_name.c_str(), + scope_name.c_str() + ); + } + + scope = PyDict_GetItemString(scope_dict, current_scope_name.c_str()); + } + else if (PyType_CheckExact(scope)) + { + scope = getScopeFromTypeObject(scope, current_scope_name); + } + else + { + return PyErr_Format(PyExc_ImportError, + "Can't find scope '%s'. '%s' doesn't reference a module or a class", + scope_name.c_str(), current_scope_name.c_str() + ); + } + + + name_start = name_end + 1; + name_end = scope_name.find('.', name_start); + if (name_end == std::string::npos) + { + name_end = scope_name.size(); + } + } + if (!scope) + { + return PyErr_Format(PyExc_ImportError, + "Module or class with name '%s' can't be found in '%s' module", + scope_name.c_str(), PyModule_GetName(root_module) + ); + } + return scope; +} + +static bool registerNewType(PyObject* root_module, const char* type_name, + PyObject* type_obj, const std::string& scope_name) +{ + PyObject* scope = findTypeScope(root_module, scope_name); + + /// If scope can't be found it means that there is an error during + /// bindings generation + if (!scope) { + return false; + } + + if (PyModule_CheckExact(scope)) + { + if (!registerTypeInModuleScope(scope, type_name, type_obj)) + { + return false; + } + } + else + { + /// In Python 2 it is disallowed to register an inner classes + /// via modifing dictionary of the built-in type. 
+ if (!registerTypeInClassScope(scope, type_name, type_obj)) + { + return false; + } + } + + /// Expose all classes that are defined in the submodules as aliases in the + /// root module for backward compatibility + /// If submodule and root module are same than no aliases registration are + /// required + if (scope != root_module) + { + std::string type_name_str(type_name); + + std::string alias_name; + alias_name.reserve(scope_name.size() + type_name_str.size()); + std::replace_copy(scope_name.begin() + 1, scope_name.end(), std::back_inserter(alias_name), '.', '_'); + alias_name += '_'; + alias_name += type_name_str; + + return registerTypeInModuleScope(root_module, alias_name.c_str(), type_obj); + } + return true; +} + #include "pyopencv_generated_modules_content.h" static bool init_body(PyObject * m) @@ -2264,10 +2447,10 @@ static bool init_body(PyObject * m) #undef CVPY_MODULE #ifdef CVPY_DYNAMIC_INIT -#define CVPY_TYPE(WNAME, NAME, _1, _2, BASE, CONSTRUCTOR) CVPY_TYPE_INIT_DYNAMIC(WNAME, NAME, return false, BASE, CONSTRUCTOR) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, _1, _2, BASE, CONSTRUCTOR, SCOPE) CVPY_TYPE_INIT_DYNAMIC(EXPORT_NAME, CLASS_ID, return false, BASE, CONSTRUCTOR, SCOPE) PyObject * pyopencv_NoBase_TypePtr = NULL; #else -#define CVPY_TYPE(WNAME, NAME, _1, _2, BASE, CONSTRUCTOR) CVPY_TYPE_INIT_STATIC(WNAME, NAME, return false, BASE, CONSTRUCTOR) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, _1, _2, BASE, CONSTRUCTOR, SCOPE) CVPY_TYPE_INIT_STATIC(EXPORT_NAME, CLASS_ID, return false, BASE, CONSTRUCTOR, SCOPE) PyTypeObject * pyopencv_NoBase_TypePtr = NULL; #endif #include "pyopencv_generated_types.h" diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 51566fc248..79853648c5 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -243,10 +243,20 @@ class ClassProp(object): self.readonly = False class ClassInfo(object): - def __init__(self, name, decl=None): + def __init__(self, name, decl=None, codegen=None): 
+ # Scope name can be a module or other class e.g. cv::SimpleBlobDetector::Params + scope_name, self.original_name = name.rsplit(".", 1) + + # In case scope refer the outer class exported with different name + if codegen: + scope_name = codegen.get_export_scope_name(scope_name) + self.scope_name = re.sub(r"^cv\.?", "", scope_name) + + self.export_name = self.original_name + + self.class_id = normalize_class_name(name) + self.cname = name.replace(".", "::") - self.name = self.wname = normalize_class_name(name) - self.sname = name[name.rfind('.') + 1:] self.ismap = False self.issimple = False self.isalgorithm = False @@ -261,7 +271,7 @@ class ClassInfo(object): if decl: bases = decl[1].split()[1:] if len(bases) > 1: - print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,)) + print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.cname,)) print(" Bases: ", " ".join(bases)) print(" Only the first base class will be used") #return sys.exit(-1) @@ -275,22 +285,47 @@ class ClassInfo(object): for m in decl[2]: if m.startswith("="): - wname = m[1:] - npos = name.rfind('.') - if npos >= 0: - self.wname = normalize_class_name(name[:npos] + '.' + wname) - else: - self.wname = wname - customname = True + # Aliasing only affects the exported class name, not class identifier + self.export_name = m[1:] elif m == "/Map": self.ismap = True elif m == "/Simple": self.issimple = True self.props = [ClassProp(p) for p in decl[3]] + if not self.has_export_alias and self.original_name.startswith("Cv"): + self.export_name = self.export_name[2:] + if not customname and self.wname.startswith("Cv"): self.wname = self.wname[2:] + @property + def wname(self): + if len(self.scope_name) > 0: + return self.scope_name.replace(".", "_") + "_" + self.export_name + + return self.export_name + + @property + def name(self): + return self.class_id + + @property + def full_scope_name(self): + return "cv." 
+ self.scope_name if len(self.scope_name) else "cv" + + @property + def full_export_name(self): + return self.full_scope_name + "." + self.export_name + + @property + def full_original_name(self): + return self.full_scope_name + "." + self.original_name + + @property + def has_export_alias(self): + return self.export_name != self.original_name + def gen_map_code(self, codegen): all_classes = codegen.classes code = "static bool pyopencv_to(PyObject* src, %s& dst, const ArgInfo& info)\n{\n PyObject* tmp;\n bool ok;\n" % (self.cname) @@ -343,9 +378,11 @@ class ClassInfo(object): methods_code.write(m.gen_code(codegen)) methods_inits.write(m.get_tab_entry()) - code = gen_template_type_impl.substitute(name=self.name, wname=self.wname, cname=self.cname, - getset_code=getset_code.getvalue(), getset_inits=getset_inits.getvalue(), - methods_code=methods_code.getvalue(), methods_inits=methods_inits.getvalue()) + code = gen_template_type_impl.substitute(name=self.name, + getset_code=getset_code.getvalue(), + getset_inits=getset_inits.getvalue(), + methods_code=methods_code.getvalue(), + methods_inits=methods_inits.getvalue()) return code @@ -359,13 +396,15 @@ class ClassInfo(object): if self.constructor is not None: constructor_name = self.constructor.get_wrapper_name() - return "CVPY_TYPE({}, {}, {}, {}, {}, {});\n".format( - self.wname, - self.name, + return 'CVPY_TYPE({}, {}, {}, {}, {}, {}, "{}");\n'.format( + self.export_name, + self.class_id, self.cname if self.issimple else "Ptr<{}>".format(self.cname), - self.sname if self.issimple else "Ptr", + self.original_name if self.issimple else "Ptr", baseptr, - constructor_name + constructor_name, + # Leading dot is required to provide correct class naming + "." + self.scope_name if len(self.scope_name) > 0 else self.scope_name ) @@ -815,12 +854,12 @@ class FuncInfo(object): classinfo = all_classes[self.classname] #if dump: pprint(vars(classinfo)) if self.isconstructor: - py_name = 'cv.' 
+ classinfo.wname - elif self.is_static: - py_name = '.'.join([self.namespace, classinfo.sname + '_' + self.variants[0].wname]) + py_name = classinfo.full_export_name else: + py_name = classinfo.full_export_name + "." + self.variants[0].wname + + if not self.is_static: cname = classinfo.cname + '::' + cname - py_name = 'cv.' + classinfo.wname + '.' + self.variants[0].wname else: py_name = '.'.join([self.namespace, self.variants[0].wname]) #if dump: print(cname + " => " + py_name) @@ -862,7 +901,7 @@ class PythonWrapperGenerator(object): self.class_idx = 0 def add_class(self, stype, name, decl): - classinfo = ClassInfo(name, decl) + classinfo = ClassInfo(name, decl, self) classinfo.decl_idx = self.class_idx self.class_idx += 1 @@ -872,16 +911,30 @@ class PythonWrapperGenerator(object): sys.exit(-1) self.classes[classinfo.name] = classinfo - # Add Class to json file. - namespace, classes, name = self.split_decl_name(name) + namespace, _, _ = self.split_decl_name(name) namespace = '.'.join(namespace) - name = '_'.join(classes+[name]) + # Registering a namespace if it is not already handled or + # doesn't have anything except classes defined in it + self.namespaces.setdefault(namespace, Namespace()) - py_name = 'cv.' + classinfo.wname # use wrapper name + # Add Class to json file. + py_name = classinfo.full_export_name # use wrapper name py_signatures = self.py_signatures.setdefault(classinfo.cname, []) py_signatures.append(dict(name=py_name)) #print('class: ' + classinfo.cname + " => " + py_name) + def get_export_scope_name(self, original_scope_name): + # Outer classes should be registered before their content - inner classes in this case + class_scope = self.classes.get(normalize_class_name(original_scope_name), None) + + if class_scope: + return class_scope.full_export_name + + # Otherwise it is a namespace. 
+ # If something is messed up at this point - it will be revelead during + # library import + return original_scope_name + def split_decl_name(self, name): chunks = name.split('.') namespace = chunks[:-1] @@ -971,6 +1024,7 @@ class PythonWrapperGenerator(object): w_classes.append(w_classname) g_wname = "_".join(w_classes+[name]) func_map = self.namespaces.setdefault(namespace_str, Namespace()).funcs + # Exports static function with internal name (backward compatibility) func = func_map.setdefault(g_name, FuncInfo("", g_name, cname, isconstructor, namespace_str, False)) func.add_variant(decl, isphantom) if g_wname != g_name: # TODO OpenCV 5.0 diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp index 5c1bc2354f..b9eca7bc18 100644 --- a/modules/python/src2/pycompat.hpp +++ b/modules/python/src2/pycompat.hpp @@ -60,6 +60,10 @@ #endif // PY_MAJOR >=3 +#ifndef PyType_CheckExact +#define PyType_CheckExact(obj) (Py_TYPE(op) == &PyType_Type) +#endif // !PyType_CheckExact + static inline bool getUnicodeString(PyObject * obj, std::string &str) { bool res = false; @@ -91,6 +95,26 @@ static inline bool getUnicodeString(PyObject * obj, std::string &str) return res; } +static inline +std::string getPyObjectNameAttr(PyObject* obj) +{ + std::string obj_name; + PyObject* cls_name_obj = PyObject_GetAttrString(obj, "__name__"); + if (cls_name_obj && !getUnicodeString(cls_name_obj, obj_name)) { + obj_name.clear(); + } + #ifndef Py_LIMITED_API + if (PyType_CheckExact(obj) && obj_name.empty()) + { + obj_name = reinterpret_cast(obj)->tp_name; + } + #endif + if (obj_name.empty()) { + obj_name = ""; + } + return obj_name; +} + //================================================================================================== #define CV_PY_FN_WITH_KW_(fn, flags) (PyCFunction)(void*)(PyCFunctionWithKeywords)(fn), (flags) | METH_VARARGS | METH_KEYWORDS @@ -172,107 +196,106 @@ PyObject* pyopencv_from(const TYPE& src) #endif -#define CVPY_TYPE_DECLARE(WNAME, NAME, 
STORAGE, SNAME) \ - struct pyopencv_##NAME##_t \ +#define CVPY_TYPE_DECLARE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) \ + struct pyopencv_##CLASS_ID##_t \ { \ PyObject_HEAD \ STORAGE v; \ }; \ - static PyTypeObject pyopencv_##NAME##_TypeXXX = \ + static PyTypeObject pyopencv_##CLASS_ID##_TypeXXX = \ { \ CVPY_TYPE_HEAD \ - MODULESTR"."#WNAME, \ - sizeof(pyopencv_##NAME##_t), \ + MODULESTR SCOPE"."#EXPORT_NAME, \ + sizeof(pyopencv_##CLASS_ID##_t), \ }; \ - static PyTypeObject * pyopencv_##NAME##_TypePtr = &pyopencv_##NAME##_TypeXXX; \ - static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \ + static PyTypeObject * pyopencv_##CLASS_ID##_TypePtr = &pyopencv_##CLASS_ID##_TypeXXX; \ + static bool pyopencv_##CLASS_ID##_getp(PyObject * self, STORAGE * & dst) \ { \ - if (PyObject_TypeCheck(self, pyopencv_##NAME##_TypePtr)) \ + if (PyObject_TypeCheck(self, pyopencv_##CLASS_ID##_TypePtr)) \ { \ - dst = &(((pyopencv_##NAME##_t*)self)->v); \ + dst = &(((pyopencv_##CLASS_ID##_t*)self)->v); \ return true; \ } \ return false; \ } \ - static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \ + static PyObject * pyopencv_##CLASS_ID##_Instance(const STORAGE &r) \ { \ - pyopencv_##NAME##_t *m = PyObject_NEW(pyopencv_##NAME##_t, pyopencv_##NAME##_TypePtr); \ + pyopencv_##CLASS_ID##_t *m = PyObject_NEW(pyopencv_##CLASS_ID##_t, pyopencv_##CLASS_ID##_TypePtr); \ new (&(m->v)) STORAGE(r); \ return (PyObject*)m; \ } \ - static void pyopencv_##NAME##_dealloc(PyObject* self) \ + static void pyopencv_##CLASS_ID##_dealloc(PyObject* self) \ { \ - ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \ + ((pyopencv_##CLASS_ID##_t*)self)->v.STORAGE::~SNAME(); \ PyObject_Del(self); \ } \ - static PyObject* pyopencv_##NAME##_repr(PyObject* self) \ + static PyObject* pyopencv_##CLASS_ID##_repr(PyObject* self) \ { \ char str[1000]; \ - sprintf(str, "<"#WNAME" %p>", self); \ + sprintf(str, "< " MODULESTR SCOPE"."#EXPORT_NAME" %p>", self); \ return PyString_FromString(str); \ } 
-#define CVPY_TYPE_INIT_STATIC(WNAME, NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \ +#define CVPY_TYPE_INIT_STATIC(EXPORT_NAME, CLASS_ID, ERROR_HANDLER, BASE, CONSTRUCTOR, SCOPE) \ { \ - pyopencv_##NAME##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \ - pyopencv_##NAME##_TypePtr->tp_dealloc = pyopencv_##NAME##_dealloc; \ - pyopencv_##NAME##_TypePtr->tp_repr = pyopencv_##NAME##_repr; \ - pyopencv_##NAME##_TypePtr->tp_getset = pyopencv_##NAME##_getseters; \ - pyopencv_##NAME##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \ - pyopencv_##NAME##_TypePtr->tp_methods = pyopencv_##NAME##_methods; \ - pyopencv_##NAME##_TypePtr->tp_alloc = PyType_GenericAlloc; \ - pyopencv_##NAME##_TypePtr->tp_new = PyType_GenericNew; \ - pyopencv_##NAME##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ - if (PyType_Ready(pyopencv_##NAME##_TypePtr) != 0) \ + pyopencv_##CLASS_ID##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \ + pyopencv_##CLASS_ID##_TypePtr->tp_dealloc = pyopencv_##CLASS_ID##_dealloc; \ + pyopencv_##CLASS_ID##_TypePtr->tp_repr = pyopencv_##CLASS_ID##_repr; \ + pyopencv_##CLASS_ID##_TypePtr->tp_getset = pyopencv_##CLASS_ID##_getseters; \ + pyopencv_##CLASS_ID##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \ + pyopencv_##CLASS_ID##_TypePtr->tp_methods = pyopencv_##CLASS_ID##_methods; \ + pyopencv_##CLASS_ID##_TypePtr->tp_alloc = PyType_GenericAlloc; \ + pyopencv_##CLASS_ID##_TypePtr->tp_new = PyType_GenericNew; \ + pyopencv_##CLASS_ID##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ + if (PyType_Ready(pyopencv_##CLASS_ID##_TypePtr) != 0) \ { \ ERROR_HANDLER; \ } \ - CVPY_TYPE_INCREF(pyopencv_##NAME##_TypePtr); \ - if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + CVPY_TYPE_INCREF(pyopencv_##CLASS_ID##_TypePtr); \ + if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ - printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ - 
Py_DECREF(pyopencv_##NAME##_TypePtr); \ + printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ ERROR_HANDLER; \ } \ } //================================================================================================== -#define CVPY_TYPE_DECLARE_DYNAMIC(WNAME, NAME, STORAGE, SNAME) \ - struct pyopencv_##NAME##_t \ +#define CVPY_TYPE_DECLARE_DYNAMIC(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) \ + struct pyopencv_##CLASS_ID##_t \ { \ PyObject_HEAD \ STORAGE v; \ }; \ - static PyObject * pyopencv_##NAME##_TypePtr = 0; \ - static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \ + static PyObject * pyopencv_##CLASS_ID##_TypePtr = 0; \ + static bool pyopencv_##CLASS_ID##_getp(PyObject * self, STORAGE * & dst) \ { \ - if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##NAME##_TypePtr)) \ + if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##CLASS_ID##_TypePtr)) \ { \ - dst = &(((pyopencv_##NAME##_t*)self)->v); \ + dst = &(((pyopencv_##CLASS_ID##_t*)self)->v); \ return true; \ } \ return false; \ } \ - static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \ + static PyObject * pyopencv_##CLASS_ID##_Instance(const STORAGE &r) \ { \ - pyopencv_##NAME##_t *m = PyObject_New(pyopencv_##NAME##_t, (PyTypeObject*)pyopencv_##NAME##_TypePtr); \ + pyopencv_##CLASS_ID##_t *m = PyObject_New(pyopencv_##CLASS_ID##_t, (PyTypeObject*)pyopencv_##CLASS_ID##_TypePtr); \ new (&(m->v)) STORAGE(r); \ return (PyObject*)m; \ } \ - static void pyopencv_##NAME##_dealloc(PyObject* self) \ + static void pyopencv_##CLASS_ID##_dealloc(PyObject* self) \ { \ - ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \ + ((pyopencv_##CLASS_ID##_t*)self)->v.STORAGE::~SNAME(); \ PyObject_Del(self); \ } \ - static PyObject* pyopencv_##NAME##_repr(PyObject* self) \ + static PyObject* pyopencv_##CLASS_ID##_repr(PyObject* self) \ { \ char str[1000]; \ - sprintf(str, "<"#WNAME" %p>", self); \ + sprintf(str, "< " MODULESTR SCOPE"."#EXPORT_NAME" 
%p>", self); \ return PyString_FromString(str); \ } \ - static PyType_Slot pyopencv_##NAME##_Slots[] = \ + static PyType_Slot pyopencv_##CLASS_ID##_Slots[] = \ { \ {Py_tp_dealloc, 0}, \ {Py_tp_repr, 0}, \ @@ -283,37 +306,36 @@ PyObject* pyopencv_from(const TYPE& src) {Py_tp_new, 0}, \ {0, 0} \ }; \ - static PyType_Spec pyopencv_##NAME##_Spec = \ + static PyType_Spec pyopencv_##CLASS_ID##_Spec = \ { \ - MODULESTR"."#WNAME, \ - sizeof(pyopencv_##NAME##_t), \ + MODULESTR SCOPE"."#EXPORT_NAME, \ + sizeof(pyopencv_##CLASS_ID##_t), \ 0, \ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, \ - pyopencv_##NAME##_Slots \ + pyopencv_##CLASS_ID##_Slots \ }; -#define CVPY_TYPE_INIT_DYNAMIC(WNAME, NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \ +#define CVPY_TYPE_INIT_DYNAMIC(EXPORT_NAME, CLASS_ID, ERROR_HANDLER, BASE, CONSTRUCTOR, SCOPE) \ { \ - pyopencv_##NAME##_Slots[0].pfunc /*tp_dealloc*/ = (void*)pyopencv_##NAME##_dealloc; \ - pyopencv_##NAME##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##NAME##_repr; \ - pyopencv_##NAME##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##NAME##_getseters; \ - pyopencv_##NAME##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \ - pyopencv_##NAME##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##NAME##_methods; \ - pyopencv_##NAME##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \ - pyopencv_##NAME##_Slots[6].pfunc /*tp_new*/ = (void*)PyType_GenericNew; \ + pyopencv_##CLASS_ID##_Slots[0].pfunc /*tp_dealloc*/ = (void*)pyopencv_##CLASS_ID##_dealloc; \ + pyopencv_##CLASS_ID##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##CLASS_ID##_repr; \ + pyopencv_##CLASS_ID##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##CLASS_ID##_getseters; \ + pyopencv_##CLASS_ID##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \ + pyopencv_##CLASS_ID##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##CLASS_ID##_methods; \ + pyopencv_##CLASS_ID##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \ + pyopencv_##CLASS_ID##_Slots[6].pfunc /*tp_new*/ = 
(void*)PyType_GenericNew; \ PyObject * bases = 0; \ if (pyopencv_##BASE##_TypePtr) \ bases = PyTuple_Pack(1, pyopencv_##BASE##_TypePtr); \ - pyopencv_##NAME##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##NAME##_Spec, bases); \ - if (!pyopencv_##NAME##_TypePtr) \ + pyopencv_##CLASS_ID##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##CLASS_ID##_Spec, bases); \ + if (!pyopencv_##CLASS_ID##_TypePtr) \ { \ - printf("Failed to create type from spec: " #WNAME ", base (" #BASE ")\n"); \ + printf("Failed to create type from spec: " #CLASS_ID ", base (" #BASE ")\n"); \ ERROR_HANDLER; \ } \ - if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ - printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ - Py_DECREF(pyopencv_##NAME##_TypePtr); \ + printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ ERROR_HANDLER; \ } \ } diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 41e5c6ba4b..ec56585ace 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -618,6 +618,91 @@ class Arguments(NewOpenCVTests): self.assertEqual(flag, cv.utils.nested.testEchoBooleanFunction(flag), msg="Function in nested module returns wrong result") + def test_class_from_submodule_has_global_alias(self): + self.assertTrue(hasattr(cv.ml, "Boost"), + msg="Class is not registered in the submodule") + self.assertTrue(hasattr(cv, "ml_Boost"), + msg="Class from submodule doesn't have alias in the " + "global module") + self.assertEqual(cv.ml.Boost, cv.ml_Boost, + msg="Classes from submodules and global module don't refer " + "to the same type") + + def test_inner_class_has_global_alias(self): + self.assertTrue(hasattr(cv.SimpleBlobDetector, "Params"), + msg="Class is not registered as inner class") + self.assertEqual(cv.SimpleBlobDetector.Params, 
cv.SimpleBlobDetector_Params, + msg="Inner class and class in global module don't refer " + "to the same type") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + + def test_class_from_submodule_has_global_alias(self): + self.assertTrue(hasattr(cv.ml, "Boost"), + msg="Class is not registered in the submodule") + self.assertTrue(hasattr(cv, "ml_Boost"), + msg="Class from submodule doesn't have alias in the " + "global module") + self.assertEqual(cv.ml.Boost, cv.ml_Boost, + msg="Classes from submodules and global module don't refer " + "to the same type") + + def test_inner_class_has_global_alias(self): + self.assertTrue(hasattr(cv.SimpleBlobDetector, "Params"), + msg="Class is not registered as inner class") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + self.assertEqual(cv.SimpleBlobDetector.Params, cv.SimpleBlobDetector_Params, + msg="Inner class and class in global module don't refer " + "to the same type") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + + def test_export_class_with_different_name(self): + self.assertTrue(hasattr(cv.utils.nested, "ExportClassName"), + msg="Class with export alias is not registered in the submodule") + self.assertTrue(hasattr(cv, "utils_nested_ExportClassName"), + msg="Class with export alias doesn't have alias in the " + "global module") + self.assertEqual(cv.utils.nested.ExportClassName.originalName(), "OriginalClassName") + + instance = cv.utils.nested.ExportClassName.create() + self.assertTrue(isinstance(instance, cv.utils.nested.ExportClassName), + msg="Factory function returns wrong class instance: {}".format(type(instance))) + self.assertTrue(hasattr(cv.utils.nested, "ExportClassName_create"), + msg="Factory function should have alias in the same module as the class") + # 
self.assertFalse(hasattr(cv.utils.nested, "OriginalClassName_create"), + # msg="Factory function should not be registered with original class name, "\ + # "when class has different export name") + + def test_export_inner_class_of_class_exported_with_different_name(self): + if not hasattr(cv.utils.nested, "ExportClassName"): + raise unittest.SkipTest("Outer class with export alias is not registered in the submodule") + + self.assertTrue(hasattr(cv.utils.nested.ExportClassName, "Params"), + msg="Inner class with export alias is not registered in " + "the outer class") + self.assertTrue(hasattr(cv, "utils_nested_ExportClassName_Params"), + msg="Inner class with export alias is not registered in " + "global module") + params = cv.utils.nested.ExportClassName.Params() + params.int_value = 45 + params.float_value = 4.5 + + instance = cv.utils.nested.ExportClassName.create(params) + self.assertTrue(isinstance(instance, cv.utils.nested.ExportClassName), + msg="Factory function returns wrong class instance: {}".format(type(instance))) + self.assertEqual( + params.int_value, instance.getIntParam(), + msg="Class initialized with wrong integer parameter. Expected: {}. Actual: {}".format( + params.int_value, instance.getIntParam() + )) + self.assertEqual( + params.float_value, instance.getFloatParam(), + msg="Class initialized with wrong integer parameter. Expected: {}. Actual: {}".format( + params.float_value, instance.getFloatParam() + )) + class SamplesFindFile(NewOpenCVTests): From 9c7adb72489de9e116bab4023f4a10b0ecd399e0 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Wed, 2 Mar 2022 16:36:16 +0300 Subject: [PATCH 10/84] Merge pull request #21530 from anna-khakimova:ak/simd_divrc * GAPI Fluid: SIMD for DivRC kernel. 
* Fluid: Div kernel's SIMD refactoring * SIMD for DivRC 3 channel case * Applied comments --- .../perf/common/gapi_core_perf_tests_inl.hpp | 4 + .../perf/cpu/gapi_core_perf_tests_fluid.cpp | 4 +- .../gapi/src/backends/fluid/gfluidcore.cpp | 77 +- .../fluid/gfluidcore_func.dispatch.cpp | 27 + .../src/backends/fluid/gfluidcore_func.hpp | 23 + .../backends/fluid/gfluidcore_func.simd.hpp | 859 +++++++++++++----- 6 files changed, 735 insertions(+), 259 deletions(-) diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index 72837da199..c644fd1587 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -528,6 +528,10 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); + //This condition need as workaround the bug in the OpenCV. + //It reinitializes divider matrix without zero values for CV_16S DST type. 
+ if (dtype == CV_16S || (type == CV_16S && dtype == -1)) + cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255)); // OpenCV code /////////////////////////////////////////////////////////// cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype); diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index 796d05101e..a142109315 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -101,8 +101,8 @@ INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest, INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(1.0), Values(cv::compile_args(CORE_FLUID)))); diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index 22f73e553c..bb33c45d85 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -936,8 +936,8 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca } template -static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm, - float scale=1) +CV_ALWAYS_INLINE void run_arithm_rs(Buffer &dst, const View &src, const float scalar[], + Arithm arithm, float scale=1) { const auto *in = src.InLine(0); auto *out = dst.OutLine(); @@ -955,15 +955,23 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A w = subrc_simd(scalar, in, out, length, chan); #endif for (; w < length; ++w) + { out[w] = subr(in[w], scalar[w % chan]); + } break; } - // TODO: optimize division case ARITHM_DIVIDE: - for (int w=0; w < width; w++) - for (int c=0; c < chan; 
c++) - out[chan*w + c] = div(scalar[c], in[chan*w + c], scale); + { + int w = 0; +#if CV_SIMD + w = divrc_simd(scalar, in, out, length, chan, scale); +#endif + for (; w < length; ++w) + { + out[w] = div(scalar[w % chan], in[w], scale); + } break; + } default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation"); } } @@ -1319,7 +1327,9 @@ CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch, #endif for (; w < length; ++w) + { out[w] = div(in[w], scalar[w % chan], scale); + } } GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true) @@ -1402,32 +1412,55 @@ GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true) } }; -GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false) +GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, true) { static const int Window = 1; - static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/, - Buffer &dst) + static void run(const cv::Scalar& _scalar, const View& src, double _scale, int /*dtype*/, + Buffer& dst, Buffer& scratch) { - const float scalar[4] = { - static_cast(_scalar[0]), - static_cast(_scalar[1]), - static_cast(_scalar[2]), - static_cast(_scalar[3]) - }; + GAPI_Assert(src.meta().chan <= 4); + + if (dst.y() == 0) + { + const int chan = src.meta().chan; + float* _scratch = scratch.OutLine(); + + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); + } + + const float* scalar = scratch.OutLine(); const float scale = static_cast(_scale); // DST SRC OP __VA_ARGS__ - UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - 
UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } + + static void initScratch(const GScalarDesc&, const GMatDesc&, double, int, Buffer& scratch) + { + initScratchBuffer(scratch); + } + + static void resetScratch(Buffer& /*scratch*/) + { + } }; //------------------- diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index 9afac9ceb4..a682fb7dbb 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -235,6 +235,33 @@ ABSDIFFC_SIMD(float) #undef 
ABSDIFFC_SIMD +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale) \ +{ \ + CV_CPU_DISPATCH(divrc_simd, (scalar, in, out, length, chan, scale), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width) { diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 868923932d..975383a8d9 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -187,6 +187,29 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale); + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index 
2424a57677..b92d92d0cf 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -150,8 +150,8 @@ SUBRC_SIMD(float, float) #undef SUBRC_SIMD -#define MULC_SIMD(SRC, DST) \ -int mulc_simd(const SRC in[], const float scalar[], DST out[], \ +#define MULC_SIMD(SRC, DST) \ +int mulc_simd(const SRC in[], const float scalar[], DST out[], \ const int length, const int chan, const float scale); MULC_SIMD(uchar, uchar) @@ -173,9 +173,9 @@ MULC_SIMD(float, float) #undef MULC_SIMD -#define DIVC_SIMD(SRC, DST) \ -int divc_simd(const SRC in[], const float scalar[], DST out[], \ - const int length, const int chan, const float scale, \ +#define DIVC_SIMD(SRC, DST) \ +int divc_simd(const SRC in[], const float scalar[], DST out[], \ + const int length, const int chan, const float scale, \ const int set_mask_flag); DIVC_SIMD(uchar, uchar) @@ -208,6 +208,29 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale); + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); @@ -236,6 +259,28 @@ template<> struct vector_type_of { using type = v_uint16; }; template<> struct vector_type_of { using type = v_int16; }; template<> struct vector_type_of { using type = v_float32; }; +template +struct zero_vec_type_of; + +template +using zero_vec_type_of_t = typename zero_vec_type_of::type; + +template<> 
struct zero_vec_type_of { using type = v_int16; }; +template<> struct zero_vec_type_of { using type = v_int16; }; +template<> struct zero_vec_type_of { using type = v_int16; }; +template<> struct zero_vec_type_of { using type = v_float32; }; + +template +struct univ_zero_vec_type_of; + +template +using univ_zero_vec_type_of_t = typename univ_zero_vec_type_of::type; + +template<> struct univ_zero_vec_type_of { using type = v_uint8; }; +template<> struct univ_zero_vec_type_of { using type = v_int16; }; +template<> struct univ_zero_vec_type_of { using type = v_int16; }; +template<> struct univ_zero_vec_type_of { using type = v_float32; }; + CV_ALWAYS_INLINE v_float32 vg_load_f32(const float* in) { return vx_load(in); @@ -295,165 +340,111 @@ CV_ALWAYS_INLINE void v_store_select(short* dst, const v_int16& div, const v_int CV_ALWAYS_INLINE void v_store_select(ushort* dst, const v_int16& div, const v_int16& v_zero, const v_int32& res1, const v_int32& res2) { - v_uint16 sel = v_reinterpret_as_u16(v_select(div == v_zero, v_zero, v_pack(res1, res2))); - vx_store(dst, sel); + vx_store(dst, v_select(v_reinterpret_as_u16(div == v_zero), + v_reinterpret_as_u16(v_zero), v_pack_u(res1, res2))); } -//================================================================================================= +//============================================================================= -template +template CV_ALWAYS_INLINE -typename std::enable_if<(std::is_same::value && std::is_same::value) || - (std::is_same::value && std::is_same::value) || - (std::is_same::value && std::is_same::value), int>::type -div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], DST out[], const int length, double _scale) +void div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const v_float32& a3, const v_float32& a4, const uchar* in2x, + uchar* outx, const v_float32& v_scale, const v_int16& v_zero) { - constexpr int nlanes = vector_type_of_t::nlanes; + constexpr int nlanes = 
v_uint8::nlanes; - if (length < nlanes) - return 0; + v_int16 div1 = v_reinterpret_as_s16(vx_load_expand(in2x)); + v_int16 div2 = v_reinterpret_as_s16(vx_load_expand(&in2x[nlanes/2])); - v_int16 v_zero = vx_setall_s16(0); - v_float32 scale = vx_setall_f32(static_cast(_scale)); + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 2]); + v_int32 sum1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)), + sum2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)), + sum3 = v_round(div_op(s_tag, a3, fdiv3, v_scale)), + sum4 = v_round(div_op(s_tag, a4, fdiv4, v_scale)); - v_int16 div = v_reinterpret_as_s16(vx_load(&in2[x])); + v_int16 res1 = v_select((div1 == v_zero), v_zero, v_pack(sum1, sum2)); + v_int16 res2 = v_select((div2 == v_zero), v_zero, v_pack(sum3, sum4)); - v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); - v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); - - v_int32 r1 = v_round(div_op(t, a1, fdiv1, scale)); - v_int32 r2 = v_round(div_op(t, a2, fdiv2, scale)); - - v_store_select(&out[x], div, v_zero, r1, r2); - } - - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + vx_store(outx, v_pack_u(res1, res2)); } -//------------------------------------------------------------------------------------------------- - template CV_ALWAYS_INLINE typename std::enable_if::value || - std::is_same::value, int>::type -div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], uchar out[], const int length, double _scale) + std::is_same::value, void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const v_float32& a3, const v_float32& a4, const SRC* in2x, + uchar* outx, 
const v_float32& v_scale, const v_int16& v_zero) { constexpr int nlanes = v_uint8::nlanes; - if (length < nlanes) - return 0; + v_int16 div1 = v_reinterpret_as_s16(vx_load(in2x)); + v_int16 div2 = v_reinterpret_as_s16(vx_load(&in2x[nlanes/2])); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_int16 v_zero = vx_setall_s16(0); + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 4]); - v_float32 a3 = vg_load_f32(&in1[x + nlanes / 2]); - v_float32 a4 = vg_load_f32(&in1[x + 3 * nlanes / 4]); + v_int32 sum1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)), + sum2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)), + sum3 = v_round(div_op(s_tag, a3, fdiv3, v_scale)), + sum4 = v_round(div_op(s_tag, a4, fdiv4, v_scale)); - v_int16 div1 = v_reinterpret_as_s16(vx_load(&in2[x])); - v_int16 div2 = v_reinterpret_as_s16(vx_load(&in2[x + nlanes/2])); + v_int16 res1 = v_select((div1 == v_zero), v_zero, v_pack(sum1, sum2)); + v_int16 res2 = v_select((div2 == v_zero), v_zero, v_pack(sum3, sum4)); - v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); - v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); - v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); - v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); - - v_int32 sum1 = v_round(div_op(t, a1, fdiv1, scale)), - sum2 = v_round(div_op(t, a2, fdiv2, scale)), - sum3 = v_round(div_op(t, a3, fdiv3, scale)), - sum4 = v_round(div_op(t, a4, fdiv4, scale)); - - v_int16 res1 = v_select((div1 == v_zero), v_zero, v_pack(sum1, sum2)); - v_int16 res2 = v_select((div2 == v_zero), v_zero, v_pack(sum3, sum4)); - - vx_store(&out[x], v_pack_u(res1, res2)); - } - - if (x < length) - { - x = length - nlanes; - continue; // process one more time 
(unaligned tail) - } - break; - } - return x; + vx_store(outx, v_pack_u(res1, res2)); } -//------------------------------------------------------------------------------------------------- - template -CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const float in1[], const float in2[], uchar out[], - const int length, double _scale) +CV_ALWAYS_INLINE void div_simd_impl(scale_tag_t s_tag, const v_float32& a1, + const v_float32& a2, const v_float32& a3, + const v_float32& a4, const float* in2x, uchar* outx, + const v_float32& v_scale, const v_float32& v_zero) { constexpr int nlanes = v_uint8::nlanes; - if (length < nlanes) - return 0; + v_float32 div1 = vg_load_f32(in2x); + v_float32 div2 = vg_load_f32(&in2x[nlanes / 4]); + v_float32 div3 = vg_load_f32(&in2x[nlanes / 2]); + v_float32 div4 = vg_load_f32(&in2x[3 * nlanes / 4]); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_float32 v_zero = vx_setall_f32(0); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 4]); - v_float32 a3 = vg_load_f32(&in1[x + nlanes / 2]); - v_float32 a4 = vg_load_f32(&in1[x + 3 * nlanes / 4]); + v_float32 r1 = div_op(s_tag, a1, div1, v_scale); + v_float32 r2 = div_op(s_tag, a2, div2, v_scale); + v_float32 r3 = div_op(s_tag, a3, div3, v_scale); + v_float32 r4 = div_op(s_tag, a4, div4, v_scale); - v_float32 div1 = vg_load_f32(&in2[x]); - v_float32 div2 = vg_load_f32(&in2[x + nlanes / 4]); - v_float32 div3 = vg_load_f32(&in2[x + nlanes / 2]); - v_float32 div4 = vg_load_f32(&in2[x + 3 * nlanes / 4]); + v_float32 sel1 = v_select((div1 == v_zero), v_zero, r1); + v_float32 sel2 = v_select((div2 == v_zero), v_zero, r2); + v_float32 sel3 = v_select((div3 == v_zero), v_zero, r3); + v_float32 sel4 = v_select((div4 == v_zero), v_zero, r4); - v_float32 r1 = div_op(t, a1, div1, scale); - v_float32 r2 = div_op(t, a2, div2, scale); - v_float32 r3 = div_op(t, a3, div3, scale); - v_float32 r4 
= div_op(t, a4, div4, scale); + v_int32 res1 = v_round(sel1); + v_int32 res2 = v_round(sel2); + v_int32 res3 = v_round(sel3); + v_int32 res4 = v_round(sel4); - v_float32 sel1 = v_select((div1 == v_zero), v_zero, r1); - v_float32 sel2 = v_select((div2 == v_zero), v_zero, r2); - v_float32 sel3 = v_select((div3 == v_zero), v_zero, r3); - v_float32 sel4 = v_select((div4 == v_zero), v_zero, r4); + vx_store(outx, v_pack_u(v_pack(res1, res2), v_pack(res3, res4))); +} - v_int32 res1 = v_round(sel1); - v_int32 res2 = v_round(sel2); - v_int32 res3 = v_round(sel3); - v_int32 res4 = v_round(sel4); +template +CV_ALWAYS_INLINE void div_hal(scale_tag_t s_tag, const SRC* in1x, const SRC* in2x, uchar* outx, + const v_float32& v_scale, const Vtype& v_zero) +{ + constexpr int nlanes = v_uint8::nlanes; - vx_store(&out[x], v_pack_u(v_pack(res1, res2), v_pack(res3, res4))); - } + v_float32 a1 = vg_load_f32(in1x); + v_float32 a2 = vg_load_f32(&in1x[nlanes / 4]); + v_float32 a3 = vg_load_f32(&in1x[nlanes / 2]); + v_float32 a4 = vg_load_f32(&in1x[3 * nlanes / 4]); - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + div_simd_impl(s_tag, a1, a2, a3, a4, in2x, outx, v_scale, v_zero); } //------------------------------------------------------------------------------------------------- @@ -461,113 +452,117 @@ CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const float in1[], const float in2[] template CV_ALWAYS_INLINE typename std::enable_if::value || - std::is_same::value, int>::type -div_hal(scale_tag_t t, const uchar in1[], const uchar in2[], DST out[], const int length, double _scale) + std::is_same::value, void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const uchar* in2x, DST* outx, const v_float32& v_scale, + const v_int16& v_zero) { - constexpr int nlanes = vector_type_of_t::nlanes; + v_int16 div = v_reinterpret_as_s16(vx_load_expand(in2x)); - if (length < nlanes) - return 
0; + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_int16 v_zero = vx_setall_s16(0); + v_int32 r1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)); + v_int32 r2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 2]); - - v_int16 div = v_reinterpret_as_s16(vx_load_expand(&in2[x])); - - v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); - v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); - - v_int32 r1 = v_round(div_op(t, a1, fdiv1, scale)); - v_int32 r2 = v_round(div_op(t, a2, fdiv2, scale)); - - v_store_select(&out[x], div, v_zero, r1, r2); - } - - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + v_store_select(outx, div, v_zero, r1, r2); } -//------------------------------------------------------------------------------------------------- +template +CV_ALWAYS_INLINE +typename std::enable_if<(std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value), void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const SRC* in2x, DST* outx, const v_float32& v_scale, const v_int16& v_zero) +{ + v_int16 div = v_reinterpret_as_s16(vx_load(in2x)); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); + + v_int32 r1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)); + v_int32 r2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)); + + v_store_select(outx, div, v_zero, r1, r2); +} template CV_ALWAYS_INLINE typename std::enable_if::value || - std::is_same::value, int>::type -div_hal(scale_tag_t t, const float in1[], const float in2[], DST 
out[], const int length, double _scale) + std::is_same::value, void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const float* in2x, DST* outx, const v_float32& v_scale, + const v_float32& v_zero) { constexpr int nlanes = vector_type_of_t::nlanes; - if (length < nlanes) - return 0; + v_float32 fdiv1 = vg_load_f32(in2x); + v_float32 fdiv2 = vg_load_f32(&in2x[nlanes / 2]); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_float32 v_zero = vx_setall_f32(0); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 2]); + v_float32 r1 = div_op(s_tag, a1, fdiv1, v_scale); + v_float32 r2 = div_op(s_tag, a2, fdiv2, v_scale); - v_float32 fdiv1 = vg_load_f32(&in2[x]); - v_float32 fdiv2 = vg_load_f32(&in2[x + nlanes / 2]); + v_int32 res1 = v_round(v_select((fdiv1 == v_zero), v_zero, r1)); + v_int32 res2 = v_round(v_select((fdiv2 == v_zero), v_zero, r2)); - v_float32 r1 = div_op(t, a1, fdiv1, scale); - v_float32 r2 = div_op(t, a2, fdiv2, scale); + v_store_i16(outx, res1, res2); +} - v_int32 res1 = v_round(v_select((fdiv1 == v_zero), v_zero, r1)); - v_int32 res2 = v_round(v_select((fdiv2 == v_zero), v_zero, r2)); +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +div_hal(scale_tag_t s_tag, const SRC* in1x, const SRC* in2x, DST* outx, + const v_float32& v_scale, const Vtype& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; - v_store_i16(&out[x], res1, res2); - } + v_float32 a1 = vg_load_f32(in1x); + v_float32 a2 = vg_load_f32(&in1x[nlanes / 2]); - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + div_simd_impl(s_tag, a1, a2, in2x, outx, v_scale, v_zero); } //------------------------------------------------------------------------------------------------- template -CV_ALWAYS_INLINE int 
div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], float out[], - const int length, double _scale) +CV_ALWAYS_INLINE void div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const SRC* in2x, + float* outx, const v_float32& v_scale) { - constexpr int nlanes = v_float32::nlanes; + v_float32 b1 = vg_load_f32(in2x); + vx_store(outx, div_op(s_tag, a1, b1, v_scale)); +} + +template +CV_ALWAYS_INLINE void div_hal(scale_tag_t s_tag, const SRC* in1x, const SRC* in2x, float* outx, + const v_float32& v_scale, const Tvec&) +{ + v_float32 a1 = vg_load_f32(in1x); + div_simd_impl(s_tag, a1, in2x, outx, v_scale); +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int div_simd_common(scale_tag_t s_tag, const SRC in1[], const SRC in2[], + DST out[], const int length, float scale) +{ + constexpr int nlanes = vector_type_of_t::nlanes; if (length < nlanes) return 0; - v_float32 scale = vx_setall_f32(static_cast(_scale)); + const zero_vec_type_of_t v_zero = vx_setall::lane_type>(0); + v_float32 v_scale = vx_setall_f32(scale); int x = 0; for (;;) { for (; x <= length - nlanes; x += nlanes) { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 b1 = vg_load_f32(&in2[x]); - - vx_store(&out[x], div_op(t, a1, b1, scale)); + div_hal(s_tag, &in1[x], &in2[x], &out[x], v_scale, v_zero); } if (x < length) @@ -580,28 +575,6 @@ CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], fl return x; } -//------------------------------------------------------------------------------------------------- - -template -CV_ALWAYS_INLINE int div_hal(scale_tag_t, const uchar in1[], const uchar in2[], uchar out[], - const int length, double scale) -{ - hal::div8u(in1, static_cast(length), in2, static_cast(length), - out, static_cast(length), length, 1, &scale); - return length; -} - -template -CV_ALWAYS_INLINE int div_hal(scale_tag_t, const short in1[], const short in2[], short out[], - const int 
length, double scale) -{ - hal::div16s(in1, static_cast(length), in2, static_cast(length), - out, static_cast(length), length, 1, &scale); - return length; -} - -//------------------------------------------------------------------------------------------------- - #define DIV_SIMD(SRC, DST) \ int div_simd(const SRC in1[], const SRC in2[], DST out[], \ const int length, double _scale) \ @@ -610,13 +583,11 @@ int div_simd(const SRC in1[], const SRC in2[], DST out[], float fscale = static_cast(_scale); \ if (std::fabs(fscale - 1.0f) <= FLT_EPSILON) \ { \ - not_scale_tag t; \ - x = div_hal(t, in1, in2, out, length, _scale); \ + x = div_simd_common(not_scale_tag{}, in1, in2, out, length, fscale); \ } \ else \ { \ - scale_tag t; \ - x = div_hal(t, in1, in2, out, length, _scale); \ + x = div_simd_common(scale_tag{}, in1, in2, out, length, fscale); \ } \ return x; \ } @@ -1553,7 +1524,7 @@ int mulc_simd(const SRC in[], const float scalar[], DST out[], \ else \ { \ return arithmOpScalarScaled_simd_common(op_t, in, scalar, out, \ - length, scale); \ + length, scale); \ } \ } \ case 3: \ @@ -1743,11 +1714,11 @@ divc_simd_c3_impl(scale_tag_t s_tag, SRC in[], DST out[], const v_float32& s1, v_float32 a6 = vg_load_f32(&in[x + 5 * nlanes / 2]); arithmOpScalar_pack_store_c3(&out[x], v_round(v_select(v_mask1, v_zero, div_op(s_tag, a1, s1, v_scale))), - v_round(v_select(v_mask2, v_zero, div_op(s_tag, a2, s2, v_scale))), - v_round(v_select(v_mask3, v_zero, div_op(s_tag, a3, s3, v_scale))), - v_round(v_select(v_mask1, v_zero, div_op(s_tag, a4, s1, v_scale))), - v_round(v_select(v_mask2, v_zero, div_op(s_tag, a5, s2, v_scale))), - v_round(v_select(v_mask3, v_zero, div_op(s_tag, a6, s3, v_scale)))); + v_round(v_select(v_mask2, v_zero, div_op(s_tag, a2, s2, v_scale))), + v_round(v_select(v_mask3, v_zero, div_op(s_tag, a3, s3, v_scale))), + v_round(v_select(v_mask1, v_zero, div_op(s_tag, a4, s1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, a5, s2, v_scale))), + 
v_round(v_select(v_mask3, v_zero, div_op(s_tag, a6, s3, v_scale)))); } if (x < length) @@ -1976,14 +1947,432 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_common_impl(scale_tag_t s_tag, const SRC* inx, + const v_float32& v_scalar, DST* outx, + const v_float32& v_scale, const Tvec& v_zero) +{ + div_simd_impl(s_tag, v_scalar, v_scalar, inx, outx, v_scale, v_zero); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value, void>::type +divrc_simd_common_impl(scale_tag_t s_tag, const SRC* inx, + const v_float32& v_scalar, DST* outx, + const v_float32& v_scale, const Tvec& v_zero) +{ + div_simd_impl(s_tag, v_scalar, v_scalar, v_scalar, v_scalar, inx, outx, v_scale, v_zero); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value, void>::type +divrc_simd_common_impl(scale_tag_t s_tag, const SRC* inx, + const v_float32& v_scalar, DST* outx, + const v_float32& v_scale, const Tvec&) +{ + div_simd_impl(s_tag, v_scalar, inx, outx, v_scale); +} + +template +CV_ALWAYS_INLINE int divrc_simd_common(scale_tag_t s_tag, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + if (length < nlanes) + return 0; + + v_float32 v_scalar = vx_load(scalar); + v_float32 v_scale = vx_setall_f32(scale); + zero_vec_type_of_t v_zero = + vx_setall::lane_type>(0); + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + divrc_simd_common_impl(s_tag, &in[x], v_scalar, &out[x], v_scale, v_zero); + } + + if (x < length) + { + x = length - nlanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE void 
divrc_simd_c3_calc(scale_tag_t s_tag, const uchar* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_uint8& v_zero) +{ + v_uint8 div = vx_load(inx); + v_uint8 v_mask = (div == v_zero); + + v_uint16 div1 = v_expand_low(div); + v_uint16 div2 = v_expand_high(div); + + v_float32 fdiv1 = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(div1))); + v_float32 fdiv2 = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(div1))); + v_float32 fdiv3 = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(div2))); + v_float32 fdiv4 = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(div2))); + + vx_store(outx, + v_select(v_mask, v_zero, v_pack_u(v_pack(v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale))), + v_pack(v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale)))))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const SRC* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_int16& v_zero) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_int16 div1 = v_reinterpret_as_s16(vx_load(inx)); + v_int16 div2 = v_reinterpret_as_s16(vx_load(&inx[nlanes / 2])); + + v_int16 v_mask1 = (div1 == v_zero); + v_int16 v_mask2 = (div2 == v_zero); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); + + vx_store(outx, + v_pack_u(v_select(v_mask1, v_zero, + v_pack(v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale)))), + v_select(v_mask2, v_zero, + v_pack(v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale)))))); +} + +template +CV_ALWAYS_INLINE void divrc_simd_c3_calc(scale_tag_t s_tag, const 
float* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_float32& v_zero) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 fdiv1 = vg_load_f32(inx); + v_float32 fdiv2 = vg_load_f32(&inx[nlanes / 4]); + v_float32 fdiv3 = vg_load_f32(&inx[nlanes / 2]); + v_float32 fdiv4 = vg_load_f32(&inx[3 * nlanes / 4]); + + v_float32 v_mask1 = (fdiv1 == v_zero); + v_float32 v_mask2 = (fdiv2 == v_zero); + v_float32 v_mask3 = (fdiv3 == v_zero); + v_float32 v_mask4 = (fdiv4 == v_zero); + + vx_store(outx, + v_pack_u(v_pack(v_round(v_select(v_mask1, v_zero, div_op(s_tag, s1, fdiv1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, s2, fdiv2, v_scale)))), + v_pack(v_round(v_select(v_mask3, v_zero, div_op(s_tag, s3, fdiv3, v_scale))), + v_round(v_select(v_mask4, v_zero, div_op(s_tag, s1, fdiv4, v_scale)))))); + +} + +template +CV_ALWAYS_INLINE int divrc_simd_c3_impl(scale_tag_t s_tag, const SRC in[], uchar out[], + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int length, const int nlanes, const int lanes) +{ + univ_zero_vec_type_of_t v_zero = + vx_setall::lane_type>(0); + + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + divrc_simd_c3_calc(s_tag, &in[x], &out[x], s1, s2, s3, v_scale, v_zero); + divrc_simd_c3_calc(s_tag, &in[x + nlanes], &out[x + nlanes], s2, s3, s1, v_scale, v_zero); + divrc_simd_c3_calc(s_tag, &in[x + 2 * nlanes], &out[x + 2 * nlanes], s3, s1, s2, v_scale, v_zero); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const uchar* inx, DST* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& 
s3, const v_float32& v_scale, + const v_int16& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + v_uint8 div = vx_load(inx); + + v_int16 div1 = v_reinterpret_as_s16(v_expand_low(div)); + v_int16 div2 = v_reinterpret_as_s16(v_expand_high(div)); + v_int16 div3 = v_reinterpret_as_s16(vx_load_expand(&inx[2 * nlanes])); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); + v_float32 fdiv5 = v_cvt_f32(v_expand_low(div3)); + v_float32 fdiv6 = v_cvt_f32(v_expand_high(div3)); + + v_store_select(outx, div1, v_zero, v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale))); + v_store_select(&outx[nlanes], div2, v_zero, v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale))); + v_store_select(&outx[2*nlanes], div3, v_zero, v_round(div_op(s_tag, s2, fdiv5, v_scale)), + v_round(div_op(s_tag, s3, fdiv6, v_scale))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if<(std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value), void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const SRC* inx, DST* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_int16& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_int16 div1 = v_reinterpret_as_s16(vx_load(inx)); + v_int16 div2 = v_reinterpret_as_s16(vx_load(&inx[nlanes])); + v_int16 div3 = v_reinterpret_as_s16(vx_load(&inx[2*nlanes])); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); + v_float32 fdiv5 = v_cvt_f32(v_expand_low(div3)); + 
v_float32 fdiv6 = v_cvt_f32(v_expand_high(div3)); + + v_store_select(outx, div1, v_zero, v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale))); + v_store_select(&outx[nlanes], div2, v_zero, v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale))); + v_store_select(&outx[2*nlanes], div3, v_zero, v_round(div_op(s_tag, s2, fdiv5, v_scale)), + v_round(div_op(s_tag, s3, fdiv6, v_scale))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const float* inx, DST* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_float32& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_float32 fdiv1 = vg_load_f32(inx); + v_float32 fdiv2 = vg_load_f32(&inx[nlanes/2]); + v_float32 fdiv3 = vg_load_f32(&inx[nlanes]); + v_float32 fdiv4 = vg_load_f32(&inx[3*nlanes/2]); + v_float32 fdiv5 = vg_load_f32(&inx[2*nlanes]); + v_float32 fdiv6 = vg_load_f32(&inx[5*nlanes/2]); + + v_store_i16(outx, v_round(v_select(fdiv1 == v_zero, v_zero, div_op(s_tag, s1, fdiv1, v_scale))), + v_round(v_select(fdiv2 == v_zero, v_zero, div_op(s_tag, s2, fdiv2, v_scale)))); + v_store_i16(&outx[nlanes], v_round(v_select(fdiv3 == v_zero, v_zero, div_op(s_tag, s3, fdiv3, v_scale))), + v_round(v_select(fdiv4 == v_zero, v_zero, div_op(s_tag, s1, fdiv4, v_scale)))); + v_store_i16(&outx[2*nlanes], v_round(v_select(fdiv5 == v_zero, v_zero, div_op(s_tag, s2, fdiv5, v_scale))), + v_round(v_select(fdiv6 == v_zero, v_zero, div_op(s_tag, s3, fdiv6, v_scale)))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, int>::type +divrc_simd_c3_impl(scale_tag_t s_tag, const SRC in[], DST out[], const v_float32& s1, + const v_float32& s2, const v_float32& s3, + const v_float32& v_scale, const int length, + const int, const int lanes) +{ + zero_vec_type_of_t v_zero = + 
vx_setall::lane_type>(0); + + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + divrc_simd_c3_calc(s_tag, &in[x], &out[x], s1, s2, s3, v_scale, v_zero); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divrc_simd_c3_impl(scale_tag_t s_tag, const SRC* in, float* out, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int length, const int nlanes, const int lanes) +{ + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + v_float32 div1 = vg_load_f32(&in[x]); + v_float32 div2 = vg_load_f32(&in[x + nlanes]); + v_float32 div3 = vg_load_f32(&in[x + 2*nlanes]); + + vx_store(&out[x], div_op(s_tag, s1, div1, v_scale)); + vx_store(&out[x + nlanes], div_op(s_tag, s2, div2, v_scale)); + vx_store(&out[x + 2*nlanes], div_op(s_tag, s3, div3, v_scale)); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divrc_simd_c3(scale_tag_t s_tag, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) +{ + constexpr int chan = 3; + constexpr int nlanes = vector_type_of_t::nlanes; + constexpr int lanes = chan * nlanes; + + if (length < lanes) + return 0; + + v_float32 v_scale = vx_setall_f32(scale); + + v_float32 s1 = vx_load(scalar); +#if CV_SIMD_WIDTH == 32 + v_float32 s2 = vx_load(&scalar[2]); + v_float32 s3 = vx_load(&scalar[1]); +#else + v_float32 s2 = vx_load(&scalar[1]); + v_float32 s3 = vx_load(&scalar[2]); +#endif + return divrc_simd_c3_impl(s_tag, in, out, s1, s2, s3, v_scale, length, nlanes, lanes); +} + +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const 
float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale) \ +{ \ + switch (chan) \ + { \ + case 1: \ + case 2: \ + case 4: \ + { \ + if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \ + { \ + return divrc_simd_common(not_scale_tag{}, in, scalar, \ + out, length, scale); \ + } \ + else \ + { \ + return divrc_simd_common(scale_tag{}, in, scalar, out, \ + length, scale); \ + } \ + } \ + case 3: \ + { \ + if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \ + { \ + return divrc_simd_c3(not_scale_tag{}, in, scalar, \ + out, length, scale); \ + } \ + else \ + { \ + return divrc_simd_c3(scale_tag{}, in, scalar, out, \ + length, scale); \ + } \ + } \ + default: \ + GAPI_Assert(chan <= 4); \ + break; \ + } \ + return 0; \ +} + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + //------------------------- // // Fluid kernels: Split3 // //------------------------- -int split3_simd(const uchar in[], uchar out1[], uchar out2[], - uchar out3[], const int width) +int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], + const int width) { constexpr int nlanes = v_uint8::nlanes; if (width < nlanes) From 54693cf7b1a32d15504308b7499b8b9160f016b5 Mon Sep 17 00:00:00 2001 From: Jeremy Ephron Date: Wed, 2 Mar 2022 11:01:53 -0800 Subject: [PATCH 11/84] Update stitching_detailed.py ### Critical bugs fixed: - `seam_finder.find()` returns None and overwrites `masks_warped` - `indices` is only 1-dimensional ### Nice-to-have bugs fixed: - avoid invalid value in sqrt and subsequent runtime warning - avoid printing help string on each run (use argparse 
builtin behavior) ### New features: - added graphcut seam finder support ### Test Summary: Tested on Ubuntu 20.04 with python 3.8.10 and opencv-python-contrib 4.5.5.62 --- samples/python/stitching_detailed.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/samples/python/stitching_detailed.py b/samples/python/stitching_detailed.py index 333ea15648..56d6965733 100644 --- a/samples/python/stitching_detailed.py +++ b/samples/python/stitching_detailed.py @@ -324,7 +324,10 @@ def main(): is_work_scale_set = True img = cv.resize(src=full_img, dsize=None, fx=work_scale, fy=work_scale, interpolation=cv.INTER_LINEAR_EXACT) if is_seam_scale_set is False: - seam_scale = min(1.0, np.sqrt(seam_megapix * 1e6 / (full_img.shape[0] * full_img.shape[1]))) + if seam_megapix > 0: + seam_scale = min(1.0, np.sqrt(seam_megapix * 1e6 / (full_img.shape[0] * full_img.shape[1]))) + else: + seam_scale = 1.0 seam_work_aspect = seam_scale / work_scale is_seam_scale_set = True img_feat = cv.detail.computeImageFeatures2(finder, img) @@ -345,9 +348,9 @@ def main(): img_names_subset = [] full_img_sizes_subset = [] for i in range(len(indices)): - img_names_subset.append(img_names[indices[i, 0]]) - img_subset.append(images[indices[i, 0]]) - full_img_sizes_subset.append(full_img_sizes[indices[i, 0]]) + img_names_subset.append(img_names[indices[i]]) + img_subset.append(images[indices[i]]) + full_img_sizes_subset.append(full_img_sizes[indices[i]]) images = img_subset img_names = img_names_subset full_img_sizes = full_img_sizes_subset @@ -479,7 +482,7 @@ def main(): blender = cv.detail.Blender_createDefault(cv.detail.Blender_NO) elif blend_type == "multiband": blender = cv.detail_MultiBandBlender() - blender.setNumBands((np.log(blend_width) / np.log(2.) - 1.).astype(np.int)) + blender.setNumBands((np.log(blend_width) / np.log(2.) - 1.).astype(np.int32)) elif blend_type == "feather": blender = cv.detail_FeatherBlender() blender.setSharpness(1. 
/ blend_width) @@ -513,6 +516,5 @@ def main(): if __name__ == '__main__': - print(__doc__) main() cv.destroyAllWindows() From 1ae2320e09ee8f01c41f4b6d5e63efa51871116b Mon Sep 17 00:00:00 2001 From: masa-iwm Date: Thu, 3 Mar 2022 07:21:53 +0900 Subject: [PATCH 12/84] bugfix convertFromD3D11Texture2D --- modules/core/src/directx.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/core/src/directx.cpp b/modules/core/src/directx.cpp index 2dbc3e2763..7b423cc29f 100644 --- a/modules/core/src/directx.cpp +++ b/modules/core/src/directx.cpp @@ -1385,6 +1385,7 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst OpenCL_D3D11_NV* impl_nv = ctx.getUserContext().get(); if (impl_nv) { __convertFromD3D11Texture2DNV(pD3D11Texture2D,dst); + return; } #endif OpenCL_D3D11* impl = ctx.getUserContext().get(); From 057c3da82ab132d2c7dad5351a2184a01f900a1a Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Fri, 4 Mar 2022 11:57:14 +0100 Subject: [PATCH 13/84] Allow Matx static function to work with Vec. --- modules/core/include/opencv2/core/matx.hpp | 20 +++++++++++++++++++ .../core/include/opencv2/core/operations.hpp | 16 +++++++++++++++ modules/core/test/test_mat.cpp | 10 ++++++++++ 3 files changed, 46 insertions(+) diff --git a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp index f25c8bce57..be1c26bb64 100644 --- a/modules/core/include/opencv2/core/matx.hpp +++ b/modules/core/include/opencv2/core/matx.hpp @@ -378,6 +378,14 @@ public: Vec(const Vec<_Tp, cn>& v); static Vec all(_Tp alpha); + static Vec ones(); + static Vec randn(_Tp a, _Tp b); + static Vec randu(_Tp a, _Tp b); + static Vec zeros(); +#ifdef CV_CXX11 + static Vec diag(_Tp alpha) = delete; + static Vec eye() = delete; +#endif //! 
per-element multiplication Vec mul(const Vec<_Tp, cn>& v) const; @@ -1063,6 +1071,18 @@ Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha) return v; } +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::ones() +{ + return Vec::all(1); +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::zeros() +{ + return Vec::all(0); +} + template inline Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const { diff --git a/modules/core/include/opencv2/core/operations.hpp b/modules/core/include/opencv2/core/operations.hpp index ef1808a8a1..4a393f51ba 100644 --- a/modules/core/include/opencv2/core/operations.hpp +++ b/modules/core/include/opencv2/core/operations.hpp @@ -220,6 +220,22 @@ Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b) return M; } +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::randu(_Tp a, _Tp b) +{ + Vec<_Tp,cn> V; + cv::randu(V, Scalar(a), Scalar(b)); + return V; +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::randn(_Tp a, _Tp b) +{ + Vec<_Tp,cn> V; + cv::randn(V, Scalar(a), Scalar(b)); + return V; +} + template inline Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const { diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 041e7d979f..a264330bf3 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -2381,4 +2381,14 @@ TEST(Mat, ptrVecni_20044) EXPECT_EQ(int(6), *(ci)); } +TEST(Mat, VecMatx_4650) +{ + // Makes sure the following compiles. + cv::Vec3b a; + a = cv::Vec3b::ones(); + a = cv::Vec3b::zeros(); + a = cv::Vec3b::randn(0, 10); + a = cv::Vec3b::randu(0, 10); +} + }} // namespace From 5be5efdacfeb3db855ada3aa84feec6b5d8e1de6 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Fri, 4 Mar 2022 21:57:49 +0300 Subject: [PATCH 14/84] GAPI Fluid: Dynamic dispatching for Add kernel. 
--- .../perf/cpu/gapi_core_perf_tests_fluid.cpp | 4 +- .../gapi/src/backends/fluid/gfluidcore.cpp | 157 ++------------ .../fluid/gfluidcore_func.dispatch.cpp | 27 +++ .../src/backends/fluid/gfluidcore_func.hpp | 22 ++ .../backends/fluid/gfluidcore_func.simd.hpp | 194 ++++++++++++++++++ 5 files changed, 263 insertions(+), 141 deletions(-) diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index a142109315..d91ce65fff 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -28,8 +28,8 @@ INSTANTIATE_TEST_CASE_P(SqrtPerfTestFluid, SqrtPerfTest, INSTANTIATE_TEST_CASE_P(AddPerfTestFluid, AddPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(AddCPerfTestFluid, AddCPerfTest, diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index bb33c45d85..866381f39b 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -379,136 +379,6 @@ CV_ALWAYS_INLINE int absdiff_simd(const T in1[], const T in2[], T out[], int len return 0; } -template -CV_ALWAYS_INLINE int add_simd_sametype(const T in1[], const T in2[], T out[], int length) -{ - constexpr int nlanes = static_cast(VT::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - VT a = vx_load(&in1[x]); - VT b = vx_load(&in2[x]); - vx_store(&out[x], a + b); - } - - if (x < length && (in1 != out) && (in2 != out)) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; 
- } - - return x; -} - -template -CV_ALWAYS_INLINE int add_simd(const SRC in1[], const SRC in2[], DST out[], int length) -{ - if (std::is_same::value && !std::is_same::value) - return 0; - - if (std::is_same::value) - { - if (std::is_same::value) - { - return add_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return add_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return add_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_int16 a1 = vx_load(reinterpret_cast(&in1[x])); - v_int16 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 2])); - v_int16 b1 = vx_load(reinterpret_cast(&in2[x])); - v_int16 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 2])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(a1 + b1, a2 + b2)); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vx_load(reinterpret_cast(&in1[x])); - v_float32 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 4])); - v_float32 a3 = vx_load(reinterpret_cast(&in1[x + 2 * nlanes / 4])); - v_float32 a4 = vx_load(reinterpret_cast(&in1[x + 3 * nlanes / 4])); - - v_float32 b1 = vx_load(reinterpret_cast(&in2[x])); - 
v_float32 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 4])); - v_float32 b3 = vx_load(reinterpret_cast(&in2[x + 2 * nlanes / 4])); - v_float32 b4 = vx_load(reinterpret_cast(&in2[x + 3 * nlanes / 4])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(v_pack(v_round(a1 + b1), v_round(a2 + b2)), - v_pack(v_round(a3 + b3), v_round(a4 + b4)))); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - - return 0; -} - template CV_ALWAYS_INLINE int sub_simd_sametype(const T in1[], const T in2[], T out[], int length) { @@ -641,7 +511,7 @@ CV_ALWAYS_INLINE int sub_simd(const SRC in1[], const SRC in2[], DST out[], int l #endif // CV_SIMD template -static CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const View &src2, +CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const View &src2, Arithm arithm, double scale=1) { static_assert(std::is_same::value, "wrong types"); @@ -652,7 +522,7 @@ static CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const Vie int width = dst.length(); int chan = dst.meta().chan; - int length = width * chan; + const int length = width * chan; // NB: assume in/out types are not 64-bits float _scale = static_cast( scale ); @@ -708,13 +578,22 @@ GAPI_FLUID_KERNEL(GFluidAdd, cv::gapi::core::GAdd, false) static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst) { // DST SRC1 SRC2 OP __VA_ARGS__ - BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( float, 
short, short, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index a682fb7dbb..d80a6b29c0 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -290,6 +290,33 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], CV_CPU_DISPATCH_MODES_ALL); } +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + \ + CV_CPU_DISPATCH(add_simd, (in1, in2, out, length), \ + 
CV_CPU_DISPATCH_MODES_ALL); \ +} + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 975383a8d9..052adbe2fd 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -222,6 +222,28 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width); +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index b92d92d0cf..4c324daa25 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -231,6 +231,28 @@ DIVRC_SIMD(float, float) #undef DIVRC_SIMD +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const 
SRC in2[], DST out[], const int length); + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); @@ -2503,6 +2525,178 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], return x; } +//------------------------- +// +// Fluid kernels: Add +// +//------------------------- + +CV_ALWAYS_INLINE void add_uchar_store(uchar* outx, const v_uint16& c1, const v_uint16& c2) +{ + vx_store(outx, v_pack(c1, c2)); +} + +CV_ALWAYS_INLINE void add_uchar_store(uchar* outx, const v_int16& c1, const v_int16& c2) +{ + vx_store(outx, v_pack_u(c1, c2)); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value, void>::type +add_simd_impl(const SRC* in1x, const SRC* in2x, DST* outx) +{ + vector_type_of_t a = vx_load(in1x); + vector_type_of_t b = vx_load(in2x); + vx_store(outx, a + b); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +add_simd_impl(const SRC* in1x, const SRC* in2x, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + vector_type_of_t a1 = vx_load(in1x); + vector_type_of_t a2 = vx_load(&in1x[nlanes / 2]); + vector_type_of_t b1 = vx_load(in2x); + vector_type_of_t b2 = vx_load(&in2x[nlanes / 2]); + + add_uchar_store(outx, a1 + b1, a2 + b2); +} + +CV_ALWAYS_INLINE void add_simd_impl(const float* in1x, const float* in2x, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 a1 = vx_load(in1x); + v_float32 a2 = vx_load(&in1x[nlanes / 4]); + v_float32 a3 = vx_load(&in1x[2 * nlanes / 4]); + v_float32 a4 = vx_load(&in1x[3 * 
nlanes / 4]); + + v_float32 b1 = vx_load(in2x); + v_float32 b2 = vx_load(&in2x[nlanes / 4]); + v_float32 b3 = vx_load(&in2x[2 * nlanes / 4]); + v_float32 b4 = vx_load(&in2x[3 * nlanes / 4]); + + vx_store(outx, v_pack_u(v_pack(v_round(a1 + b1), v_round(a2 + b2)), + v_pack(v_round(a3 + b3), v_round(a4 + b4)))); +} + +CV_ALWAYS_INLINE void add_simd_impl(const uchar* in1x, const uchar* in2x, short* outx) +{ + v_int16 a = v_reinterpret_as_s16(vx_load_expand(in1x)); + v_int16 b = v_reinterpret_as_s16(vx_load_expand(in2x)); + + vx_store(outx, a + b); +} + +CV_ALWAYS_INLINE void add_simd_impl(const uchar* in1x, const uchar* in2x, ushort* outx) +{ + v_uint16 a = vx_load_expand(in1x); + v_uint16 b = vx_load_expand(in2x); + + vx_store(outx, a + b); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +add_simd_impl(const float* in1x, const float* in2x, DST* outx) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + v_float32 a1 = vx_load(in1x); + v_float32 a2 = vx_load(&in1x[nlanes/2]); + v_float32 b1 = vx_load(in2x); + v_float32 b2 = vx_load(&in2x[nlanes/2]); + + v_store_i16(outx, v_round(a1 + b1), v_round(a2 + b2)); +} + +CV_ALWAYS_INLINE void add_simd_impl(const short* in1x, const short* in2x, ushort* outx) +{ + v_int16 a = vx_load(in1x); + v_int32 a1 = v_expand_low(a); + v_int32 a2 = v_expand_high(a); + + v_int16 b = vx_load(in2x); + v_int32 b1 = v_expand_low(b); + v_int32 b2 = v_expand_high(b); + + vx_store(outx, v_pack_u(a1 + b1, a2 + b2)); +} + +CV_ALWAYS_INLINE void add_simd_impl(const ushort* in1x, const ushort* in2x, short* outx) +{ + v_uint16 a = vx_load(in1x); + v_uint32 a1 = v_expand_low(a); + v_uint32 a2 = v_expand_high(a); + + v_uint16 b = vx_load(in2x); + v_uint32 b1 = v_expand_low(b); + v_uint32 b2 = v_expand_high(b); + + vx_store(outx, v_reinterpret_as_s16(v_pack(a1 + b1, a2 + b2))); +} + +template +CV_ALWAYS_INLINE void add_simd_impl(const SRC* in1x, const SRC* in2x, float* outx) +{ + v_float32 a = 
vg_load_f32(in1x); + v_float32 b = vg_load_f32(in2x); + + vx_store(outx, a + b); +} + +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + \ + if (length < nlanes) \ + return 0; \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= length - nlanes; x += nlanes) \ + { \ + add_simd_impl(&in1[x], &in2[x], &out[x]); \ + } \ + \ + if (x < length) \ + { \ + x = length - nlanes; \ + continue; \ + } \ + break; \ + } \ + \ + return x; \ +} + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END From 44c2c77548fcece6a8f4e37c6542163fadf1f397 Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Sat, 5 Mar 2022 13:50:40 +0300 Subject: [PATCH 15/84] Merge pull request #21658 from sivanov-work:vpp_core_add_roi G-API: Add ROI processing in VPP preproc * Add ROI in VPP prepro * Apply comments --- .../gapi_streaming_source_perf_tests.cpp | 6 +- .../onevpl/engine/preproc/preproc_engine.cpp | 48 ++-- .../onevpl/engine/preproc/preproc_engine.hpp | 3 +- .../onevpl/engine/preproc/preproc_session.cpp | 18 ++ .../onevpl/engine/preproc/preproc_session.hpp | 12 + .../engine/preproc_engine_interface.hpp | 3 +- .../gapi_streaming_vpp_preproc_test.cpp | 217 +++++++++++------- 7 files changed, 206 insertions(+), 101 deletions(-) diff --git a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp index 513d4d1f56..eacdef6eba 100644 --- 
a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp +++ b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp @@ -205,6 +205,7 @@ PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Test, TestPerformance) } VPPPreprocEngine preproc_engine(std::move(policy)); cv::gapi::wip::Data out; + cv::util::optional empty_roi; TEST_CYCLE() { source_ptr->pull(out); @@ -212,7 +213,7 @@ PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Test, TestPerformance) cv::util::optional param = preproc_engine.is_applicable(frame); pp_session sess = preproc_engine.initialize_preproc(param.value(), required_frame_param); - (void)preproc_engine.run_sync(sess, frame); + (void)preproc_engine.run_sync(sess, frame, empty_roi); } SANITY_CHECK_NOTHING(); @@ -269,6 +270,7 @@ PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Bypass_Test, TestPerformance) } VPPPreprocEngine preproc_engine(std::move(policy)); cv::gapi::wip::Data out; + cv::util::optional empty_roi; TEST_CYCLE() { source_ptr->pull(out); @@ -276,7 +278,7 @@ PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Bypass_Test, TestPerformance) cv::util::optional param = preproc_engine.is_applicable(frame); pp_session sess = preproc_engine.initialize_preproc(param.value(), required_frame_param); - (void)preproc_engine.run_sync(sess, frame); + (void)preproc_engine.run_sync(sess, frame, empty_roi); } SANITY_CHECK_NOTHING(); diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp index 1fb9bd4195..7de363fad5 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp @@ -34,6 +34,21 @@ bool FrameInfoComparator::equal_to(const mfxFrameInfo& lhs, const mfxFrameInfo& return lhs == rhs; } +void apply_roi(mfxFrameSurface1* surface_handle, + const cv::util::optional &opt_roi) { + if (opt_roi.has_value()) { + const cv::Rect &roi = opt_roi.value(); + surface_handle->Info.CropX = 
static_cast(roi.x); + surface_handle->Info.CropY = static_cast(roi.y); + surface_handle->Info.CropW = static_cast(roi.width); + surface_handle->Info.CropH = static_cast(roi.height); + GAPI_LOG_DEBUG(nullptr, "applied ROI {" << surface_handle->Info.CropX << + ", " << surface_handle->Info.CropY << "}, " + "{ " << surface_handle->Info.CropX + surface_handle->Info.CropW << + ", " << surface_handle->Info.CropY + surface_handle->Info.CropH << "}"); + } +} + VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& accel) : ProcessingEngineBase(std::move(accel)) { GAPI_LOG_INFO(nullptr, "Create VPP preprocessing engine"); @@ -57,31 +72,25 @@ VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& acce my_sess.sync_in_queue.pop(); auto *vpp_suface = my_sess.processing_surface_ptr.lock()->get_handle(); - /* TODO: consider CROP/ROI here - static int x_offset = 0; - static int y_offset = 0; - dec_surface->Info.CropX = x_offset; - dec_surface->Info.CropY = y_offset; - dec_surface->Info.CropW = 100 + x_offset++; - dec_surface->Info.CropH = 100 + y_offset++; - */ - session_type::outgoing_task vpp_pending_op {pending_op.sync_handle, nullptr}; + apply_roi(pending_op.decoded_surface_ptr, pending_op.roi); + + mfxSyncPoint vpp_sync_handle{}; my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session, pending_op.decoded_surface_ptr, vpp_suface, - nullptr, &vpp_pending_op.sync_handle); - vpp_pending_op.vpp_surface_ptr = vpp_suface; - + nullptr, &vpp_sync_handle); + session_type::outgoing_task vpp_pending_op {vpp_sync_handle, + vpp_suface, + std::move(pending_op) }; GAPI_LOG_DEBUG(nullptr, "Got VPP async operation" << ", sync id: " << vpp_pending_op.sync_handle << ", dec surface: " << - pending_op.decoded_surface_ptr << + vpp_pending_op.original_surface_ptr << ", trans surface: " << vpp_pending_op.vpp_surface_ptr << ", status: " << mfxstatus_to_string(my_sess.last_status)); - // NB: process status if (my_sess.last_status == MFX_ERR_MORE_SURFACE || my_sess.last_status == MFX_ERR_NONE) { 
@@ -131,6 +140,7 @@ VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& acce // put frames in ready queue on success if (MFX_ERR_NONE == sess.last_status) { + pending_op.release_frame(); on_frame_ready(my_sess, pending_op.vpp_surface_ptr); } } @@ -327,8 +337,8 @@ VPPPreprocEngine::initialize_session(mfxSession, return {}; } -cv::MediaFrame VPPPreprocEngine::run_sync(const pp_session& sess, const cv::MediaFrame& in_frame) { - +cv::MediaFrame VPPPreprocEngine::run_sync(const pp_session& sess, const cv::MediaFrame& in_frame, + const cv::util::optional &roi) { std::shared_ptr pp_sess_impl = sess.get(); if (!pp_sess_impl) { // bypass case @@ -347,8 +357,10 @@ cv::MediaFrame VPPPreprocEngine::run_sync(const pp_session& sess, const cv::Medi // schedule decoded surface into preproc queue session_type::incoming_task in_preproc_request {nullptr, - vpl_adapter->get_surface()->get_handle(), - in_frame}; + vpl_adapter->get_surface()->get_handle(), + vpl_adapter->get_surface()->get_info(), + in_frame, + roi}; s->sync_in_queue.emplace(in_preproc_request); // invoke pipeline to transform decoded surface into preprocessed surface diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp index c4be48708a..b1d0cee264 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp @@ -47,7 +47,8 @@ public: const GFrameDesc& required_frame_descr) override; cv::MediaFrame run_sync(const pp_session &session_handle, - const cv::MediaFrame& in_frame) override; + const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi) override; private: std::map preproc_session_map; diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp index 059b7caea7..2695a26049 100644 --- 
a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp @@ -60,6 +60,24 @@ void VPPPreprocSession::init_surface_pool(VPLAccelerationPolicy::pool_key_t key) const mfxFrameInfo& VPPPreprocSession::get_video_param() const { return mfx_vpp_out_param.vpp.Out; } + +VPPPreprocSession::outgoing_task::outgoing_task(mfxSyncPoint acquired_sync_handle, + mfxFrameSurface1* acquired_surface_ptr, + VPPPreprocSession::incoming_task &&in) : + sync_handle(acquired_sync_handle), + vpp_surface_ptr(acquired_surface_ptr), + original_surface_ptr(in.decoded_surface_ptr), + original_frame_info(std::move(in.decoded_frame_info)), + original_frame(in.decoded_frame_copy) { +} + +void VPPPreprocSession::outgoing_task::release_frame() { + // restore initial surface params + memcpy(&(original_surface_ptr->Info), + &original_frame_info, sizeof(Surface::info_t)); + // release references on frame adapter + original_frame = cv::MediaFrame(); +} } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp index 1f873fda56..b6800c3f76 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp @@ -41,12 +41,24 @@ private: struct incoming_task { mfxSyncPoint sync_handle; mfxFrameSurface1* decoded_surface_ptr; + Surface::info_t decoded_frame_info; cv::MediaFrame decoded_frame_copy; + cv::util::optional roi; }; struct outgoing_task { + outgoing_task() = default; + outgoing_task(mfxSyncPoint acquired_sync_handle, + mfxFrameSurface1* acquired_surface_ptr, + incoming_task &&in); mfxSyncPoint sync_handle; mfxFrameSurface1* vpp_surface_ptr; + + mfxFrameSurface1* original_surface_ptr; + void release_frame(); + private: + Surface::info_t original_frame_info; + cv::MediaFrame 
original_frame; }; std::queue sync_in_queue; diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp index 4997a04562..be347a258f 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp @@ -27,7 +27,8 @@ struct IPreprocEngine { initialize_preproc(const pp_params& initial_frame_param, const GFrameDesc& required_frame_descr) = 0; virtual cv::MediaFrame - run_sync(const pp_session &sess, const cv::MediaFrame& in_frame) = 0; + run_sync(const pp_session &sess, const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi = {}) = 0; }; } // namespace wip } // namespace gapi diff --git a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp index c43dfa9496..a0a66c7b93 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp @@ -181,6 +181,8 @@ using acceleration_t = int; using out_frame_info_t = cv::GFrameDesc; using preproc_args_t = std::tuple; +static cv::util::optional empty_roi; + class VPPPreprocParams : public ::testing::TestWithParam {}; preproc_args_t files[] = { @@ -246,7 +248,9 @@ TEST(OneVPL_Source_PreprocEngine, functional_single_thread) required_frame_param); // 2) make preproc using incoming decoded frame & preproc session - cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, first_decoded_frame); + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, + first_decoded_frame, + empty_roi); cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); ASSERT_FALSE(first_frame_decoded_desc == first_outcome_pp_desc); @@ -278,7 +282,9 @@ TEST(OneVPL_Source_PreprocEngine, functional_single_thread) ASSERT_EQ(pp_sess.get().get(), first_pp_sess.get().get()); - 
cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame); + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, + decoded_frame, + empty_roi); cv::GFrameDesc pp_desc = pp_frame.desc(); ASSERT_TRUE(pp_desc == first_outcome_pp_desc); in_progress = false; @@ -291,8 +297,95 @@ TEST(OneVPL_Source_PreprocEngine, functional_single_thread) ASSERT_NE(frames_processed_count, 1); } +void decode_function(cv::gapi::wip::onevpl::VPLLegacyDecodeEngine &decode_engine, + cv::gapi::wip::onevpl::ProcessingEngineBase::session_ptr sess_ptr, + SafeQueue &queue, size_t &decoded_number) { + // decode first frame + { + cv::MediaFrame decoded_frame; + ASSERT_NO_THROW(decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine)); + queue.push(std::move(decoded_frame)); + } -TEST_P(VPPPreprocParams, functional_different_threads) + // launch pipeline + try { + while(true) { + queue.push(extract_decoded_frame(sess_ptr->session, decode_engine)); + decoded_number++; + } + } catch (...) 
{} + + // send stop + queue.push_stop(); +} + +void preproc_function(cv::gapi::wip::onevpl::VPPPreprocEngine &preproc_engine, SafeQueue&queue, + size_t &preproc_number, const out_frame_info_t &required_frame_param, + const cv::util::optional &roi_rect = {}) { + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + // create preproc session based on frame description & network info + cv::MediaFrame first_decoded_frame = queue.pop(); + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = + preproc_engine.initialize_preproc(first_pp_params.value(), + required_frame_param); + + // make preproc using incoming decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, + first_decoded_frame, + roi_rect); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = cv::MediaFrame(); + + // launch pipeline + bool in_progress = false; + // let's allow counting of preprocessed frames to check this value later: + // Currently, it looks redundant to implement any kind of gracefull shutdown logic + // in this test - so let's apply agreement that media source is processed + // succesfully when preproc_number != 1 in result. 
+ // Specific validation logic which adhere to explicit counter value may be implemented + // in particular test scope + preproc_number = 1; + try { + while(true) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + break; + } + in_progress = true; + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + ASSERT_TRUE(0 == memcmp(¶ms.value(), &first_pp_params.value(), sizeof(pp_params))); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + ASSERT_EQ(pp_sess.get().get(), + first_pp_sess.get().get()); + + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame, empty_roi); + cv::GFrameDesc pp_desc = pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + preproc_number++; + } + } catch (...) {} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(preproc_number, 1); +} + +using roi_t = cv::util::optional; +using preproc_roi_args_t = decltype(std::tuple_cat(std::declval(), + std::declval>())); +class VPPPreprocROIParams : public ::testing::TestWithParam {}; +TEST_P(VPPPreprocROIParams, functional_roi_different_threads) { using namespace cv::gapi::wip; using namespace cv::gapi::wip::onevpl; @@ -300,7 +393,8 @@ TEST_P(VPPPreprocParams, functional_different_threads) decoder_t decoder_id; acceleration_t accel; out_frame_info_t required_frame_param; - std::tie(file_path, decoder_id, accel, required_frame_param) = GetParam(); + roi_t opt_roi; + std::tie(file_path, decoder_id, accel, required_frame_param, opt_roi) = GetParam(); file_path = findDataFile(file_path); @@ -338,87 +432,52 @@ TEST_P(VPPPreprocParams, functional_different_threads) size_t decoded_number = 1; size_t preproc_number = 0; - std::thread decode_thread([&decode_engine, sess_ptr, - &queue, &decoded_number] () { - // decode first frame - { - cv::MediaFrame decoded_frame; - 
ASSERT_NO_THROW(decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine)); - queue.push(std::move(decoded_frame)); - } - - // launch pipeline - try { - while(true) { - queue.push(extract_decoded_frame(sess_ptr->session, decode_engine)); - decoded_number++; - } - } catch (...) {} - - // send stop - queue.push_stop(); - }); - - std::thread preproc_thread([&preproc_engine, &queue, &preproc_number, required_frame_param] () { - // create preproc session based on frame description & network info - cv::MediaFrame first_decoded_frame = queue.pop(); - cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); - ASSERT_TRUE(first_pp_params.has_value()); - pp_session first_pp_sess = - preproc_engine.initialize_preproc(first_pp_params.value(), required_frame_param); - - // make preproc using incoming decoded frame & preproc session - cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, first_decoded_frame); - cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); - - // do not hold media frames because they share limited DX11 surface pool resources - first_decoded_frame = cv::MediaFrame(); - first_pp_frame = cv::MediaFrame(); - - // launch pipeline - bool in_progress = false; - // let's allow counting of preprocessed frames to check this value later: - // Currently, it looks redundant to implement any kind of gracefull shutdown logic - // in this test - so let's apply agreement that media source is processed - // succesfully when preproc_number != 1 in result - preproc_number = 1; - try { - while(true) { - cv::MediaFrame decoded_frame = queue.pop(); - if (SafeQueue::is_stop(decoded_frame)) { - break; - } - in_progress = true; - - cv::util::optional params = preproc_engine.is_applicable(decoded_frame); - ASSERT_TRUE(params.has_value()); - ASSERT_TRUE(0 == memcmp(¶ms.value(), &first_pp_params.value(), sizeof(pp_params))); - - pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), - 
required_frame_param); - ASSERT_EQ(pp_sess.get().get(), - first_pp_sess.get().get()); - - cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame); - cv::GFrameDesc pp_desc = pp_frame.desc(); - ASSERT_TRUE(pp_desc == first_outcome_pp_desc); - in_progress = false; - preproc_number++; - } - } catch (...) {} - - // test if interruption has happened - ASSERT_FALSE(in_progress); - ASSERT_NE(preproc_number, 1); - }); + std::thread decode_thread(decode_function, std::ref(decode_engine), sess_ptr, + std::ref(queue), std::ref(decoded_number)); + std::thread preproc_thread(preproc_function, std::ref(preproc_engine), + std::ref(queue), std::ref(preproc_number), + std::cref(required_frame_param), + std::cref(opt_roi)); decode_thread.join(); preproc_thread.join(); ASSERT_EQ(preproc_number, decoded_number); } -INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocEngine, VPPPreprocParams, - testing::ValuesIn(files)); +preproc_roi_args_t files_w_roi[] = { + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{cv::Rect{0,0,50,50}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{cv::Rect{0,0,100,100}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{cv::Rect{100,100,200,200}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}}, + roi_t{cv::Rect{0,0,100,100}}}, + 
preproc_roi_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}}, + roi_t{}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}}, + roi_t{cv::Rect{100,100,200,200}}} +}; + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocEngineROI, VPPPreprocROIParams, + testing::ValuesIn(files_w_roi)); + using VPPInnerPreprocParams = VPPPreprocParams; TEST_P(VPPInnerPreprocParams, functional_inner_preproc_size) From ecb30409f6aa74057e90ceccbe10146d0cbbd111 Mon Sep 17 00:00:00 2001 From: Ruslan Garnov Date: Tue, 1 Mar 2022 15:09:05 +0300 Subject: [PATCH 16/84] Added reshape() functionality to CPU backend --- modules/gapi/src/backends/cpu/gcpubackend.cpp | 43 ++++++++--- modules/gapi/src/backends/cpu/gcpubackend.hpp | 17 ++--- .../gapi/src/backends/fluid/gfluidbackend.cpp | 4 +- .../cpu/gapi_ocv_stateful_kernel_tests.cpp | 74 ++++++++++++++++++- .../test/internal/gapi_int_executor_tests.cpp | 26 ++++++- 5 files changed, 140 insertions(+), 24 deletions(-) diff --git a/modules/gapi/src/backends/cpu/gcpubackend.cpp b/modules/gapi/src/backends/cpu/gcpubackend.cpp index dfcaf3d478..b1e716f3ba 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.cpp +++ b/modules/gapi/src/backends/cpu/gcpubackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2021 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "precomp.hpp" @@ -88,7 +88,7 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g, { case NodeType::OP: { - m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); + m_opNodes.push_back(nh); // If kernel is stateful then prepare storage for its state. 
GCPUKernel k = gcm.metadata(nh).get().k; @@ -107,19 +107,12 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g, auto rc = RcDesc{desc.rc, desc.shape, desc.ctor}; magazine::bindInArg(m_res, rc, m_gm.metadata(nh).get().arg); } - //preallocate internal Mats in advance - if (desc.storage == Data::Storage::INTERNAL && desc.shape == GShape::GMAT) - { - const auto mat_desc = util::get(desc.meta); - auto& mat = m_res.slot()[desc.rc]; - createMat(mat_desc, mat); - } break; } default: util::throw_error(std::logic_error("Unsupported NodeType type")); } } - + makeReshape(); // For each stateful kernel call 'setup' user callback to initialize state. setupKernelStates(); } @@ -176,8 +169,38 @@ void cv::gimpl::GCPUExecutable::setupKernelStates() } } +void cv::gimpl::GCPUExecutable::makeReshape() { + // Prepare the execution script + m_script.clear(); + for (auto &nh : m_opNodes) { + m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); + } + + // Preallocate internal mats + for (auto& nh : m_dataNodes) { + const auto& desc = m_gm.metadata(nh).get(); + if (desc.storage == Data::Storage::INTERNAL && desc.shape == GShape::GMAT) { + const auto mat_desc = util::get(desc.meta); + auto& mat = m_res.slot()[desc.rc]; + createMat(mat_desc, mat); + } + } +} + +void cv::gimpl::GCPUExecutable::reshape(ade::Graph&, const GCompileArgs& args) { + m_compileArgs = args; + makeReshape(); + // Signal to reset stateful kernels` state. + // There can be no handleNewStream() call to set this flag + // if user didn't call GCompiled`s prepareForNewStream() + m_newStreamStarted = true; +} + void cv::gimpl::GCPUExecutable::handleNewStream() { + // Signal to reset stateful kernels` state. 
+ // No need to call reshape() here since it'll + // be called automatically if input meta was changed m_newStreamStarted = true; } diff --git a/modules/gapi/src/backends/cpu/gcpubackend.hpp b/modules/gapi/src/backends/cpu/gcpubackend.hpp index 6328da03b0..6a7b41e3d4 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.hpp +++ b/modules/gapi/src/backends/cpu/gcpubackend.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #ifndef OPENCV_GAPI_GCPUBACKEND_HPP @@ -33,7 +33,7 @@ class GCPUExecutable final: public GIslandExecutable { const ade::Graph &m_g; GModel::ConstGraph m_gm; - const cv::GCompileArgs m_compileArgs; + cv::GCompileArgs m_compileArgs; struct OperationInfo { @@ -51,6 +51,7 @@ class GCPUExecutable final: public GIslandExecutable // List of all resources in graph (both internal and external) std::vector m_dataNodes; + std::vector m_opNodes; // Actual data of all resources in graph (both internal and external) Mag m_res; @@ -61,19 +62,15 @@ class GCPUExecutable final: public GIslandExecutable GArg packArg(const GArg &arg); void setupKernelStates(); + void makeReshape(); + public: GCPUExecutable(const ade::Graph &graph, const cv::GCompileArgs &compileArgs, const std::vector &nodes); - virtual inline bool canReshape() const override { return false; } - virtual inline void reshape(ade::Graph&, const GCompileArgs&) override - { - // FIXME: CPU plugin is in fact reshapeable (as it was initially, - // even before outMeta() has been introduced), so this limitation - // should be dropped. 
- util::throw_error(std::logic_error("GCPUExecutable::reshape() should never be called")); - } + virtual inline bool canReshape() const override { return true; } + virtual void reshape(ade::Graph&, const GCompileArgs&) override; virtual void handleNewStream() override; diff --git a/modules/gapi/src/backends/fluid/gfluidbackend.cpp b/modules/gapi/src/backends/fluid/gfluidbackend.cpp index 0e33ca9c0f..ed4dda7d49 100644 --- a/modules/gapi/src/backends/fluid/gfluidbackend.cpp +++ b/modules/gapi/src/backends/fluid/gfluidbackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "precomp.hpp" @@ -954,7 +954,7 @@ namespace GFluidModel fg(graph); for (const auto& node : g.nodes()) { - if (g.metadata(node).get().t == NodeType::DATA) + if (fg.metadata(node).contains()) { auto& fd = fg.metadata(node).get(); fd.latency = 0; diff --git a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp index 239afc38c4..cf03430d55 100644 --- a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp +++ b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
// -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2022 Intel Corporation #include "gapi_ocv_stateful_kernel_test_utils.hpp" #include @@ -342,7 +342,79 @@ TEST(StatefulKernel, StateIsInitViaCompArgsInStreaming) // Allowing 5% difference of all pixels between G-API and reference OpenCV results testBackSubInStreaming(gapiBackSub, 5); } + +TEST(StatefulKernel, StateIsChangedViaCompArgsOnReshape) +{ + cv::GMat in; + cv::GComputation comp(in, GBackSub::on(in)); + + const auto pkg = cv::gapi::kernels(); + + // OpenCV reference subtractor + auto pOCVBackSubKNN = createBackgroundSubtractorKNN(); + auto pOCVBackSubMOG2 = createBackgroundSubtractorMOG2(); + + const auto run = [&](const std::string& videoPath, const std::string& method) { + auto path = findDataFile(videoPath); + cv::gapi::wip::IStreamSource::Ptr source; + try { + source = gapi::wip::make_src(path); + } catch(...) { + throw SkipTestException("Video file can not be opened"); + } + cv::Mat inMat, gapiForeground, ocvForeground; + + for (int i = 0; i < 10; i++) { + cv::gapi::wip::Data inData; + source->pull(inData); + inMat = cv::util::get(inData); + comp.apply(inMat, gapiForeground, + cv::compile_args(pkg, BackSubStateParams{method})); + + if (method == "knn") { + pOCVBackSubKNN->apply(inMat, ocvForeground, -1); + // Allowing 1% difference among all pixels + compareBackSubResults(gapiForeground, ocvForeground, 1); + } else if (method == "mog2") { + pOCVBackSubMOG2->apply(inMat, ocvForeground, -1); + compareBackSubResults(gapiForeground, ocvForeground, 5); + } else { + CV_Assert(false && "Unknown BackSub method"); + } + } + }; + + run("cv/video/768x576.avi", "knn"); + run("cv/video/1920x1080.avi", "mog2"); +} #endif + +TEST(StatefulKernel, StateIsAutoResetOnReshape) +{ + cv::GMat in; + cv::GOpaque up_to_date = GIsStateUpToDate::on(in); + cv::GOpaque calls_count = GCountCalls::on(in); + cv::GComputation comp(cv::GIn(in), cv::GOut(up_to_date, calls_count)); + + auto run = [&comp](const cv::Mat&
in_mat) { + const auto pkg = cv::gapi::kernels(); + bool stateIsUpToDate = false; + int callsCount = 0; + for (int i = 0; i < 3; i++) { + comp.apply(cv::gin(in_mat), cv::gout(stateIsUpToDate, callsCount), + cv::compile_args(pkg)); + EXPECT_TRUE(stateIsUpToDate); + EXPECT_EQ(i+1, callsCount); + } + }; + + cv::Mat in_mat1(32, 32, CV_8UC1); + run(in_mat1); + + cv::Mat in_mat2(16, 16, CV_8UC1); + run(in_mat2); +} + //------------------------------------------------------------------------------------------------------------- diff --git a/modules/gapi/test/internal/gapi_int_executor_tests.cpp b/modules/gapi/test/internal/gapi_int_executor_tests.cpp index 4745213909..b8f0e18e0b 100644 --- a/modules/gapi/test/internal/gapi_int_executor_tests.cpp +++ b/modules/gapi/test/internal/gapi_int_executor_tests.cpp @@ -2,12 +2,14 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "../test_precomp.hpp" #include "../gapi_mock_kernels.hpp" +#include + namespace opencv_test { @@ -294,6 +296,28 @@ TEST_F(GExecutorReshapeTest, ReshapeCallAllocate) EXPECT_EQ(1, island1.getReshapeCounter()); } +TEST_F(GExecutorReshapeTest, CPUBackendIsReshapable) +{ + comp = cv::GComputation([](){ + cv::GMat in; + cv::GMat foo = I::Foo::on(in); + cv::GMat out = cv::gapi::bitwise_not(cv::gapi::bitwise_not(in)); + return cv::GComputation(cv::GIn(in), cv::GOut(foo, out)); + }); + // NB: Initial state + EXPECT_EQ(0, island1.getReshapeCounter()); + + // NB: First compilation. + cv::Mat out_mat2; + comp.apply(cv::gin(in_mat1), cv::gout(out_mat, out_mat2), cv::compile_args(pkg)); + EXPECT_EQ(0, island1.getReshapeCounter()); + + // NB: The entire graph is reshapable, so it won't be recompiled, but reshaped. 
+ comp.apply(cv::gin(in_mat2), cv::gout(out_mat, out_mat2), cv::compile_args(pkg)); + EXPECT_EQ(1, island1.getReshapeCounter()); + EXPECT_EQ(0, cvtest::norm(out_mat2, in_mat2, NORM_INF)); +} + // FIXME: Add explicit tests on GMat/GScalar/GArray being connectors // between executed islands From 8db7d435b9cd1082ac17f2872e1fdf3afb948877 Mon Sep 17 00:00:00 2001 From: Tsukasa Sugiura Date: Tue, 8 Mar 2022 05:26:15 +0900 Subject: [PATCH 17/84] Merge pull request #21692 from UnaNancyOwen:add_softmax * add apply softmax option to ClassificationModel * remove default arguments of ClassificationModel::setSoftMax() * fix build for python * fix docs warning for setSoftMax() * add impl for ClassficationModel() * fix failed build for docs by trailing whitespace * move to implement classify() to ClassificationModel_Impl * move to implement softmax() to ClassificationModel_Impl * remove softmax from public method in ClassificationModel --- modules/dnn/include/opencv2/dnn/dnn.hpp | 21 ++++++ modules/dnn/src/model.cpp | 89 ++++++++++++++++++++++--- 2 files changed, 99 insertions(+), 11 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 97033a313e..67042a14b7 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -1310,6 +1310,9 @@ CV__DNN_INLINE_NS_BEGIN class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model { public: + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) + ClassificationModel(); + /** * @brief Create classification model from network represented in one of the supported formats. * An order of @p model and @p config arguments does not matter. @@ -1324,6 +1327,24 @@ CV__DNN_INLINE_NS_BEGIN */ CV_WRAP ClassificationModel(const Net& network); + /** + * @brief Set enable/disable softmax post processing option. 
+ * + * If this option is true, softmax is applied after forward inference within the classify() function + * to convert the confidences range to [0.0-1.0]. + * This function allows you to toggle this behavior. + * Please set this option to true when the model does not contain a softmax layer. + * @param[in] enable Set enable softmax post processing within the classify() function. + */ + CV_WRAP ClassificationModel& setEnableSoftmaxPostProcessing(bool enable); + + /** + * @brief Get enable/disable softmax post processing option. + * + * This option defaults to false; softmax post processing is not applied within the classify() function. + */ + CV_WRAP bool getEnableSoftmaxPostProcessing() const; + + /** @brief Given the @p input frame, create input blob, run net and return top-1 prediction. + * @param[in] frame The input image. + */ diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index bc8709d22e..22d5681d5b 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -197,28 +197,95 @@ void Model::predict(InputArray frame, OutputArrayOfArrays outs) const } +class ClassificationModel_Impl : public Model::Impl +{ +public: + virtual ~ClassificationModel_Impl() {} + ClassificationModel_Impl() : Impl() {} + ClassificationModel_Impl(const ClassificationModel_Impl&) = delete; + ClassificationModel_Impl(ClassificationModel_Impl&&) = delete; + + void setEnableSoftmaxPostProcessing(bool enable) + { + applySoftmax = enable; + } + + bool getEnableSoftmaxPostProcessing() const + { + return applySoftmax; + } + + std::pair classify(InputArray frame) + { + std::vector outs; + processFrame(frame, outs); + CV_Assert(outs.size() == 1); + + Mat out = outs[0].reshape(1, 1); + + if(getEnableSoftmaxPostProcessing()) + { + softmax(out, out); + } + + double conf; + Point maxLoc; + cv::minMaxLoc(out, nullptr, &conf, nullptr, &maxLoc); + return {maxLoc.x, static_cast(conf)}; + } + +protected: + void softmax(InputArray inblob, OutputArray outblob) + { + const Mat input = inblob.getMat();
+ outblob.create(inblob.size(), inblob.type()); + + Mat exp; + const float max = *std::max_element(input.begin(), input.end()); + cv::exp((input - max), exp); + outblob.getMat() = exp / cv::sum(exp)[0]; + } + +protected: + bool applySoftmax = false; +}; + +ClassificationModel::ClassificationModel() + : Model() +{ + // nothing +} + ClassificationModel::ClassificationModel(const String& model, const String& config) - : Model(model, config) + : ClassificationModel(readNet(model, config)) { // nothing } ClassificationModel::ClassificationModel(const Net& network) - : Model(network) + : Model() { - // nothing + impl = makePtr(); + impl->initNet(network); +} + +ClassificationModel& ClassificationModel::setEnableSoftmaxPostProcessing(bool enable) +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); + impl.dynamicCast()->setEnableSoftmaxPostProcessing(enable); + return *this; +} + +bool ClassificationModel::getEnableSoftmaxPostProcessing() const +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); + return impl.dynamicCast()->getEnableSoftmaxPostProcessing(); } std::pair ClassificationModel::classify(InputArray frame) { - std::vector outs; - impl->processFrame(frame, outs); - CV_Assert(outs.size() == 1); - - double conf; - cv::Point maxLoc; - minMaxLoc(outs[0].reshape(1, 1), nullptr, &conf, nullptr, &maxLoc); - return {maxLoc.x, static_cast(conf)}; + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); + return impl.dynamicCast()->classify(frame); } void ClassificationModel::classify(InputArray frame, int& classId, float& conf) From a80af177b6cca878e92e48a9fbe022396dcbe368 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 16 Feb 2022 21:55:56 +0000 Subject: [PATCH 18/84] dnn: split dnn.cpp code base commit: 19926e2979ef049a89dd029e2231555db40c2776 original dnn.cpp content: https://github.com/opencv/opencv/blame/19926e2979ef049a89dd029e2231555db40c2776/modules/dnn/src/dnn.cpp --- modules/dnn/src/dnn.cpp | 5849 ----------------- 
modules/dnn/src/dnn_common.hpp | 41 +- modules/dnn/src/dnn_params.cpp | 67 + modules/dnn/src/dnn_read.cpp | 93 + modules/dnn/src/dnn_utils.cpp | 158 + modules/dnn/src/layer.cpp | 247 + modules/dnn/src/layer_factory.cpp | 111 + modules/dnn/src/layer_internals.hpp | 335 + .../dnn/src/layers/not_implemented_layer.cpp | 2 +- modules/dnn/src/legacy_backend.cpp | 122 + modules/dnn/src/legacy_backend.hpp | 339 + modules/dnn/src/model.cpp | 2 +- modules/dnn/src/net.cpp | 414 ++ modules/dnn/src/net_impl.cpp | 2087 ++++++ modules/dnn/src/net_impl.hpp | 261 + modules/dnn/src/net_impl_backend.cpp | 200 + modules/dnn/src/net_impl_fuse.cpp | 607 ++ modules/dnn/src/net_openvino.cpp | 568 ++ modules/dnn/src/net_quantization.cpp | 296 + modules/dnn/src/op_cuda.cpp | 106 + modules/dnn/src/op_halide.cpp | 206 +- modules/dnn/src/op_inf_engine.cpp | 25 + modules/dnn/src/op_inf_engine.hpp | 7 + modules/dnn/src/op_vkcom.cpp | 39 + modules/dnn/src/op_webnn.cpp | 272 + modules/dnn/src/precomp.hpp | 9 + modules/dnn/src/registry.cpp | 144 + 27 files changed, 6749 insertions(+), 5858 deletions(-) delete mode 100644 modules/dnn/src/dnn.cpp create mode 100644 modules/dnn/src/dnn_params.cpp create mode 100644 modules/dnn/src/dnn_read.cpp create mode 100644 modules/dnn/src/dnn_utils.cpp create mode 100644 modules/dnn/src/layer.cpp create mode 100644 modules/dnn/src/layer_factory.cpp create mode 100644 modules/dnn/src/layer_internals.hpp create mode 100644 modules/dnn/src/legacy_backend.cpp create mode 100644 modules/dnn/src/legacy_backend.hpp create mode 100644 modules/dnn/src/net.cpp create mode 100644 modules/dnn/src/net_impl.cpp create mode 100644 modules/dnn/src/net_impl.hpp create mode 100644 modules/dnn/src/net_impl_backend.cpp create mode 100644 modules/dnn/src/net_impl_fuse.cpp create mode 100644 modules/dnn/src/net_openvino.cpp create mode 100644 modules/dnn/src/net_quantization.cpp create mode 100644 modules/dnn/src/op_cuda.cpp create mode 100644 modules/dnn/src/registry.cpp diff --git 
a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp deleted file mode 100644 index 954ada50f2..0000000000 --- a/modules/dnn/src/dnn.cpp +++ /dev/null @@ -1,5849 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "op_halide.hpp" -#include "op_inf_engine.hpp" -#include "ie_ngraph.hpp" -#include "op_vkcom.hpp" -#include "op_cuda.hpp" -#include "op_webnn.hpp" - -#ifdef HAVE_CUDA -#include "cuda4dnn/init.hpp" -#include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers -#endif - -#include "halide_scheduler.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -namespace cv { -namespace dnn { -CV__DNN_INLINE_NS_BEGIN - -static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0); - -// this option is useful to run valgrind memory errors detection -static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false); - -#ifdef HAVE_OPENCL -static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false); -#endif - -static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", -#ifdef HAVE_INF_ENGINE - (size_t)DNN_BACKEND_INFERENCE_ENGINE -#else - (size_t)DNN_BACKEND_OPENCV -#endif -); - -// Additional checks (slowdowns execution!) 
-static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false); -static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); -static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); - -using std::vector; -using std::map; -using std::make_pair; -using std::set; -using std::string; - -//================================================================================================== - -class BackendRegistry -{ -public: - typedef std::vector< std::pair > BackendsList; - const BackendsList & getBackends() const { return backends; } - static BackendRegistry & getRegistry() - { - static BackendRegistry impl; - return impl; - } - -#ifdef HAVE_INF_ENGINE - static inline bool checkIETarget(Target target) - { -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3) - // Lightweight detection - const std::vector devices = getCore("").GetAvailableDevices(); - for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) - { - if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD) - return true; - if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL) - return true; - else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA) - return true; - else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU) - return true; - else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) - return true; - } - return false; -#else - cv::dnn::Net net; - cv::dnn::LayerParams lp; - lp.set("kernel_size", 1); - lp.set("num_output", 1); - lp.set("bias_term", false); - lp.type = "Convolution"; - lp.name = "testLayer"; - lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1))); - net.addLayerToPrev(lp.name, lp.type, lp); - net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); - 
net.setPreferableTarget(target); - static int inpDims[] = {1, 2, 3, 4}; - net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0))); - try - { - net.forward(); - } - catch(const std::exception& e) - { - CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what()); - return false; - } - return true; -#endif - } -#endif - -private: - BackendRegistry() - { -#ifdef HAVE_HALIDE - backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU)); -# ifdef HAVE_OPENCL - if (cv::ocl::useOpenCL()) - backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL)); -# endif -#endif // HAVE_HALIDE - -#ifdef HAVE_INF_ENGINE - if (checkIETarget(DNN_TARGET_CPU)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU)); -#endif - } - if (checkIETarget(DNN_TARGET_MYRIAD)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD)); -#endif - } - if (checkIETarget(DNN_TARGET_HDDL)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL)); -#endif - } -#ifdef HAVE_OPENCL - if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel()) - { - if (checkIETarget(DNN_TARGET_OPENCL)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL)); -#endif - } - if (checkIETarget(DNN_TARGET_OPENCL_FP16)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16)); -#endif - } - } -#endif -#endif // HAVE_INF_ENGINE - -#ifdef HAVE_WEBNN - if (haveWebnn()) - { - backends.push_back(std::make_pair(DNN_BACKEND_WEBNN, DNN_TARGET_CPU)); - } -#endif // HAVE_WEBNN - -#ifdef HAVE_OPENCL - if (cv::ocl::useOpenCL()) - { - backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)); - backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, 
DNN_TARGET_OPENCL_FP16)); - } -#endif - - backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); - -#ifdef HAVE_VULKAN - if (haveVulkan()) - backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN)); -#endif - -#ifdef HAVE_CUDA - if (haveCUDA()) - { - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); - } -#endif - } - - BackendsList backends; -}; - - -std::vector< std::pair > getAvailableBackends() -{ - return BackendRegistry::getRegistry().getBackends(); -} - -std::vector getAvailableTargets(Backend be) -{ - if (be == DNN_BACKEND_DEFAULT) - be = (Backend)PARAM_DNN_BACKEND_DEFAULT; -#ifdef HAVE_INF_ENGINE - if (be == DNN_BACKEND_INFERENCE_ENGINE) - be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; -#endif - - std::vector result; - const BackendRegistry::BackendsList all_backends = getAvailableBackends(); - for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i ) - { - if (i->first == be) - result.push_back(i->second); - } - return result; -} - -//================================================================================================== - -namespace -{ - struct LayerShapes - { - ShapesVec in, out, internal; - // No guarantees that layer which support in-place computations - // will be computed in-place (input.data_ptr == output.data_ptr). - // If layer said that it could work in-place and layers after it - // no longer use input blob, we'll set output = input. 
- bool supportInPlace; - LayerShapes() {supportInPlace = false;} - }; -} - -Mat blobFromImage(InputArray image, double scalefactor, const Size& size, - const Scalar& mean, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - Mat blob; - blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth); - return blob; -} - -void blobFromImage(InputArray image, OutputArray blob, double scalefactor, - const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - std::vector images(1, image.getMat()); - blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); -} - -Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size, - const Scalar& mean, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - Mat blob; - blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); - return blob; -} - -void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor, - Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); - if (ddepth == CV_8U) - { - CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth"); - CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); - } - - std::vector images; - images_.getMatVector(images); - CV_Assert(!images.empty()); - for (size_t i = 0; i < images.size(); i++) - { - Size imgSize = images[i].size(); - if (size == Size()) - size = imgSize; - if (size != imgSize) - { - if(crop) - { - float resizeFactor = std::max(size.width / (float)imgSize.width, - size.height / (float)imgSize.height); - resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); - Rect crop(Point(0.5 * (images[i].cols - size.width), - 0.5 * (images[i].rows - size.height)), - size); - images[i] = images[i](crop); - } - else - 
resize(images[i], images[i], size, 0, 0, INTER_LINEAR); - } - if(images[i].depth() == CV_8U && ddepth == CV_32F) - images[i].convertTo(images[i], CV_32F); - Scalar mean = mean_; - if (swapRB) - std::swap(mean[0], mean[2]); - - images[i] -= mean; - images[i] *= scalefactor; - } - - size_t nimages = images.size(); - Mat image0 = images[0]; - int nch = image0.channels(); - CV_Assert(image0.dims == 2); - if (nch == 3 || nch == 4) - { - int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; - blob_.create(4, sz, ddepth); - Mat blob = blob_.getMat(); - Mat ch[4]; - - for(size_t i = 0; i < nimages; i++ ) - { - const Mat& image = images[i]; - CV_Assert(image.depth() == blob_.depth()); - nch = image.channels(); - CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); - CV_Assert(image.size() == image0.size()); - - for( int j = 0; j < nch; j++ ) - ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j)); - if(swapRB) - std::swap(ch[0], ch[2]); - split(image, ch); - } - } - else - { - CV_Assert(nch == 1); - int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; - blob_.create(4, sz, ddepth); - Mat blob = blob_.getMat(); - - for(size_t i = 0; i < nimages; i++ ) - { - const Mat& image = images[i]; - CV_Assert(image.depth() == blob_.depth()); - nch = image.channels(); - CV_Assert(image.dims == 2 && (nch == 1)); - CV_Assert(image.size() == image0.size()); - - image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0))); - } - } -} - -void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) -{ - CV_TRACE_FUNCTION(); - - //A blob is a 4 dimensional matrix in floating point precision - //blob_[0] = batchSize = nbOfImages - //blob_[1] = nbOfChannels - //blob_[2] = height - //blob_[3] = width - CV_Assert(blob_.depth() == CV_32F); - CV_Assert(blob_.dims == 4); - - images_.create(cv::Size(1, blob_.size[0]), blob_.depth()); - - std::vector vectorOfChannels(blob_.size[1]); - for (int n = 0; n < blob_.size[0]; ++n) - { - for (int c = 0; c < 
blob_.size[1]; ++c) - { - vectorOfChannels[c] = getPlane(blob_, n, c); - } - cv::merge(vectorOfChannels, images_.getMatRef(n)); - } -} - -#ifdef HAVE_OPENCL -class OpenCLBackendWrapper : public BackendWrapper -{ -public: - OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) - { - m.copyTo(umat); - host = &m; - hostDirty = false; - } - - OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) - : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) - { - Ptr base = baseBuffer.dynamicCast(); - CV_Assert(!base.empty()); - - host = &m; - - int shape[] = {1, (int)base->umat.total()}; - umat = base->umat.reshape(1, 2, &shape[0]) - .colRange(0, host->total()) - .reshape(1, host->dims, &host->size[0]); - hostDirty = false; - } - - static Ptr create(Mat& m) - { - return Ptr(new OpenCLBackendWrapper(m)); - } - - static Ptr create(const Ptr& baseBuffer, Mat& m) - { - return Ptr(new OpenCLBackendWrapper(baseBuffer, m)); - } - - static std::vector getUMatVector(const std::vector >& wrappers) - { - const int numWrappers = wrappers.size(); - std::vector mats(wrappers.size()); - for (int i = 0; i < numWrappers; ++i) - { - Ptr umatWrapper = wrappers[i].dynamicCast(); - CV_Assert(!umatWrapper.empty()); - umatWrapper->copyToDevice(); - mats[i] = umatWrapper->umat; - } - return mats; - } - - // Replaces all umats in wrappers to specific ones. - static void update(const std::vector >& wrappers, - const std::vector& umats) - { - CV_Assert(wrappers.size() == umats.size()); - for (int i = 0, n = umats.size(); i < n; ++i) - { - Ptr umatWrapper = wrappers[i].dynamicCast(); - CV_Assert(!umatWrapper.empty()); - umatWrapper->umat = umats[i]; - } - } - - ~OpenCLBackendWrapper() {} - - // Copies data from device to a host memory. 
- virtual void copyToHost() CV_OVERRIDE - { - umat.copyTo(*host); - } - - virtual void setHostDirty() CV_OVERRIDE - { - hostDirty = true; - }; - - void copyToDevice() - { - if (hostDirty) - { - host->copyTo(umat); - hostDirty = false; - } - } - -private: - UMat umat; - Mat* host; - bool hostDirty; -}; -#endif - -struct LayerPin -{ - int lid; - int oid; - - LayerPin(int layerId = -1, int outputId = -1) - : lid(layerId), oid(outputId) {} - - bool valid() const - { - return (lid >= 0 && oid >= 0); - } - - bool equal(const LayerPin &r) const - { - return (lid == r.lid && oid == r.oid); - } - - bool operator<(const LayerPin &r) const - { - return lid < r.lid || (lid == r.lid && oid < r.oid); - } - - bool operator ==(const LayerPin &r) const - { - return lid == r.lid && oid == r.oid; - } -}; - -struct LayerData -{ - LayerData() : id(-1), dtype(CV_32F), skip(false), flag(0) {} - LayerData(int _id, const String &_name, const String &_type, const int &_dtype, LayerParams &_params) - : id(_id), name(_name), type(_type), dtype(_dtype), params(_params), skip(false), flag(0) - { - CV_TRACE_FUNCTION(); - - //add logging info - params.name = name; - params.type = type; - } - - int id; - String name; - String type; - int dtype; // Datatype of output blobs. - LayerParams params; - - std::vector inputBlobsId; - std::set inputLayersId; - std::set requiredOutputs; - std::vector consumers; - std::vector > outputBlobsWrappers; - std::vector > inputBlobsWrappers; - std::vector > internalBlobsWrappers; - -#ifdef HAVE_CUDA - /* output ids which must be transferred to the host in the background - * after the completion of the forward pass of the layer - */ - std::vector cudaD2HBackgroundTransfers; -#endif - - Ptr layerInstance; - std::vector outputBlobs; - std::vector inputBlobs; - std::vector internals; - // Computation nodes of implemented backends (except DEFAULT). - std::map > backendNodes; - // Flag for skip layer computation for specific backend. 
- bool skip; - - int flag; - - Ptr getLayerInstance() - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - if (layerInstance) - return layerInstance; - - layerInstance = LayerFactory::createLayerInstance(type, params); - if (!layerInstance) - { - CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); - } - - return layerInstance; - } -}; - -//fake layer containing network input blobs -struct DataLayer : public Layer -{ - DataLayer() : Layer() - { - skip = false; - } - - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_OPENCV; - } - - void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - // FIXIT: add wrapper without exception suppression - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), - forward_ocl(inputs_arr, outputs_arr, internals_arr)) - - bool isFP16 = outputs_arr.depth() == CV_16S; - - std::vector outputs, internals; - outputs_arr.getMatVector(outputs); - internals_arr.getMatVector(internals); - - for (int i = 0; i < inputsData.size(); ++i) - { - double scale = scaleFactors[i]; - Scalar& mean = means[i]; - - CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); - if (isFP16) - CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); - else - CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); - - bool singleMean = true; - for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j) - { - singleMean = mean[j] == mean[j - 1]; - } - - if (singleMean) - { - if (isFP16) - { - Mat input_f32; - inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale); - convertFp16(input_f32, outputs[i]); - } - else - { - inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); - } - } - else - { - for (int n = 0; n < inputsData[i].size[0]; ++n) - { - for (int c = 0; c < inputsData[i].size[1]; 
++c) - { - Mat inp = getPlane(inputsData[i], n, c); - Mat out = getPlane(outputs[i], n, c); - if (isFP16) - { - Mat input_f32; - inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale); - convertFp16(input_f32, out); - } - else - { - inp.convertTo(out, CV_32F, scale, -mean[c] * scale); - } - } - } - } - } - } - -#ifdef HAVE_OPENCL - bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) - { - bool isFP16 = outputs_.depth() == CV_16S; - - std::vector outputs; - outputs_.getUMatVector(outputs); - - for (int i = 0; i < inputsData.size(); ++i) - { - Mat inputData = inputsData[i]; - - double scale = scaleFactors[i]; - Scalar& mean = means[i]; - - CV_Assert(mean == Scalar() || inputData.size[1] <= 4); - if (isFP16) - CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); - else - CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); - - bool singleMean = true; - for (int j = 1; j < std::min(4, inputData.size[1]) && singleMean; ++j) - { - singleMean = mean[j] == mean[j - 1]; - } - - if (singleMean) - { - if (isFP16) - { - UMat input_i; - inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale); - convertFp16(input_i, outputs[i]); - } - else - { - inputData.convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); - } - } - else - { - for (int n = 0; n < inputData.size[0]; ++n) - { - for (int c = 0; c < inputData.size[1]; ++c) - { - Mat inp = getPlane(inputData, n, c); - - std::vector plane(4, Range::all()); - plane[0] = Range(n, n + 1); - plane[1] = Range(c, c + 1); - UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size); - - if (isFP16) - { - UMat input_i; - inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale); - convertFp16(input_i, out); - } - else - { - inp.convertTo(out, CV_32F, scale, -mean[c] * scale); - } - } - } - } - } - return true; - } -#endif - - int outputNameToIndex(const String& tgtName) CV_OVERRIDE - { - int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); - return (idx < 
(int)outNames.size()) ? idx : -1; - } - - void setNames(const std::vector &names) - { - outNames.assign(names.begin(), names.end()); - shapes.clear(); shapes.resize(outNames.size()); - } - - void setInputShape(const String& tgtName, const MatShape& shape) - { - std::vector::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName); - CV_Check(tgtName, it != outNames.end(), "Unknown input"); - int idx = (int)(it - outNames.begin()); - - CV_Assert(idx < (int)shapes.size()); - CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed"); - shapes[idx] = shape; - } - - bool getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const CV_OVERRIDE - { - CV_Assert(inputs.size() == requiredOutputs); - outputs.assign(inputs.begin(), inputs.end()); - return false; - } - - virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE - { - std::vector outputs; - outputs_arr.getMatVector(outputs); - - CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(), - inputsData.size() == outputs.size()); - skip = true; - for (int i = 0; skip && i < inputsData.size(); ++i) - { - if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar()) - skip = false; - } - } - - - std::vector outNames; - std::vector shapes; - // Preprocessing parameters for each network's input. - std::vector scaleFactors; - std::vector means; - std::vector inputsData; - bool skip; -}; - -struct BlobManager -{ -public: - // Increase references counter to layer output. 
- void addReference(const LayerPin& lp) - { - std::map::iterator it = refCounter.find(lp); - if (it == refCounter.end()) - refCounter[lp] = 1; - else - it->second += 1; - } - - void addReferences(const std::vector& pins) - { - for (int i = 0; i < pins.size(); i++) - { - addReference(pins[i]); - } - } - - // Returns number of references to allocated memory that used in specific - // layer blob. - int numReferences(const LayerPin& lp) - { - std::map::const_iterator mapIt = reuseMap.find(lp); - CV_Assert(mapIt != reuseMap.end()); - LayerPin memHost = mapIt->second; - - std::map::const_iterator refIt = refCounter.find(memHost); - CV_Assert(refIt != refCounter.end()); - return refIt->second; - } - - // Reuse data allocated in inside the blob. - void reuse(const LayerPin& host, const LayerPin& user) - { - CV_Assert(reuseMap.find(user) == reuseMap.end()); - CV_Assert(reuseMap.find(host) != reuseMap.end()); - LayerPin memHost = reuseMap[host]; - reuseMap[user] = memHost; - if (refCounter.find(memHost) != refCounter.end()) - { - std::map::iterator userRefIt = refCounter.find(user); - if (userRefIt != refCounter.end()) - { - refCounter[memHost] += userRefIt->second; - refCounter.erase(userRefIt); - } - else - refCounter[memHost] += 1; - } - } - - // Decrease references counter to allocated memory inside specific blob. 
- void releaseReference(const LayerPin& lp) - { - std::map::const_iterator mapIt = reuseMap.find(lp); - CV_Assert(mapIt != reuseMap.end()); - - std::map::iterator refIt = refCounter.find(mapIt->second); - CV_Assert(refIt != refCounter.end()); - CV_Assert(refIt->second > 0); - refIt->second -= 1; - } - - void releaseReferences(const std::vector& pins) - { - for (int i = 0; i < pins.size(); i++) - { - releaseReference(pins[i]); - } - } - - void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, const int& dtype) - { - if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS) - { - Mat bestBlob; - LayerPin bestBlobPin; - - std::map::const_iterator hostIt; - std::map::const_iterator refIt; - - const int targetTotal = total(shape); - int bestBlobTotal = INT_MAX; - - for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) - { - refIt = refCounter.find(hostIt->first); - // Use only blobs that had references before because if not, - // it might be used as output. - if (refIt != refCounter.end() && refIt->second == 0) - { - const Mat& unusedBlob = hostIt->second; - if (unusedBlob.total() >= targetTotal && - unusedBlob.total() < bestBlobTotal && - unusedBlob.type() == dtype) - { - bestBlobPin = hostIt->first; - bestBlob = unusedBlob; - bestBlobTotal = unusedBlob.total(); - } - } - } - if (!bestBlob.empty()) - { - reuse(bestBlobPin, lp); - dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); - return; - } - } - - { - // if dst already has been allocated with total(shape) elements, - // it won't be recreated and pointer of dst.data remains the same. 
- dst.create(shape, dtype); - addHost(lp, dst); - } - } - - void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, - std::vector& pinsForInternalBlobs) - { - CV_TRACE_FUNCTION(); - - pinsForInternalBlobs.clear(); - - std::vector& outputBlobs = ld.outputBlobs, - &internalBlobs = ld.internals; - - const ShapesVec& outShapes = layerShapes.out, - internalShapes = layerShapes.internal; - - outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob - internalBlobs.resize(internalShapes.size()); - - CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); - - // Check that layer could work in-place. - bool inPlace = false; - if (layerShapes.supportInPlace) - { - if (ld.inputBlobs.size() == 1) - { - // Get number of references to the input memory. - int numRef = numReferences(ld.inputBlobsId[0]); - // If current layer is one and only customer of this blob. - inPlace = numRef == 1; - } - } - - ShapesVec shapes(outShapes); - shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); - std::vector blobs; - for(int i = 0; i < outputBlobs.size(); i++) - { - blobs.push_back(&outputBlobs[i]); - } - - for(int i = 0; i < internalBlobs.size(); i++) - { - blobs.push_back(&internalBlobs[i]); - if (total(internalShapes[i])) - { - pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); - } - } - - addReferences(pinsForInternalBlobs); - - std::map > idxSizes; - for(int i = 0; i < shapes.size(); i++) - { - idxSizes[total(shapes[i])].push_back(i); - } - - std::map >::reverse_iterator it; - for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) - { - for(int j = 0; j < it->second.size(); j++) - { - int index = it->second[j]; - if (total(shapes[index])) - { - LayerPin blobPin(ld.id, index); - if (index < outShapes.size() && inPlace) - { - CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); - ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); - 
reuse(ld.inputBlobsId[0], blobPin); - } - else - reuseOrCreate(shapes[index], blobPin, *blobs[index], ld.dtype); - } - } - } - } - - // Clear internal state. Calls before an every reallocation. - void reset() - { - CV_TRACE_FUNCTION(); - - refCounter.clear(); - reuseMap.clear(); - memHosts.clear(); - } - -private: - // Register allocated memory. - void addHost(const LayerPin& lp, const Mat& mat) - { - CV_Assert(memHosts.find(lp) == memHosts.end()); - reuseMap[lp] = lp; - memHosts[lp] = mat; - } - - std::map refCounter; - // Maps pin to origin blob (for whom memory was allocated firstly). - // For origin blobs key == value. - std::map reuseMap; - std::map memHosts; -}; - -static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) -{ - if (backendId == DNN_BACKEND_OPENCV) - { - if (targetId == DNN_TARGET_CPU) - return Ptr(); -#ifdef HAVE_OPENCL - else if (IS_DNN_OPENCL_TARGET(targetId)) - return OpenCLBackendWrapper::create(m); -#endif - else - CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier"); - } - else if (backendId == DNN_BACKEND_HALIDE) - { - CV_Assert(haveHalide()); -#ifdef HAVE_HALIDE - return Ptr(new HalideBackendWrapper(targetId, m)); -#endif // HAVE_HALIDE - } - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; - } - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { -#ifdef HAVE_DNN_NGRAPH - return Ptr(new NgraphBackendWrapper(targetId, m)); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph"); -#endif - } - else if (backendId == DNN_BACKEND_WEBNN) - { -#ifdef HAVE_WEBNN - return Ptr(new WebnnBackendWrapper(targetId, m)); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); -#endif - } - else if (backendId == DNN_BACKEND_VKCOM) - { - CV_Assert(haveVulkan()); -#ifdef HAVE_VULKAN - return Ptr(new 
VkComBackendWrapper(m)); -#endif // HAVE_VULKAN - } - else if (backendId == DNN_BACKEND_CUDA) - { - CV_Assert(haveCUDA()); - -#ifdef HAVE_CUDA - switch (targetId) - { - case DNN_TARGET_CUDA: - return CUDABackendWrapperFP32::create(m); - case DNN_TARGET_CUDA_FP16: - return CUDABackendWrapperFP16::create(m); - default: - CV_Assert(IS_DNN_CUDA_TARGET(targetId)); - } -#endif - } - else - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); - return Ptr(); // TODO Error? -} - -static int g_networkId = 0; - -detail::NetImplBase::NetImplBase() - : networkId(CV_XADD(&g_networkId, 1)) - , networkDumpCounter(0) - , dumpLevel(DNN_NETWORK_DUMP) -{ - // nothing -} - -std::string detail::NetImplBase::getDumpFileNameBase() const -{ - std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++); - return dumpFileNameBase; -} - -struct Net::Impl : public detail::NetImplBase -{ - typedef std::map LayersShapesMap; - typedef std::map MapIdToLayerData; - - Impl() - { - //allocate fake net input layer - netInputLayer = Ptr(new DataLayer()); - LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second; - inpl.id = 0; - netInputLayer->name = inpl.name = "_input"; - inpl.type = "__NetInputLayer__"; - inpl.layerInstance = netInputLayer; - layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); - - lastLayerId = 0; - netWasAllocated = false; - netWasQuantized = false; - fusion = true; - isAsync = false; - preferableBackend = DNN_BACKEND_DEFAULT; - preferableTarget = DNN_TARGET_CPU; - skipInfEngineInit = false; - hasDynamicShapes = false; - } - - Ptr netInputLayer; - std::vector blobsToKeep; - MapIdToLayerData layers; - std::map layerNameToId; - std::map outputNameToId; // use registerOutput() to populate outputs - BlobManager blobManager; - int preferableBackend; - int preferableTarget; - String halideConfigFile; - bool skipInfEngineInit; - bool hasDynamicShapes; - // Map host data to backend specific wrapper. 
- std::map > backendWrappers; - - int lastLayerId; - - bool netWasAllocated; - bool netWasQuantized; - bool fusion; - bool isAsync; - std::vector layersTimings; - -#ifdef HAVE_CUDA - struct CudaInfo_t - { - CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_) - : context(std::move(ctxt)), d2h_stream(std::move(d2h_stream_)) { } - cuda4dnn::csl::CSLContext context; - cuda4dnn::csl::Stream d2h_stream; - cuda4dnn::csl::Workspace workspace; - }; - - std::unique_ptr cudaInfo; -#endif - - Ptr wrap(Mat& host) - { - if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) - return Ptr(); - - MatShape shape(host.dims); - for (int i = 0; i < host.dims; ++i) - shape[i] = host.size[i]; - - void* data = host.data; - if (backendWrappers.find(data) != backendWrappers.end()) - { - Ptr baseBuffer = backendWrappers[data]; - if (preferableBackend == DNN_BACKEND_OPENCV) - { -#ifdef HAVE_OPENCL - CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget)); - return OpenCLBackendWrapper::create(baseBuffer, host); -#else - CV_Error(Error::StsInternal, ""); -#endif - } - else if (preferableBackend == DNN_BACKEND_HALIDE) - { - CV_Assert(haveHalide()); -#ifdef HAVE_HALIDE - return Ptr(new HalideBackendWrapper(baseBuffer, shape)); -#endif - } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; - } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - return wrapMat(preferableBackend, preferableTarget, host); - } - else if (preferableBackend == DNN_BACKEND_WEBNN) - { -#ifdef HAVE_WEBNN - return wrapMat(preferableBackend, preferableTarget, host); -#endif - } - else if (preferableBackend == DNN_BACKEND_VKCOM) - { - #ifdef HAVE_VULKAN - return Ptr(new VkComBackendWrapper(baseBuffer, host)); - #endif - } - else if (preferableBackend == DNN_BACKEND_CUDA) - { - CV_Assert(haveCUDA()); -#ifdef HAVE_CUDA - switch (preferableTarget) - { - case 
DNN_TARGET_CUDA: - return CUDABackendWrapperFP32::create(baseBuffer, shape); - case DNN_TARGET_CUDA_FP16: - return CUDABackendWrapperFP16::create(baseBuffer, shape); - default: - CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); - } -#endif - } - else - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); - } - - Ptr wrapper = wrapMat(preferableBackend, preferableTarget, host); - backendWrappers[data] = wrapper; - return wrapper; - } - -#ifdef HAVE_HALIDE - void compileHalide() - { - CV_TRACE_FUNCTION(); - - CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); - - HalideScheduler scheduler(halideConfigFile); - std::vector< std::reference_wrapper > compileList; compileList.reserve(64); - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData& ld = it->second; - Ptr layer = ld.layerInstance; - if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) - { - CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); - bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); - if (!scheduled) - { - // Use automatic scheduling provided by layer. 
- layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], - ld.inputBlobs, ld.outputBlobs, - preferableTarget); - } - compileList.emplace_back(ld); - } - } - std::atomic progress(0); - auto fn = ([&] () -> void - { - for (;;) - { - int id = progress.fetch_add(1); - if ((size_t)id >= compileList.size()) - return; - const LayerData& ld = compileList[id].get(); - Ptr node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second; - dnn::compileHalide(ld.outputBlobs, node, preferableTarget); - } - }); - size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency()); - num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); - std::vector threads(num_threads - 1); - for (auto& t: threads) t = std::thread(fn); - fn(); // process own tasks - for (auto& t: threads) t.join(); - } -#endif - - void clear() - { - CV_TRACE_FUNCTION(); - - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != layers.end(); it++) - { - if (it->second.id != 0) { - it->second.inputBlobs.clear(); - it->second.outputBlobs.clear(); - it->second.internals.clear(); - } - it->second.skip = false; - //it->second.consumers.clear(); - Ptr currLayer = it->second.layerInstance; - - if( currLayer.empty() ) - continue; - - currLayer->unsetAttached(); - } - netWasAllocated = false; - layersTimings.clear(); - } - - void setUpNet(const std::vector& blobsToKeep_ = std::vector()) - { - CV_TRACE_FUNCTION(); - - if (dumpLevel && networkDumpCounter == 0) - { - dumpNetworkToFile(); - } - - if (preferableBackend == DNN_BACKEND_DEFAULT) - preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT; -#ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) - preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam(); -#endif - - CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || - preferableTarget == DNN_TARGET_CPU || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16); 
- CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || - preferableTarget == DNN_TARGET_CPU || - preferableTarget == DNN_TARGET_OPENCL); -#ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - CV_Assert( - (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16 || - preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_FPGA - ); - } -#endif -#ifdef HAVE_WEBNN - if (preferableBackend == DNN_BACKEND_WEBNN) - { - CV_Assert(preferableTarget == DNN_TARGET_CPU || - preferableTarget == DNN_TARGET_OPENCL); - } -#endif - CV_Assert(preferableBackend != DNN_BACKEND_VKCOM || - preferableTarget == DNN_TARGET_VULKAN); - CV_Assert(preferableBackend != DNN_BACKEND_CUDA || - IS_DNN_CUDA_TARGET(preferableTarget)); - if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) - { - if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) -#ifndef HAVE_OPENCL - { - CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU."); - preferableTarget = DNN_TARGET_CPU; - } -#else - { - if (!DNN_OPENCL_ALLOW_ALL_DEVICES) - { - // Current implementation is only valid for GPU (#11494) - if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU) - { - CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU."); - preferableTarget = DNN_TARGET_CPU; - } - else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel()) - { - CV_LOG_WARNING(NULL, - "DNN: OpenCL target with fp16 precision is not supported " - "with current OpenCL device (tested with Intel GPUs only), " - "switching to OpenCL with fp32 precision."); - preferableTarget = DNN_TARGET_OPENCL; - } - } - } -#endif - if 
(preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan()) - { - preferableBackend = DNN_BACKEND_OPENCV; - preferableTarget = DNN_TARGET_CPU; - } - - if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA()) - { -#ifdef HAVE_CUDA - CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU"); -#else - CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU"); -#endif - preferableBackend = DNN_BACKEND_OPENCV; - preferableTarget = DNN_TARGET_CPU; - } - - clear(); - - if (hasDynamicShapes) - { - updateLayersShapes(); - } - - this->blobsToKeep = blobsToKeep_; - - allocateLayers(blobsToKeep_); - - MapIdToLayerData::iterator it = layers.find(0); - CV_Assert(it != layers.end()); - it->second.skip = netInputLayer->skip; - - initBackend(blobsToKeep_); - - if (!netWasAllocated) - { -#ifdef HAVE_HALIDE - if (preferableBackend == DNN_BACKEND_HALIDE) - compileHalide(); -#else - CV_Assert(preferableBackend != DNN_BACKEND_HALIDE); -#endif - } - - netWasAllocated = true; - - if (dumpLevel) - { - dumpNetworkToFile(); - } - } - } - - int getLayerId(const String &layerName) const - { - std::map::const_iterator it = layerNameToId.find(layerName); - return (it != layerNameToId.end()) ? it->second : -1; - } - - int getLayerId(int id) const - { - MapIdToLayerData::const_iterator it = layers.find(id); - return (it != layers.end()) ? id : -1; - } - - int getLayerId(DictValue &layerDesc) const - { - if (layerDesc.isInt()) - return getLayerId(layerDesc.get()); - else if (layerDesc.isString()) - return getLayerId(layerDesc.get()); - - CV_Assert(layerDesc.isInt() || layerDesc.isString()); - return -1; - } - - String getLayerName(int id) const - { - MapIdToLayerData::const_iterator it = layers.find(id); - return (it != layers.end()) ? 
it->second.name : "(unknown layer)"; - } - - LayerData& getLayerData(int id) const - { - MapIdToLayerData::const_iterator it = layers.find(id); - - if (it == layers.end()) - CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); - - return const_cast(it->second); - } - - LayerData& getLayerData(const String &layerName) const - { - int id = getLayerId(layerName); - - if (id < 0) - CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found"); - - return getLayerData(id); - } - - LayerData& getLayerData(const DictValue &layerDesc) const - { - CV_Assert(layerDesc.isInt() || layerDesc.isString()); - if (layerDesc.isInt()) - return getLayerData(layerDesc.get()); - else /*if (layerDesc.isString())*/ - return getLayerData(layerDesc.get()); - } - - static void addLayerInput(LayerData &ld, int inNum, LayerPin from) - { - if ((int)ld.inputBlobsId.size() <= inNum) - { - ld.inputBlobsId.resize(inNum + 1); - } - else - { - LayerPin storedFrom = ld.inputBlobsId[inNum]; - if (storedFrom.valid() && !storedFrom.equal(from)) - CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected", - inNum, ld.name.c_str())); - } - - ld.inputBlobsId[inNum] = from; - } - - int resolvePinOutputName(LayerData &ld, const String &outName) const - { - if (outName.empty()) - return 0; - return ld.getLayerInstance()->outputNameToIndex(outName); - } - - LayerPin getPinByAlias(const String &layerName) const - { - LayerPin pin; - pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); - - if (pin.lid >= 0) - pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName); - - return pin; - } - - std::vector getLayerOutPins(const String &layerName) const - { - int lid = (layerName.empty()) ? 
0 : getLayerId(layerName); - - MapIdToLayerData::const_iterator it = layers.find(lid); - if (it == layers.end()) - CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid)); - const size_t nOutputs = it->second.outputBlobs.size(); - - std::vector pins; - for (int i = 0; i < nOutputs; i++) - { - pins.push_back(LayerPin(lid, i)); - } - - return pins; - } - - // FIXIT remove dtype - int addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) - { - int id = getLayerId(name); - if (id >= 0) - { - if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") - { - CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); - return -1; - } - else - { - LayerData& ld = layers.find(id)->second; - ld.type = type; - ld.params = params; - return -1; - } - } - - id = ++lastLayerId; - layerNameToId.insert(std::make_pair(name, id)); - layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); - if (params.get("has_dynamic_shapes", false)) - hasDynamicShapes = true; - - if (dtype == CV_8S) - netWasQuantized = true; - - return id; - } - - void connect(int outLayerId, int outNum, int inLayerId, int inNum) - { - CV_Assert(outLayerId < inLayerId); - LayerData &ldOut = getLayerData(outLayerId); - LayerData &ldInp = getLayerData(inLayerId); - - addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); - ldOut.requiredOutputs.insert(outNum); - ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); - - CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")"); - } - - int registerOutput(const std::string& outputName, int layerId, int outputPort) - { - int checkLayerId = getLayerId(outputName); - if (checkLayerId >= 0) - { - if (checkLayerId == layerId) - { - if (outputPort == 0) - { - // layer name correlates with its output name - CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked"); - 
outputNameToId.insert(std::make_pair(outputName, layerId)); - return checkLayerId; - } - } - CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort)); - } -#if 0 // TODO - if (outputPort == 0) - // make alias only, need to adopt getUnconnectedOutLayers() call -#endif - LayerParams outputLayerParams; - outputLayerParams.name = outputName; - outputLayerParams.type = "Identity"; - int dtype = CV_32F; // FIXIT remove - int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams); - connect(layerId, outputPort, outputLayerId, 0); - CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort); - outputNameToId.insert(std::make_pair(outputName, outputLayerId)); - return outputLayerId; - } - - void initBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - if (preferableBackend == DNN_BACKEND_OPENCV) - { - CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget)); - } - else if (preferableBackend == DNN_BACKEND_HALIDE) - initHalideBackend(); - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { -#ifdef HAVE_DNN_NGRAPH - initNgraphBackend(blobsToKeep_); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); -#endif - } - else if (preferableBackend == DNN_BACKEND_WEBNN) - { -#ifdef HAVE_WEBNN - initWebnnBackend(blobsToKeep_); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); -#endif - } - else if (preferableBackend == DNN_BACKEND_VKCOM) - initVkComBackend(); - else if (preferableBackend == DNN_BACKEND_CUDA) - initCUDABackend(blobsToKeep_); - else - CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend)); - } - - void initHalideBackend() - 
{ - CV_TRACE_FUNCTION(); - CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide()); - - // Iterator to current layer. - MapIdToLayerData::iterator it = layers.begin(); - // Iterator to base layer for fusion. In example, in case of conv+bn+relu - // it'll be a conv layer. - MapIdToLayerData::iterator baseIt = layers.begin(); - for (; it != layers.end(); it++) - { - LayerData &ldTop = it->second; - Ptr layerTop = ldTop.layerInstance; - if (!layerTop->supportBackend(preferableBackend)) - { - // Move base iterator to layer that don't support preferable - // backend to prevent fusion over layer of different backend. - baseIt = it; - continue; - } - // Try to do layers fusion. - LayerData &ldBot = baseIt->second; - Ptr layerBot = ldBot.layerInstance; - // 1. Check that bottom and top from the same backends. - if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) - { - // 2. Check that current layer works in-place. - bool inPlace = ldTop.inputBlobs.size() == 1 && - ldBot.outputBlobs.size() == 1 && - ldTop.inputBlobs[0]->data == - ldBot.outputBlobs[0].data; - if (inPlace) - { - // 3. Try to attach node. - CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); - Ptr fusedNode = - layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); - if (!fusedNode.empty()) - { - ldTop.skip = true; - ldBot.backendNodes[preferableBackend] = fusedNode; - ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers; - continue; - } - } - } - // No layers fusion. - ldTop.skip = false; - ldTop.backendNodes[DNN_BACKEND_HALIDE] = - layerTop->initHalide(ldTop.inputBlobsWrappers); - baseIt = it; - } - } - - -#ifdef HAVE_DNN_NGRAPH - /** mark input pins as outputs from other subnetworks - * FIXIT must be done by DNN engine not ngraph. 
- */ - void addNgraphOutputs(LayerData &ld) - { - CV_TRACE_FUNCTION(); - - CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type); - - Ptr layerNet; - auto it = ld.backendNodes.find(preferableBackend); - if (it != ld.backendNodes.end()) - { - Ptr node = it->second; - if (!node.empty()) - { - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty()); - layerNet = ieNode->net; - } - } - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr ieInpNode = inpNode.dynamicCast(); - CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); - if (layerNet != ieInpNode->net) - { - CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); - ieInpNode->net->addOutput(ieInpNode); - } - } - } - } - - void initNgraphBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, ""); - - Ptr net; - - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) - { - const LayerData& ld = it->second; - if (ld.id == 0) - { - CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || - (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; - outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." 
+ std::to_string(i)) : outputName; - dataPtr->setName(outputName); - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; - dataPtr->setName(outputName); - } - } - } - - if (skipInfEngineInit) - { - Ptr node = layers[lastLayerId].backendNodes[preferableBackend]; - CV_Assert(!node.empty()); - - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); - - CV_Assert(ieNode->net); - InfEngineNgraphNet& ienet = *ieNode->net; - ienet.reset(); - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData& ld = it->second; - if (ld.id == 0) - { - for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]); - dataPtr->setName(netInputLayer->outNames[i]); - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - auto it = ienet.outputsDesc.find(ld.name); - if (it != ienet.outputsDesc.end()) - { - const InferenceEngine::TensorDesc& descriptor = it->second; - InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name); - dataPtr->setName(ld.name); - } - else - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - dataPtr->setName(ld.name); - } - } - } - ienet.addBlobs(ld.inputBlobsWrappers); - ienet.addBlobs(ld.outputBlobsWrappers); - ld.skip = true; - } - layers[lastLayerId].skip = false; - ienet.init((Target)preferableTarget); - return; - } - - bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || - BackendRegistry::checkIETarget(DNN_TARGET_CPU)); - - // Build Inference Engine networks from sets of layers that support this - // backend. 
Split a whole model on several Inference Engine networks if - // some of layers are not implemented. - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData& ld = it->second; - - CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ..."); - - if (ld.id == 0 && ld.skip) - { - CV_LOG_DEBUG(NULL, "DNN/IE: SKIP!"); - continue; - } - - bool fused = ld.skip; - Ptr layer = ld.layerInstance; - if (!fused && !layer->supportBackend(preferableBackend)) - { - CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); - bool customizable = ld.id != 0 && supportsCPUFallback; - - // TODO: there is a bug in Myriad plugin with custom layers shape infer. - if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) - { - for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) - { - customizable = ld.inputBlobs[i]->size[0] == 1; - } - } - - // TODO: fix these workarounds - if (preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Concat"; - - if (preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Power"; - - if (preferableTarget == DNN_TARGET_OPENCL) - customizable &= ld.type != "Eltwise"; - - if (!customizable) - { - CV_LOG_DEBUG(NULL, "DNN/IE: NOT customizable!"); - addNgraphOutputs(ld); - net = Ptr(); - layer->preferableTarget = DNN_TARGET_CPU; - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) { - Ptr ieNode = inpNode.dynamicCast(); - CV_Assert(!ieNode.empty()); - ieNode->net->addOutput(ieNode); - } - } - continue; - } - } - ld.skip = true; // Initially skip all Inference Engine supported layers. 
- - // Create a new network if one of inputs from different Inference Engine graph. - std::vector> inputNodes; - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) - if (inputNodes.size() == ld.inputBlobsId.size()) { - break; - } - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr ieInpNode = inpNode.dynamicCast(); - CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); - if (ieInpNode->net == net && !fused) { - inputNodes.push_back(inpNode); - continue; - } - } - - if (net.empty()) { - net = Ptr(new InfEngineNgraphNet(*this)); - } - - if (!fused) { - std::vector inputNames; - std::vector inputs; - - auto curr_pos = inpLd.consumers.begin(); - auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; }; - auto cons = curr_pos; - while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != - inpLd.consumers.end()) { - int cons_inp = cons->oid; - Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. 
- dynamicCast(); - CV_Assert(!inpWrapper.empty()); - auto iter = std::find(inputNames.begin(), inputNames.end(), - inpWrapper->dataPtr->getName()); - if (iter == inputNames.end()) { - inputNames.push_back(inpWrapper->dataPtr->getName()); - inputs.push_back(inpLd.outputBlobs[cons_inp]); - } - curr_pos = cons + 1; - } - - auto inps = net->setInputs(inputs, inputNames); - for (auto& inp : inps) { - inputNodes.emplace_back(Ptr(new InfEngineNgraphNode(inp))); - } - } - } - - Ptr node; - if (!net.empty()) - { - if (fused) - { - bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && - ld.inputBlobs[0]->data == ld.outputBlobs[0].data; - CV_Assert(inPlace); - node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; - ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; - } - } - else { - net = Ptr(new InfEngineNgraphNet(*this)); - } - - if (!fused) - { - CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - int lid = ld.inputBlobsId[i].lid; - int oid = ld.inputBlobsId[i].oid; - if (oid == 0 || lid == 0) - continue; - - auto ieInpNode = inputNodes[i].dynamicCast(); - const auto& ngraph_input_node = ieInpNode->node; - CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); - - // Handle parameters from other subnets. 
Output port is not used in this case - if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && - ngraph_input_node->get_output_size() == 1) - { - inputNodes[i] = Ptr(new InfEngineNgraphNode(ngraph_input_node)); - continue; - } - CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - // FIXIT refactor ".initNgraph()" API to use Output - // WA: use Concat to emulate Identity operation with requested output port - auto oid_node = std::make_shared(ngraph::OutputVector {ngraph_input_node->output(oid)}, 0); - inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); -#elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) - inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); -#else - inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false))); -#endif - } - - if (layer->supportBackend(preferableBackend)) - { - CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size()); - node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes); -#if 0 // FIXIT doesn't work with multiple outputs (set name is applied to the same node) - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - node.dynamicCast()->setName(dataPtr->getName()); - } -#else - node.dynamicCast()->setName(layer->name); -#endif - } - else - { - CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type); - node = Ptr(new InfEngineNgraphNode(inputNodes, - ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals)); - } - } - else if (node.empty()) - { - CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass..."); - continue; - } - - ld.backendNodes[preferableBackend] = node; - - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); 
- ieNode->net = net; - - for (const auto& pin : blobsToKeep_) - { - if (pin.lid == ld.id) - { - ieNode->net->addOutput(ieNode); - break; - } - } - ieNode->net->setNodePtr(&ieNode->node); - - net->addBlobs(ld.inputBlobsWrappers); - net->addBlobs(ld.outputBlobsWrappers); - addNgraphOutputs(ld); - } - - // Initialize all networks. - for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) - { - LayerData &ld = it->second; - auto iter = ld.backendNodes.find(preferableBackend); - if (iter == ld.backendNodes.end()) - continue; - - Ptr& node = iter->second; - if (node.empty()) - continue; - - Ptr ieNode = node.dynamicCast(); - if (ieNode.empty()) - continue; - - CV_Assert(!ieNode->net.empty()); - - if (!ieNode->net->isInitialized()) - { - ieNode->net->addOutput(ieNode); - ieNode->net->createNet((Target)preferableTarget); - ld.skip = false; - } - } - } -#endif // HAVE_DNN_NGRAPH - -#ifdef HAVE_WEBNN - void addWebnnOutputs(LayerData &ld) - { - CV_TRACE_FUNCTION(); - - Ptr layerNet; - auto it = ld.backendNodes.find(preferableBackend); - if (it != ld.backendNodes.end()) - { - Ptr node = it->second; - if (!node.empty()) - { - Ptr webnnNode = node.dynamicCast(); - CV_Assert(!webnnNode.empty()); CV_Assert(!webnnNode->net.empty()); - layerNet = webnnNode->net; - } - } - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr webnnInpNode = inpNode.dynamicCast(); - CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); - if (layerNet != webnnInpNode->net) - { - webnnInpNode->net->addOutput(webnnInpNode->name); - webnnInpNode->net->setUnconnectedNodes(webnnInpNode); - } - } - } - } - - void initWebnnBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn()); - - Ptr net; - - for (MapIdToLayerData::iterator it = 
layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - if (ld.id == 0) - { - CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || - (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); - std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; - outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName; - wrapper->name = outputName; - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); - std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; - wrapper->name = outputName; - } - } - } - - // Build WebNN networks from sets of layers that support this - // backend. Split a whole model on several WebNN networks if - // some of layers are not implemented. - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - - if (ld.id == 0 && ld.skip) - continue; - - bool fused = ld.skip; - Ptr layer = ld.layerInstance; - if (!fused && !layer->supportBackend(preferableBackend)) - { - // For test use. when not using WebNN, the test case will fail - // with the following code. 
- CV_LOG_WARNING(NULL, "Layer " + ld.type + " name " + ld.name + " is unsupported by WebNN backend."); - - addWebnnOutputs(ld); - net = Ptr(); - layer->preferableTarget = DNN_TARGET_CPU; - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) { - Ptr webnnNode = inpNode.dynamicCast(); - CV_Assert(!webnnNode.empty()); - webnnNode->net->setUnconnectedNodes(webnnNode); - } - } - continue; - } - ld.skip = true; // Initially skip all WebNN supported layers. - - // Create a new network if one of inputs from different WebNN graph. - std::vector> inputNodes; - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) - if (inputNodes.size() == ld.inputBlobsId.size()) { - break; - } - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr webnnInpNode = inpNode.dynamicCast(); - CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); - if (webnnInpNode->net == net && !fused) { - inputNodes.push_back(inpNode); - continue; - } - } - - if (net.empty()) { - net = Ptr(new WebnnNet()); - } - - if (!fused) { - std::vector inputNames; - std::vector inputs; - - auto curr_pos = inpLd.consumers.begin(); - auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; }; - auto cons = curr_pos; - while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != - inpLd.consumers.end()) { - int cons_inp = cons->oid; - Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. 
- dynamicCast(); - CV_Assert(!inpWrapper.empty()); - auto iter = std::find(inputNames.begin(), inputNames.end(), - inpWrapper->name); - if (iter == inputNames.end()) { - inputNames.push_back(inpWrapper->name); - inputs.push_back(inpLd.outputBlobs[cons_inp]); - } - curr_pos = cons + 1; - } - - auto inps = net->setInputs(inputs, inputNames); - for (auto& inp : inps) { - WebnnBackendNode* node = new WebnnBackendNode(inp); - node->net = net; - inputNodes.emplace_back(Ptr(node)); - } - } - } - - Ptr node; - if (!net.empty()) - { - if (fused) - { - bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && - ld.inputBlobs[0]->data == ld.outputBlobs[0].data; - CV_Assert(inPlace); - node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; - ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; - } - } - else { - net = Ptr(new WebnnNet()); - } - - if (!fused) - { - CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - int lid = ld.inputBlobsId[i].lid; - int oid = ld.inputBlobsId[i].oid; - if (oid == 0 || lid == 0) - continue; - - auto webnnInpNode = inputNodes[i].dynamicCast(); - inputNodes[i] = Ptr(new WebnnBackendNode(webnnInpNode->operand)); - } - - if (layer->supportBackend(preferableBackend)) - { - if (ld.type == "Const") { - ml::Operand fake_operand; - Ptr fake_input_node = Ptr(new WebnnBackendNode(fake_operand)); - fake_input_node->net = net; - inputNodes.push_back(fake_input_node); - } - node = layer->initWebnn(ld.inputBlobsWrappers, inputNodes); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); - node.dynamicCast()->name = wrapper->name; - } - } - else - { - continue; - } - } - else if (node.empty()) - continue; - - ld.backendNodes[preferableBackend] = node; - - Ptr webnnNode = node.dynamicCast(); - CV_Assert(!webnnNode.empty()); - webnnNode->net = net; - - if (ld.consumers.empty()) { - 
// TF EAST_text_detection - webnnNode->net->setUnconnectedNodes(webnnNode); - } - for (const auto& pin : blobsToKeep_) - { - if (pin.lid == ld.id) - { - webnnNode->net->addOutput(webnnNode->name); - break; - } - } - net->addBlobs(ld.inputBlobsWrappers); - net->addBlobs(ld.outputBlobsWrappers); - addWebnnOutputs(ld); - } - - // Initialize all networks. - for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) - { - LayerData &ld = it->second; - auto iter = ld.backendNodes.find(preferableBackend); - if (iter == ld.backendNodes.end()) - continue; - - Ptr& node = iter->second; - if (node.empty()) - continue; - - Ptr webnnNode = node.dynamicCast(); - if (webnnNode.empty()) - continue; - - CV_Assert(!webnnNode->net.empty()); - - if (!webnnNode->net->isInitialized()) - { - webnnNode->net->setUnconnectedNodes(webnnNode); - webnnNode->net->createNet((Target)preferableTarget); - ld.skip = false; - } - } - } -#endif - - void initVkComBackend() - { - CV_TRACE_FUNCTION(); - CV_Assert(preferableBackend == DNN_BACKEND_VKCOM); -#ifdef HAVE_VULKAN - if (!haveVulkan()) - return; - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - { - LayerData &ld = it->second; - Ptr layer = ld.layerInstance; - if (!layer->supportBackend(preferableBackend)) - { - continue; - } - - ld.skip = false; - - try - { - ld.backendNodes[DNN_BACKEND_VKCOM] = - layer->initVkCom(ld.inputBlobsWrappers); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. 
" << e.what()); - ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr(); - } - } -#endif - } - - void initCUDABackend(const std::vector& blobsToKeep_) - { - CV_Assert(haveCUDA()); - CV_Assert(preferableBackend == DNN_BACKEND_CUDA); - -#ifdef HAVE_CUDA - if (!cudaInfo) /* we need to check only once */ - cuda4dnn::checkVersions(); - - if (cuda4dnn::getDeviceCount() <= 0) - CV_Error(Error::StsError, "No CUDA capable device found."); - - if (cuda4dnn::getDevice() < 0) - CV_Error(Error::StsError, "No CUDA capable device selected."); - - if (!cuda4dnn::isDeviceCompatible()) - CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration."); - - if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) - { - CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target."); - preferableTarget = DNN_TARGET_CUDA; - } - - if (!cudaInfo) - { - cuda4dnn::csl::CSLContext context; - context.stream = cuda4dnn::csl::Stream(true); - context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream); - context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream); - - auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers - cudaInfo = std::unique_ptr(new CudaInfo_t(std::move(context), std::move(d2h_stream))); - } - - cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any - - for (auto& layer : layers) - { - auto& ld = layer.second; - if (ld.id == 0) - { - for (auto& wrapper : ld.inputBlobsWrappers) - { - auto cudaWrapper = wrapper.dynamicCast(); - cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); - } - } - - for (auto& wrapper : ld.outputBlobsWrappers) - { - auto cudaWrapper = wrapper.dynamicCast(); - cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); - } - } - - for (auto& layer : layers) - { - auto& ld = 
layer.second; - auto& layerInstance = ld.layerInstance; - - if (!layerInstance->supportBackend(DNN_BACKEND_CUDA)) - { - std::ostringstream os; - os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name - << "\" of type " << ld.type << '\n'; - CV_LOG_INFO(NULL, os.str().c_str()); - continue; - } - - /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */ - auto context = cudaInfo->context; - auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers); - ld.backendNodes[DNN_BACKEND_CUDA] = node; - - auto cudaNode = node.dynamicCast(); - cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes()); - } - - if (blobsToKeep_.size() > 1) - { - for (const auto& pin : blobsToKeep_) - { - LayerData& ld = layers[pin.lid]; - ld.cudaD2HBackgroundTransfers.push_back(pin.oid); - } - } -#endif - } - - void allocateLayer(int lid, const LayersShapesMap& layersShapes) - { - CV_TRACE_FUNCTION(); - - LayerData &ld = layers[lid]; - - //already allocated - if (ld.flag) - return; - - size_t ninputs = ld.inputBlobsId.size(); -#if 0 - printf("layer %s:", ld.name.c_str()); - for (size_t i = 0; i < ninputs; i++) - { - int inp_lid = ld.inputBlobsId[i].lid; - LayerData &inp_ld = layers[inp_lid]; - int inp_outputs = (int)inp_ld.outputBlobs.size(); - std::cout << " " << inp_ld.name << "(" << inp_outputs; - - for( int j = 0; j < inp_outputs; j++ ) - { - std::cout << (j == 0 ? 
": " : ", ") << inp_ld.outputBlobs[j].size; - } - std::cout << ")"; - } - printf("\n"); -#endif - - //determine parent layers - for (size_t i = 0; i < ninputs; i++) - ld.inputLayersId.insert(ld.inputBlobsId[i].lid); - - //allocate parents - for (set::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) - allocateLayer(*i, layersShapes); - - //bind inputs - if (ld.id == 0) // DataLayer - { - ninputs = netInputLayer->inputsData.size(); - ld.inputBlobsWrappers.resize(ninputs); - for (size_t i = 0; i < ninputs; i++) - ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]); - } - else - { - ld.inputBlobs.resize(ninputs); - ld.inputBlobsWrappers.resize(ninputs); - for (size_t i = 0; i < ninputs; i++) - { - LayerPin from = ld.inputBlobsId[i]; - CV_Assert(from.valid()); - CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid); - ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid]; - ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid]; - } - } - - LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); - - CV_Assert(layerShapesIt != layersShapes.end()); - - if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F) - ld.dtype = CV_16S; - - std::vector pinsForInternalBlobs; - blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); - ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); - for (int i = 0; i < ld.outputBlobs.size(); ++i) - ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); - - /* CUDA backend has its own system for internal blobs; we don't need these */ - ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 
0 : ld.internals.size()); - for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i) - ld.internalBlobsWrappers[i] = wrap(ld.internals[i]); - - Ptr layerPtr = ld.getLayerInstance(); - { - std::vector inps(ld.inputBlobs.size()); - for (int i = 0; i < ld.inputBlobs.size(); ++i) - { - inps[i] = *ld.inputBlobs[i]; - } - layerPtr->finalize(inps, ld.outputBlobs); - layerPtr->preferableTarget = preferableTarget; -#if 0 - std::cout << "\toutputs:"; - size_t noutputs = ld.outputBlobs.size(); - for (size_t j = 0; j < noutputs; j++) - { - std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; - } - std::cout << "\n"; -#endif - } - - // After allocation of layer, we decrease counters to it's input blobs. - blobManager.releaseReferences(ld.inputBlobsId); - blobManager.releaseReferences(pinsForInternalBlobs); - - ld.flag = 1; - } - -#if 0 -#define printf_(args) printf args -#else -#define printf_(args) -#endif - - void fuseLayers(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - - if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV && - preferableBackend != DNN_BACKEND_CUDA && - preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) - return; - -#if 0 // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return; -#endif - - // scan through all the layers. 
If there is convolution layer followed by the activation layer, - // we try to embed this activation into the convolution and disable separate execution of the activation - - // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)" - std::set pinsToKeep(blobsToKeep_.begin(), - blobsToKeep_.end()); - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) - { - int lid = it->first; - LayerData& ld = layers[lid]; - if( ld.skip ) - { - printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); - continue; - } - printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); - - // the optimization #1. try to fuse batch norm, scaling and/or activation layers - // with the current layer if they follow it. Normally, the are fused with the convolution layer, - // but some of them (like activation) may be fused with fully-connected, elemwise (+) and - // some other layers. - Ptr& currLayer = ld.layerInstance; - if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 ) - { - LayerData* nextData = &layers[ld.consumers[0].lid]; - LayerPin lpNext(ld.consumers[0].lid, 0); - while (nextData) - { -#ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0) - { - CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type); - break; - } -#endif - /* we use `tryFuse` member of convolution layer to fuse eltwise later - * it's not intended to be fused here; hence, we stop when we encounter eltwise - */ - if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise") - break; - Ptr nextLayer = nextData->layerInstance; - if (currLayer->tryFuse(nextLayer)) - { - printf_(("\tfused with %s\n", nextLayer->name.c_str())); - nextData->skip = true; - ld.outputBlobs = layers[lpNext.lid].outputBlobs; - ld.outputBlobsWrappers = 
layers[lpNext.lid].outputBlobsWrappers; - if (nextData->consumers.size() == 1) - { - int nextLayerId = nextData->consumers[0].lid; - nextData = &layers[nextLayerId]; - lpNext = LayerPin(nextLayerId, 0); - } - else - { - nextData = 0; - break; - } - } - else - break; - } - - if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) - continue; // Go to the next layer. - - // TODO: OpenCL target support more fusion styles. - if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && - (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && - ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" && - ld.layerInstance->type != "Concat")) ) - continue; - - if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget) - && ld.layerInstance->type != "Convolution" - && ld.layerInstance->type != "Concat") - continue; - - while (nextData) - { - // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh - if (IS_DNN_OPENCL_TARGET(preferableTarget) && - nextData->type != "ReLU" && - nextData->type != "ChannelsPReLU" && - nextData->type != "ReLU6" && - nextData->type != "TanH" && - nextData->type != "Power") - break; - - Ptr nextActivLayer = nextData->layerInstance.dynamicCast(); - if (nextActivLayer.empty()) - break; - - if (currLayer->setActivation(nextActivLayer)) - { - printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); - nextData->skip = true; - ld.outputBlobs = layers[lpNext.lid].outputBlobs; - ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; - if (nextData->consumers.size() == 1) - { - int nextLayerId = nextData->consumers[0].lid; - nextData = &layers[nextLayerId]; - lpNext = LayerPin(nextLayerId, 0); - } - else - { - nextData = 0; - break; - } - } - else - break; - } - - // OpenCL: fuse convolution layer followed by eltwise + relu - // CUDA: fuse convolution layer followed by eltwise (and optional 
activation) - while (nextData && - (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && - ld.layerInstance->type == "Convolution" - ) // semantic of 'if' - { - Ptr nextEltwiseLayer = nextData->layerInstance.dynamicCast(); - if (nextEltwiseLayer.empty()) - break; - -#ifdef HAVE_CUDA - // CUDA backend supports fusion with eltwise sum (without variable channels) - if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty()) - { - // we create a temporary backend node for eltwise layer to obtain the eltwise configuration - cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init - const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers); - auto eltwiseNode = node.dynamicCast(); - - // broadcasting not supported in fused ops - auto required_shape = shape(nextData->outputBlobs[0]); - for (int i = 0; i < nextData->inputBlobs.size(); i++) - { - if (shape(*nextData->inputBlobs[i]) != required_shape) - { - eltwiseNode.reset(); - break; - } - } - - // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used. - // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors. 
- if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty()) - break; - } -#endif - - if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0) - break; - if (nextData->inputBlobsId.size() != 2) - break; - - if (IS_DNN_OPENCL_TARGET(preferableTarget)) - { - if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) == "sum") - { - if (nextData->params.has("coeff")) - { - DictValue paramCoeff = nextData->params.get("coeff"); - int n = paramCoeff.size(); - bool isCoeffOneOne = (n == 2); - for (int i = 0; isCoeffOneOne && i < n; i++) - { - float c = paramCoeff.get(i); - isCoeffOneOne &= (c == 1.0f); - } - if (!isCoeffOneOne) - { - CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only"); - break; - } - } - } - else - { - CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get("operation")); - break; - } - } - - { - LayerData *eltwiseData = nextData; - - // Eltwise layer has two inputs. We need to determine which - // is a base convolution layer and which could be used as it's bias. 
- LayerData* biasLayerData = 0; - for (int i = 0; i < 2; ++i) - { - LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid]; - CV_Assert(downLayerData); - while (downLayerData->skip) - { - if (downLayerData->inputBlobsId.size() == 1) - downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; - else - { - downLayerData = 0; - break; - } - } - if (downLayerData && ld.id == downLayerData->id) - { - biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid]; - break; - } - } - CV_Assert(biasLayerData); - { - // fuse eltwise + activation layer - // bias must already be computed to fuse => bias layer must appear before convolution - if (biasLayerData->id < ld.id) - { - /* we can fuse activation if: - * => activation layer that follows is the only consumer of eltwise output - * => activation layer does not process multiple inputs - * => we do not require to keep the output of eltwise - */ - Ptr nextFusabeleActivLayer; - if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0) - { - nextData = &layers[eltwiseData->consumers[0].lid]; - lpNext = LayerPin(eltwiseData->consumers[0].lid, 0); - CV_Assert(nextData); - if (nextData->outputBlobs.size() == 1) - nextFusabeleActivLayer = nextData->layerInstance.dynamicCast(); - } - else - { - // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise - nextData = 0; - } - - // the requirements of OCV OpenCL backend and CUDA backend are different - // we need to check them separately; hence, the fuse variables - bool fuse_eltwise = false, fuse_activation = false; - - Ptr activ_power; - if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() && - nextData && - (!nextData->type.compare("ReLU") || - !nextData->type.compare("ChannelsPReLU") || - (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast()) && activ_power->scale == 1.0f) - ) && - currLayer->setActivation(nextFusabeleActivLayer)) - { - fuse_eltwise = true; - 
fuse_activation = true; - } - - if (IS_DNN_CUDA_TARGET(preferableTarget)) - { - /* supported fusion options: - * => convolution + eltwise - * => activation(convolution) + eltwise - * > convolution + activation would have been fused already; we have to fuse eltwise - * => activation(convolution + eltwise) - * > fuse eltwise and then activation - */ - auto layer = nextEltwiseLayer.staticCast(); - if (currLayer->tryFuse(layer)) - { - fuse_eltwise = true; /* eltwise was successfully fused */ - if (!nextFusabeleActivLayer.empty() && nextData) - { - if ((!nextData->type.compare("ReLU") || - !nextData->type.compare("ReLU6") || - !nextData->type.compare("Power") || - !nextData->type.compare("TanH") || - !nextData->type.compare("Sigmoid") || - !nextData->type.compare("Swish") || - !nextData->type.compare("Mish")) && - currLayer->setActivation(nextFusabeleActivLayer)) - { - // activation was fused - fuse_activation = true; - } - } - } - } - - CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */ - if(fuse_eltwise && fuse_activation) - { - CV_Assert(nextData); - CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); - ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); - printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); - printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str())); - eltwiseData->skip = true; - nextData->skip = true; - // This optimization for cases like - // some_layer conv - // | | - // +-- eltwise --+ - // | - // activ - // This way all the element-wise computations - // (i.e. some_layer+conv or some_layer*conv) - // would be done at [conv] layer. So we need to - // replace [conv]'s output blob to [eltwise]'s one - // considering that [activ] is an in-place layer. - // Also we need to move all the consumers' references. - // To prevent memory collisions (i.e. 
when input of - // [conv] and output of [eltwise] is the same blob) - // we allocate a new blob. - CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); - ld.outputBlobs[0] = ld.outputBlobs[0].clone(); - ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); - - eltwiseData->outputBlobs = ld.outputBlobs; - nextData->outputBlobs = ld.outputBlobs; - eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; - nextData->outputBlobsWrappers = ld.outputBlobsWrappers; - - // Move references of [activ] layer consumers to the newly allocated blob. - for (int i = 0; i < nextData->consumers.size(); ++i) - { - LayerData& consumer = layers[nextData->consumers[i].lid]; - for (int j = 0; j < consumer.inputBlobsId.size(); ++j) - { - if (consumer.inputBlobsId[j].lid == lpNext.lid) - { - consumer.inputBlobs[j] = &ld.outputBlobs[0]; - consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; - break; - } - } - } - } - else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise) - { - CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); - CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); - ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); - printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); - eltwiseData->skip = true; - // This optimization is for cases like - // some_layer conv (maybe fused with activ) - // | | - // +-- eltwise --+ - // - // This way all the element-wise computations - // (i.e. some_layer+conv or some_layer*conv) - // would be done at [conv] layer. So we need to - // replace [conv]'s output blob to [eltwise]'s one. - // Also we need to move all the consumers' references. - // To prevent memory collisions (i.e. when input of - // [conv] and output of [eltwise] is the same blob) - // we allocate a new blob. 
- CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); - ld.outputBlobs[0] = ld.outputBlobs[0].clone(); - ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); - - eltwiseData->outputBlobs = ld.outputBlobs; - eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; - - // Move references of [eltwise] layer consumers to the newly allocated blob. - for (int i = 0; i < eltwiseData->consumers.size(); ++i) - { - LayerData& consumer = layers[eltwiseData->consumers[i].lid]; - for (int j = 0; j < consumer.inputBlobsId.size(); ++j) - { - if (consumer.inputBlobsId[j].lid == eltwiseData->id) - { - consumer.inputBlobs[j] = &ld.outputBlobs[0]; - consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; - break; - } - } - } - } - } - } - } - - break; - } - } - - if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) - continue; // Go to the next layer. - - // the optimization #2. if there is concat layer that concatenates channels - // from the inputs together (i.e. axis == 1) then we make the inputs of - // the concat layer to write to the concatenation output buffer - // (and so we eliminate the concatenation layer, because the channels - // are concatenated implicitly). 
- Ptr concatLayer = ld.layerInstance.dynamicCast(); - if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 ) - { - Mat& output = ld.outputBlobs[0]; - UMat umat_output; -#ifdef HAVE_OPENCL - if (!ld.outputBlobsWrappers.empty() && - (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))) - { - size_t i, ninputs = ld.inputBlobsId.size(); - bool conv_layer = true; - for( i = 0; i < ninputs; i++ ) - { - LayerPin pin = ld.inputBlobsId[i]; - LayerData* inp_i_data = &layers[pin.lid]; - while(inp_i_data->skip && - inp_i_data->inputBlobsId.size() == 1 && - inp_i_data->consumers.size() == 1) - { - pin = inp_i_data->inputBlobsId[0]; - inp_i_data = &layers[pin.lid]; - } - conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution"); - } - if (!conv_layer) - continue; - std::vector umat_outputBlobs; - umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - umat_output = umat_outputBlobs[0]; - } -#endif - - // TODO: in general, this optimization can always be done, but - // many layers currently check that the input/output blobs are - // continuous arrays. Unfortunately, this is not true when - // the concatenation optimization is applied with batch_size > 1. - // so, for now, we only apply this optimization in the most popular - // case batch_size == 1. 
- int axis = normalize_axis(concatLayer->axis, output.dims); - if( output.total(0, axis) == 1 ) - { - size_t i, ninputs = ld.inputBlobsId.size(); - std::vector realinputs(ninputs); - for( i = 0; i < ninputs; i++ ) - { - LayerPin pin = ld.inputBlobsId[i]; - LayerData* inp_i_data = &layers[pin.lid]; - while(inp_i_data->skip && - inp_i_data->inputBlobsId.size() == 1 && - inp_i_data->consumers.size() == 1) - { - pin = inp_i_data->inputBlobsId[0]; - inp_i_data = &layers[pin.lid]; - } - printf_(("\treal input for %s is %s\n", - layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(), - inp_i_data->getLayerInstance()->name.c_str())); - - if(inp_i_data->skip || inp_i_data->consumers.size() != 1) - break; -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA && - (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false || - (inp_i_data->layerInstance->type != "Convolution" && - inp_i_data->layerInstance->type != "Pooling" && - inp_i_data->layerInstance->type != "Resize" && - inp_i_data->layerInstance->type != "Flatten" && - inp_i_data->layerInstance->type != "Permute" && - inp_i_data->layerInstance->type != "Reorg" && - inp_i_data->layerInstance->type != "Eltwise" && - inp_i_data->layerInstance.dynamicCast().empty()))) - { - break; - } -#endif - realinputs[i] = pin; - } - - if( i >= ninputs ) - { - // Allocate new memory to prevent collisions during memory - // reusing (see https://github.com/opencv/opencv/pull/10456). 
- output = output.clone(); -#ifdef HAVE_OPENCL - if (preferableBackend == DNN_BACKEND_OPENCV && - IS_DNN_OPENCL_TARGET(preferableTarget)) - { - std::vector umats(1); - umat_output = umat_output.clone(); - umats[0] = umat_output; - OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats); - } -#endif - -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - ld.outputBlobsWrappers[0] = wrap(output); -#endif - std::vector chrange(output.dims, Range::all()); - int ofs = 0; - for( i = 0; i < ninputs; i++ ) - { - LayerPin pin = realinputs[i]; - LayerData* inp_i_data = &layers[pin.lid]; - int channels_i = ld.inputBlobs[i]->size[axis]; - chrange[axis] = Range(ofs, ofs + channels_i); - printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(), - pin.oid, ofs, ofs + channels_i)); - ofs += channels_i; - Mat output_slice = output(chrange); - Mat& curr_output = inp_i_data->outputBlobs[pin.oid]; - CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size); - Mat* oldPtr = &curr_output; - curr_output = output_slice; -#ifdef HAVE_OPENCL - if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) - { - std::vector umats(inp_i_data->outputBlobsWrappers.size()); - umats[pin.oid] = umat_output(chrange); - OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats); - } -#endif -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - { - auto cuda_wrapper = wrap(output).dynamicCast(); - auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims); - auto new_shape = shape(output_slice); - cuda_wrapper->update(new_shape, offset); - inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast(); - } -#endif - // Layers that refer old input Mat will refer to the - // new data but the same Mat object. 
- CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output); - } - -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - { - for (int i = 0; i < ld.consumers.size(); i++) - { - LayerData& consumer = layers[ld.consumers[i].lid]; - for (int j = 0; j < consumer.inputBlobsId.size(); j++) - { - if (consumer.inputBlobsId[j].lid == ld.id) - { - CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data); - consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; - break; - } - } - } - } -#endif - ld.skip = true; - printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str())); - } - } - } - } - } - - void allocateLayers(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - it->second.flag = 0; - - CV_Assert(!layers[0].outputBlobs.empty()); - ShapesVec inputShapes; - for(int i = 0; i < layers[0].outputBlobs.size(); i++) - { - Mat& inp = layers[0].outputBlobs[i]; - CV_Assert(inp.total()); - if (preferableBackend == DNN_BACKEND_OPENCV && - preferableTarget == DNN_TARGET_OPENCL_FP16 && - layers[0].dtype == CV_32F) - { - layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); - } - inputShapes.push_back(shape(inp)); - } - LayersShapesMap layersShapes; - getLayersShapes(inputShapes, layersShapes); - - blobManager.reset(); - backendWrappers.clear(); - - for(auto& layer : layers) - { - auto& ld = layer.second; - ld.inputBlobsWrappers.clear(); - ld.outputBlobsWrappers.clear(); - ld.internalBlobsWrappers.clear(); - } - - // Fake references to input blobs. 
- for (int i = 0; i < layers[0].outputBlobs.size(); ++i) - blobManager.addReference(LayerPin(0, i)); - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) - { - const LayerData& ld = it->second; - blobManager.addReferences(ld.inputBlobsId); - } - - for (int i = 0; i < blobsToKeep_.size(); i++) - { - blobManager.addReference(blobsToKeep_[i]); - } - - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) - { - int lid = it->first; - allocateLayer(lid, layersShapes); - } - - layersTimings.resize(lastLayerId + 1, 0); - fuseLayers(blobsToKeep_); - } - - void forwardLayer(LayerData &ld) - { - CV_TRACE_FUNCTION(); - - Ptr layer = ld.layerInstance; - - if( !ld.skip ) - { - TickMeter tm; - tm.start(); - -#ifndef HAVE_VULKAN - std::map >::const_iterator it = ld.backendNodes.find(preferableBackend); -#else - std::map >::iterator it = ld.backendNodes.find(preferableBackend); -#endif - if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) - { - if (isAsync) - CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode"); - - if (!layer->supportBackend(DNN_BACKEND_OPENCV)) - CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend", - ld.name.c_str(), ld.type.c_str())); - -#ifdef HAVE_OPENCL - if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) - { - std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); - std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - std::vector umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers); - layer->forward(umat_inputBlobs, - umat_outputBlobs, - umat_internalBlobs); - if (DNN_CHECK_NAN_INF) - { - bool fail = false; - for (size_t i = 0; i < umat_outputBlobs.size(); ++i) - { - UMat& u = umat_outputBlobs[i]; - Mat m; - if (u.depth() 
== CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - if (!checkRange(m)) - { - std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - else if (!checkRange(m, true, NULL, -1e6, 1e6)) - { - std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - } - if (fail) - { - for (size_t i = 0; i < umat_inputBlobs.size(); ++i) - { - UMat& u = umat_inputBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < umat_outputBlobs.size(); ++i) - { - UMat& u = umat_outputBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < umat_internalBlobs.size(); ++i) - { - UMat& u = umat_internalBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - std::cout << "INTERNAL " << i << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl; - } - if (DNN_CHECK_NAN_INF_RAISE_ERROR) - CV_Assert(!fail); - } - } - OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs); - } - else -#endif - { - for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i) - { - if (!ld.inputBlobsWrappers[i].empty()) - 
ld.inputBlobsWrappers[i]->copyToHost(); - } - - std::vector inps(ld.inputBlobs.size()); - for (int i = 0; i < ld.inputBlobs.size(); ++i) - { - inps[i] = *ld.inputBlobs[i]; - } - layer->forward(inps, ld.outputBlobs, ld.internals); - - if (DNN_CHECK_NAN_INF) - { - bool fail = false; - for (size_t i = 0; i < ld.outputBlobs.size(); ++i) - { - const Mat& m = ld.outputBlobs[i]; - if (!checkRange(m)) - { - std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - else if (!checkRange(m, true, NULL, -1e6, 1e6)) - { - std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - } - if (fail) - { - for (size_t i = 0; i < ld.inputBlobs.size(); ++i) - { - const Mat* pM = ld.inputBlobs[i]; - if (!pM) - { - std::cout << "INPUT " << i << " is NULL" << std::endl; - continue; - } - const Mat& m = *pM; - std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < ld.outputBlobs.size(); ++i) - { - const Mat& m = ld.outputBlobs[i]; - std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < ld.internals.size(); ++i) - { - const Mat& m = ld.internals[i]; - std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - if (DNN_CHECK_NAN_INF_RAISE_ERROR) - CV_Assert(!fail); - } - } - - for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i) - { - if (!ld.outputBlobsWrappers[i].empty()) - 
ld.outputBlobsWrappers[i]->setHostDirty(); - } - } - } - else - { - Ptr node = it->second; - CV_Assert(!node.empty()); - if (preferableBackend == DNN_BACKEND_CUDA) - { - CV_Assert(haveCUDA()); - -#ifdef HAVE_CUDA - Ptr cudaNode = node.dynamicCast(); - CV_Assert(!cudaNode.empty()); - - cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace); - - for (auto id : ld.cudaD2HBackgroundTransfers) - { - auto wrapper = ld.outputBlobsWrappers[id].dynamicCast(); - wrapper->copyToHostInBackground(); - } -#endif - } - else if (preferableBackend == DNN_BACKEND_HALIDE) - { - forwardHalide(ld.outputBlobsWrappers, node); - } -#ifdef HAVE_INF_ENGINE - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - forwardNgraph(ld.outputBlobsWrappers, node, isAsync); - } -#endif - else if (preferableBackend == DNN_BACKEND_WEBNN) - { - forwardWebnn(ld.outputBlobsWrappers, node, isAsync); - } -#ifdef HAVE_VULKAN - else if (preferableBackend == DNN_BACKEND_VKCOM) - { - try - { - forwardVkCom(ld.outputBlobsWrappers, node); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what()); - it->second = Ptr(); - forwardLayer(ld); - } - } -#endif - else - { - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); - } - } - - tm.stop(); - int64 t = tm.getTimeTicks(); - layersTimings[ld.id] = (t > 0) ? 
t : t + 1; // zero for skipped layers only - } - else - { - layersTimings[ld.id] = 0; - } - - ld.flag = 1; - } - - void forwardToLayer(LayerData &ld, bool clearFlags = true) - { - CV_TRACE_FUNCTION(); - - if (clearFlags) - { - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - it->second.flag = 0; - } - - //already was forwarded - if (ld.flag) - return; - - //forward parents - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) - { - LayerData &ld = it->second; - if (ld.flag) - continue; - forwardLayer(ld); - } - - //forward itself - forwardLayer(ld); - -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - cudaInfo->context.stream.synchronize(); -#endif - } - - void getQuantizationParams(const Mat& src, std::vector& scales, std::vector& zeropoints) - { - const int qmin = -128; // INT8_MIN - const int qmax = 127; // INT8_MAX - - double rmin, rmax, sc, zp; - cv::minMaxIdx(src, &rmin, &rmax); - - // 0 must be present in the range [rmin, rmax] - rmin = std::min(rmin, 0.0); - rmax = std::max(rmax, 0.0); - - sc = (rmax == rmin) ? 
1.0 : (rmax - rmin)/(qmax - qmin); - zp = qmin - (rmin/sc); - - scales.push_back((float)sc); - zeropoints.push_back((int)std::round(zp)); - } - - void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) - { - CV_CheckGE(id, 0, ""); - CV_CheckLT(id, (int)layers.size(), ""); - LayerData& layerData = layers[id]; - std::vector& inputLayerIds = layerData.inputBlobsId; - LayerShapes& layerShapes = inOutShapes[id]; - - if (id == 0 && layerShapes.in[0].empty()) - { - if (!layerData.outputBlobs.empty()) - { - ShapesVec shapes; - for (int i = 0; i < layerData.outputBlobs.size(); i++) - { - Mat& inp = layerData.outputBlobs[i]; - CV_Assert(!inp.empty()); - shapes.push_back(shape(inp)); - } - layerShapes.in = shapes; - } - else - { - const std::vector& inputShapes = netInputLayer->shapes; - bool none = true; - for (size_t i = 0; i < inputShapes.size(); i++) - { - if (!inputShapes[i].empty()) - { - none = false; - break; - } - } - if (none) - { - layerShapes.out.clear(); - return; - } - else - { - layerShapes.in = inputShapes; - } - } - } - - if (layerShapes.in.empty()) - { - for(int i = 0; i < inputLayerIds.size(); i++) - { - int layerId = inputLayerIds[i].lid; - LayersShapesMap::const_iterator it = - inOutShapes.find(layerId); - if(it == inOutShapes.end() || - it->second.out.empty()) - { - getLayerShapesRecursively(layerId, inOutShapes); - } - const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; - layerShapes.in.push_back(shape); - } - } - const ShapesVec& is = layerShapes.in; - ShapesVec& os = layerShapes.out; - ShapesVec& ints = layerShapes.internal; - int requiredOutputs = layerData.requiredOutputs.size(); - Ptr l = layerData.getLayerInstance(); - CV_Assert(l); - bool layerSupportInPlace = false; - try - { - layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." 
<< - " inputs=" << is.size() << - " outputs=" << os.size() << "/" << requiredOutputs << - " blobs=" << l->blobs.size()); - for (size_t i = 0; i < is.size(); ++i) - { - CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); - } - for (size_t i = 0; i < os.size(); ++i) - { - CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); - } - for (size_t i = 0; i < l->blobs.size(); ++i) - { - CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); - } - CV_LOG_ERROR(NULL, "Exception message: " << e.what()); - throw; - } - layerShapes.supportInPlace = layerSupportInPlace; - - try - { - for (int i = 0; i < ints.size(); i++) - CV_CheckGT(total(ints[i]), 0, ""); - - for (int i = 0; i < os.size(); i++) - CV_CheckGT(total(os[i]), 0, ""); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." << - " inputs=" << is.size() << - " outputs=" << os.size() << "/" << requiredOutputs << - " blobs=" << l->blobs.size() << - " inplace=" << layerSupportInPlace); - for (size_t i = 0; i < is.size(); ++i) - { - CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); - } - for (size_t i = 0; i < os.size(); ++i) - { - CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); - } - for (size_t i = 0; i < l->blobs.size(); ++i) - { - CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); - } - CV_LOG_ERROR(NULL, "Exception message: " << e.what()); - throw; - } - } - - void getLayersShapes(const ShapesVec& netInputShapes, - LayersShapesMap& inOutShapes) - { - inOutShapes.clear(); - - inOutShapes[0].in = netInputShapes; //insert shape for first input layer - for (MapIdToLayerData::const_iterator it = layers.begin(); - it != layers.end(); it++) - { - getLayerShapesRecursively(it->first, inOutShapes); - } - } - - void 
getLayerShapes(const ShapesVec& netInputShapes, - const int layerId, - LayerShapes& shapes) - { - LayersShapesMap inOutShapes; - inOutShapes[0].in = netInputShapes; //insert shape for first input layer - getLayerShapesRecursively(layerId, inOutShapes); - shapes = inOutShapes[layerId]; - } - - void updateLayersShapes() - { - CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size()); - CV_Assert(netInputLayer); - DataLayer& inputLayer = *netInputLayer; - LayerData& inputLayerData = layers[0]; - CV_Assert(inputLayerData.layerInstance.get() == &inputLayer); - CV_Assert(!inputLayerData.outputBlobs.empty()); - ShapesVec inputShapes; - for(int i = 0; i < inputLayerData.outputBlobs.size(); i++) - { - Mat& inp = inputLayerData.outputBlobs[i]; - CV_Assert(!inp.empty()); - if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation - preferableTarget == DNN_TARGET_OPENCL_FP16 && - inputLayerData.dtype == CV_32F) - { - inp.create(inp.dims, inp.size, CV_16S); - } - inputShapes.push_back(shape(inp)); - } - CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); - LayersShapesMap layersShapes; - layersShapes[0].in = inputShapes; - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - { - int layerId = it->first; - LayerData& layerData = it->second; - const std::vector& inputLayerIds = layerData.inputBlobsId; - LayerShapes& layerShapes = layersShapes[layerId]; - CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); - if (layerShapes.in.empty()) - { - for(int i = 0; i < inputLayerIds.size(); i++) - { - const LayerPin& inputPin = inputLayerIds[i]; - int inputLayerId = inputPin.lid; - CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); - LayersShapesMap::const_iterator inputIt = 
layersShapes.find(inputLayerId); - if (inputIt == layersShapes.end() || inputIt->second.out.empty()) - { - getLayerShapesRecursively(inputLayerId, layersShapes); - } - const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid]; - layerShapes.in.push_back(shape); - } - layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in); - } - CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes")); - CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes")); - CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes")); - } - CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE"); - } - - LayerPin getLatestLayerPin(const std::vector& pins) const - { - return *std::max_element(pins.begin(), pins.end()); - } - - Mat getBlob(const LayerPin& pin) const - { - CV_TRACE_FUNCTION(); - - if (!pin.valid()) - CV_Error(Error::StsObjectNotFound, "Requested blob not found"); - - MapIdToLayerData::const_iterator it = layers.find(pin.lid); - if (it == layers.end()) - CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid)); - - const LayerData &ld = it->second; - if ((size_t)pin.oid >= ld.outputBlobs.size()) - { - CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, " - "the #%d was requested", ld.name.c_str(), - ld.outputBlobs.size(), pin.oid)); - } - if (preferableTarget != DNN_TARGET_CPU) - { - CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); - // Transfer data to CPU if it's require. 
- ld.outputBlobsWrappers[pin.oid]->copyToHost(); - } - - if (ld.outputBlobs[pin.oid].depth() == CV_16S) - { - Mat output_blob; - convertFp16(ld.outputBlobs[pin.oid], output_blob); - return output_blob; - } - else - return ld.outputBlobs[pin.oid]; - } - - Mat getBlob(String outputName) const - { - return getBlob(getPinByAlias(outputName)); - } - -#ifdef CV_CXX11 - AsyncArray getBlobAsync(const LayerPin& pin) - { - CV_TRACE_FUNCTION(); -#ifdef HAVE_INF_ENGINE - if (!pin.valid()) - CV_Error(Error::StsObjectNotFound, "Requested blob not found"); - - LayerData &ld = layers[pin.lid]; - if ((size_t)pin.oid >= ld.outputBlobs.size()) - { - CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " - "the #%d was requested", ld.name.c_str(), - (int)ld.outputBlobs.size(), (int)pin.oid)); - } - if (preferableTarget != DNN_TARGET_CPU) - { - CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); - // Transfer data to CPU if it's require. - ld.outputBlobsWrappers[pin.oid]->copyToHost(); - } - CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - - Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); - return std::move(wrapper->futureMat); -#else - CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required"); -#endif // HAVE_INF_ENGINE - } - - AsyncArray getBlobAsync(String outputName) - { - return getBlobAsync(getPinByAlias(outputName)); - } -#endif // CV_CXX11 - -#ifdef HAVE_INF_ENGINE - static - Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); -#endif - - string dump() const; - - void dumpNetworkToFile() const - { -#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP - string dumpFileNameBase = getDumpFileNameBase(); - string dumpFileName = dumpFileNameBase + ".dot"; - try - { - string dumpStr = dump(); - std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary); - out << dumpStr; - } - catch (const std::exception& e) - { - std::ofstream 
out((dumpFileName + ".error").c_str(), std::ios::out); - out << "Exception: " << e.what() << std::endl; - } - catch (...) - { - std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); - out << "Can't dump: unknown exception" << std::endl; - } -#endif - } -}; - -Net::Net() : impl(new Net::Impl) -{ -} - -#ifdef HAVE_INF_ENGINE -/*static*/ -Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet) -{ - CV_TRACE_FUNCTION(); - - CV_TRACE_REGION("register_inputs"); - - std::vector inputsNames; - std::vector inp_shapes; - for (auto& it : ieNet.getInputsInfo()) - { - inputsNames.push_back(it.first); - std::vector dims = it.second->getTensorDesc().getDims(); - inp_shapes.push_back(std::vector(dims.begin(), dims.end())); - } - - Net cvNet; - cvNet.setInputsNames(inputsNames); - - // set empty input to determine input shapes - for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id) - { - cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]); - } - - CV_TRACE_REGION_NEXT("backendNode"); - - Ptr backendNode; - { - auto fake_node = std::make_shared(ngraph::element::f32, ngraph::Shape{}); - Ptr backendNodeNGraph(new InfEngineNgraphNode(fake_node)); - backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(*(cvNet.impl), ieNet)); - backendNode = backendNodeNGraph; - } - - CV_TRACE_REGION_NEXT("register_outputs"); - - auto ngraphFunction = ieNet.getFunction(); - CV_Assert(ngraphFunction); - std::vector< std::shared_ptr > ngraphOperations = ngraphFunction->get_ops(); - - for (auto& it : ieNet.getOutputsInfo()) - { - CV_TRACE_REGION("output"); - const auto& outputName = it.first; - - LayerParams lp; - int lid = cvNet.addLayer(it.first, "", lp); - - LayerData& ld = cvNet.impl->layers[lid]; - - { - Ptr cvLayer(new NgraphBackendLayer(ieNet)); - cvLayer->name = outputName; - cvLayer->type = "_unknown_"; - - auto process_layer = [&](const std::string& name) -> bool - { - CV_TRACE_REGION("ngraph_function"); - for (const auto& op : 
ngraphOperations) - { - CV_Assert(op); - if (op->get_friendly_name() == name) - { - const std::string typeName = op->get_type_info().name; - cvLayer->type = typeName; - return true; - } - } - return false; - }; - - bool found = process_layer(outputName); - if (!found) - { - auto pos = outputName.rfind('.'); // cut port number: ".0" - if (pos != std::string::npos) - { - std::string layerName = outputName.substr(0, pos); - found = process_layer(layerName); - } - } - if (!found) - CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'"); - - ld.layerInstance = cvLayer; - ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode; - } - - for (int i = 0; i < inputsNames.size(); ++i) - cvNet.connect(0, i, lid, i); - } - - CV_TRACE_REGION_NEXT("finalize"); - - cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - - cvNet.impl->skipInfEngineInit = true; - return cvNet; -} -#endif // HAVE_INF_ENGINE - -Net Net::readFromModelOptimizer(const String& xml, const String& bin) -{ - CV_TRACE_FUNCTION(); -#ifndef HAVE_INF_ENGINE - CV_UNUSED(xml); CV_UNUSED(bin); - CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); -#else - - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - InferenceEngine::Core& ie = getCore(""); - InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); - - return Impl::createNetworkFromModelOptimizer(ieNet); -#endif // HAVE_INF_ENGINE -} - -Net Net::readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!bufferModelConfig.empty()); - CV_Assert(!bufferWeights.empty()); - return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(), - bufferWeights.data(), bufferWeights.size()); -} - -Net Net::readFromModelOptimizer( - const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, - const uchar* bufferWeightsPtr, size_t bufferWeightsSize 
-) -{ - CV_TRACE_FUNCTION(); -#ifndef HAVE_INF_ENGINE - CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr); - CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferModelConfigSize); - CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); -#else - - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - InferenceEngine::Core& ie = getCore(""); - - std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); - - InferenceEngine::CNNNetwork ieNet; - try - { - InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); - InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize); - - ieNet = ie.ReadNetwork(model, weights_blob); - } - catch (const std::exception& e) - { - CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); - } - - return Impl::createNetworkFromModelOptimizer(ieNet); -#endif // HAVE_INF_ENGINE -} - - -Net::~Net() -{ -} - -int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - CV_Assert(impl); - return impl->addLayer(name, type, dtype, params); -} - -int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - return addLayer(name, type, CV_32F, params); -} - -int Net::addLayerToPrev(const String &name, const String &type, const int &dtype, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - - int prvLid = impl->lastLayerId; - int newLid = this->addLayer(name, type, dtype, params); - this->connect(prvLid, 0, newLid, 0); - return newLid; -} - -int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - return addLayerToPrev(name, type, CV_32F, params); -} - -void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) -{ - 
CV_TRACE_FUNCTION(); - - impl->connect(outLayerId, outNum, inpLayerId, inpNum); -} - -void Net::connect(String _outPin, String _inPin) -{ - CV_TRACE_FUNCTION(); - - LayerPin outPin = impl->getPinByAlias(_outPin); - LayerPin inpPin = impl->getPinByAlias(_inPin); - - CV_Assert(outPin.valid() && inpPin.valid()); - - impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); -} - -int Net::registerOutput(const std::string& outputName, int layerId, int outputPort) -{ - CV_TRACE_FUNCTION(); - CV_Assert(impl); - return impl->registerOutput(outputName, layerId, outputPort); -} - -Mat Net::forward(const String& outputName) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!empty()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - String layerName = outputName; - - if (layerName.empty()) - { - std::vector layerNames = getLayerNames(); - CV_Assert(!layerNames.empty()); - layerName = layerNames.back(); - } - - std::vector pins(1, impl->getPinByAlias(layerName)); - impl->setUpNet(pins); - impl->forwardToLayer(impl->getLayerData(layerName)); - - return impl->getBlob(layerName); -} - -AsyncArray Net::forwardAsync(const String& outputName) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!empty()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - -#ifdef CV_CXX11 - String layerName = outputName; - - if (layerName.empty()) - { - std::vector layerNames = getLayerNames(); - CV_Assert(!layerNames.empty()); - layerName = layerNames.back(); - } - - std::vector pins(1, impl->getPinByAlias(layerName)); - impl->setUpNet(pins); - - if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only"); - - impl->isAsync = true; - impl->forwardToLayer(impl->getLayerData(layerName)); - impl->isAsync = false; - - return impl->getBlobAsync(layerName); -#else - CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11"); -#endif // CV_CXX11 -} - 
-void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!empty()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - String layerName = outputName; - - if (layerName.empty()) - { - std::vector layerNames = getLayerNames(); - CV_Assert(!layerNames.empty()); - layerName = layerNames.back(); - } - - std::vector pins(1, impl->getPinByAlias(layerName)); - impl->setUpNet(pins); - impl->forwardToLayer(impl->getLayerData(layerName)); - - LayerPin pin = impl->getPinByAlias(layerName); - LayerData &ld = impl->layers[pin.lid]; - - if (outputBlobs.isUMat()) - { - impl->getBlob(layerName).copyTo(outputBlobs); - } - else if (outputBlobs.isMat()) - { - outputBlobs.assign(impl->getBlob(layerName)); - } - else if (outputBlobs.isMatVector()) - { - if (impl->preferableTarget != DNN_TARGET_CPU) - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - CV_Assert(!ld.outputBlobsWrappers[i].empty()); - ld.outputBlobsWrappers[i]->copyToHost(); - } - } - if (ld.outputBlobs[0].depth() == CV_16S) - { - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - outputvec.resize(ld.outputBlobs.size()); - for (int i = 0; i < outputvec.size(); i++) - convertFp16(ld.outputBlobs[i], outputvec[i]); - } - else - { - // Output depth can be CV_32F or CV_8S - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - outputvec = ld.outputBlobs; - } - } - else if (outputBlobs.isUMatVector()) - { - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - -#ifdef HAVE_OPENCL - if (impl->preferableBackend == DNN_BACKEND_OPENCV && - IS_DNN_OPENCL_TARGET(impl->preferableTarget)) - { - if (impl->preferableTarget == DNN_TARGET_OPENCL) - outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16) - { - std::vector out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - outputvec.resize(out_vec.size()); - for (int i = 0; 
i < out_vec.size(); i++) - convertFp16(out_vec[i], outputvec[i]); - } - } - else -#endif - { - outputvec.resize(ld.outputBlobs.size()); - for (int i = 0; i < outputvec.size(); ++i) - ld.outputBlobs[i].copyTo(outputvec[i]); - } - } -} - -void Net::forward(OutputArrayOfArrays outputBlobs, - const std::vector& outBlobNames) -{ - CV_TRACE_FUNCTION(); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - std::vector pins; - for (int i = 0; i < outBlobNames.size(); i++) - { - pins.push_back(impl->getPinByAlias(outBlobNames[i])); - } - - impl->setUpNet(pins); - - LayerPin out = impl->getLatestLayerPin(pins); - - impl->forwardToLayer(impl->getLayerData(out.lid)); - - std::vector matvec; - for (int i = 0; i < pins.size(); i++) - { - matvec.push_back(impl->getBlob(pins[i])); - } - - outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1); // allocate vector - outputBlobs.assign(matvec); -} - -void Net::forward(std::vector >& outputBlobs, - const std::vector& outBlobNames) -{ - CV_TRACE_FUNCTION(); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - std::vector pins; - for (int i = 0; i < outBlobNames.size(); i++) - { - pins.push_back(impl->getPinByAlias(outBlobNames[i])); - } - - impl->setUpNet(pins); - - LayerPin out = impl->getLatestLayerPin(pins); - - impl->forwardToLayer(impl->getLayerData(out.lid)); - - outputBlobs.resize(outBlobNames.size()); - for (int i = 0; i < outBlobNames.size(); i++) - { - std::vector lp = impl->getLayerOutPins(outBlobNames[i]); - outputBlobs[i].resize(lp.size()); - for (int j = 0; j < lp.size(); j++) - { - outputBlobs[i][j] = impl->getBlob(lp[j]); - } - } -} - -Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) -{ - CV_TRACE_FUNCTION(); - - // Net can be quantized only once. 
- if (impl->netWasQuantized) - CV_Error(Error::StsBadArg, "Cannot quantize a quantized net"); - - CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S"); - CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S"); - - bool originalFusion = impl->fusion; - int prefBackend = impl->preferableBackend; - int prefTarget = impl->preferableTarget; - - // Disable fusions and use CPU backend to quantize net - setPreferableBackend(DNN_BACKEND_OPENCV); - setPreferableTarget(DNN_TARGET_CPU); - enableFusion(false); - - if (calibData.isMat()) - { - setInput(calibData.getMat()); - } - else if (calibData.isMatVector()) - { - std::vector calibDataVec; - calibData.getMatVector(calibDataVec); - - std::vector inpNames = impl->netInputLayer->outNames; - CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs"); - for (int i = 0; i < calibDataVec.size(); i++) - setInput(calibDataVec[i], inpNames[i]); - } - - std::vector outNames = getUnconnectedOutLayersNames(); - std::vector pins; - for (int i = 0; i < outNames.size(); i++) - pins.push_back(impl->getPinByAlias(outNames[i])); - impl->setUpNet(pins); - - // Compute scales and zeropoints for all the layers - std::vector > scales; - std::vector > zeropoints; - for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) - { - LayerData& ld = it->second; - if (!ld.skip) - { - Ptr layer = ld.layerInstance; - std::vector inps(ld.inputBlobs.size()); - for (int i = 0; i < ld.inputBlobs.size(); ++i) - inps[i] = *ld.inputBlobs[i]; - layer->forward(inps, ld.outputBlobs, ld.internals); - } - - std::vector sc; - std::vector zp; - if (ld.type == "TanH") - { - sc.push_back(1.f/128); - zp.push_back(0); - } - else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax") - { - if (ld.params.get("log_softmax", false)) - { - 
sc.push_back(16.f/256); - zp.push_back(127); - } - else - { - sc.push_back(1.f/256); - zp.push_back(-128); - } - } - else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop") - { - std::vector inp_sc; std::vector inp_zp; - impl->getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp); - sc.assign(ld.outputBlobs.size(), inp_sc[0]); - zp.assign(ld.outputBlobs.size(), inp_zp[0]); - } - else - { - for (int i = 0; i < ld.outputBlobs.size(); i++) - impl->getQuantizationParams(ld.outputBlobs[i], sc, zp); - } - scales.push_back(sc); - zeropoints.push_back(zp); - } - - // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs - // is not needed during quantized inference. We start from the last layer and modify the layer's input scales/zeropoints - // TODO : Need a different approach. Current solution fails when 2 such layers have the same input layer - for (Impl::MapIdToLayerData::reverse_iterator it = impl->layers.rbegin(); it != impl->layers.rend(); ++it) - { - LayerData& ld = it->second; - // Layers with multiple outputs. Number of outputs is equal to number of inputs - if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" || - ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" || - ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" || - (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) /* ReLU with negative slope 0 */) - { - for (int i = 0; i < ld.outputBlobs.size(); i++) - { - LayerPin &pin = ld.inputBlobsId[i]; - scales[pin.lid][pin.oid] = scales[ld.id][i]; - zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i]; - } - } - // Layers with multiple inputs and single output. 
- else if ((ld.type == "Pooling" && toLowerCase(ld.params.get("pool", "max")) == "max") /* Max Pooling */ || - (ld.type == "Eltwise" && toLowerCase(ld.params.get("operation", "sum")) == "max") /* Elementwise max */ || - ld.type == "Concat") - { - for (int i = 0; i < ld.inputBlobsId.size(); i++) - { - LayerPin &pin = ld.inputBlobsId[i]; - scales[pin.lid][pin.oid] = scales[ld.id][0]; - zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0]; - } - } - } - - // Create a new Net and add quantized layers to it. - Net dstNet; - dstNet.impl->netWasQuantized = true; - dstNet.setInputsNames(impl->netInputLayer->outNames); - dstNet.setPreferableBackend(prefBackend); - dstNet.setPreferableTarget(prefTarget); - dstNet.enableFusion(originalFusion); - - for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) - { - LayerData ld = it->second; - if (ld.id == 0) - { - LayerData &quantInpLd = dstNet.impl->layers[0]; - quantInpLd.dtype = inputsDtype; - quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); - quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); - continue; - } - - std::vector inpPins = ld.inputBlobsId; - // Fill input and output scales/zeropoints for the layer - std::vector > inp_out_sc(2); - std::vector > inp_out_zp(2); - for (int i = 0; i < inpPins.size(); i++) - { - LayerPin &pin = inpPins[i]; - inp_out_sc[0].push_back(scales[pin.lid][pin.oid]); - inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]); - } - inp_out_sc[1] = scales[ld.id]; - inp_out_zp[1] = zeropoints[ld.id]; - - // Quantize layer - Ptr layer = ld.layerInstance; - if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params)) - { - ld.type += "Int8"; - ld.dtype = CV_8S; - } - ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size())); - ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size())); - - // Check and add 
quantize/dequantize node before layer - for (int i = 0; i < inpPins.size(); i++) - { - LayerPin &pin = inpPins[i]; - LayerData &inpLd = dstNet.impl->getLayerData(impl->getLayerName(pin.lid)); - pin.lid = inpLd.id; - if (inpLd.dtype != ld.dtype) - { - String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid) - : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid); - // Check if quantize/dequantize node for the input layer already exists - if (dstNet.impl->getLayerId(layerName) >= 0) - { - pin.lid = dstNet.impl->getLayerId(layerName); - pin.oid = 0; - } - else - { - LayerParams lp; - lp.set("scales", inp_out_sc[0][i]); - lp.set("zeropoints", inp_out_zp[0][i]); - lp.name = layerName; - lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize"; - int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp); - dstNet.connect(pin.lid, pin.oid, newLid, 0); - pin.lid = newLid; pin.oid = 0; - } - } - } - - // Add quantized layer to Net and connect to its inputs. - int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params); - for( int i = 0; i < inpPins.size(); i++ ) - dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i); - - // If the layer is a output layer, add quantize/dequantize node after it based on output's data type. - if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype) - { - LayerParams lp; - lp.set("scales", inp_out_sc[1][0]); - lp.set("zeropoints", inp_out_zp[1][0]); - lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name; - lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? 
"Quantize" : "Dequantize"; - dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp); - } - } - // Restore FP32 Net's backend, target and fusion - setPreferableBackend(prefBackend); - setPreferableTarget(prefTarget); - enableFusion(originalFusion); - return dstNet; -} - -void Net::getInputDetails(std::vector& scales, std::vector& zeropoints) const -{ - if (!impl->netWasQuantized) - CV_Error(Error::StsBadFunc, "Net isn't quantized"); - - LayerParams &lp = impl->layers[0].params; - DictValue sc = lp.get("scales"); - DictValue zp = lp.get("zeropoints"); - - for (int i = 0; i < sc.size(); i++) - { - scales.push_back(sc.get(i)); - zeropoints.push_back(zp.get(i)); - } -} - -void Net::getOutputDetails(std::vector& scales, std::vector& zeropoints) const -{ - if (!impl->netWasQuantized) - CV_Error(Error::StsBadFunc, "Net isn't quantized"); - - std::vector outLayerIds = getUnconnectedOutLayers(); - for (auto &lid : outLayerIds) - { - LayerParams &lp = impl->layers[lid].params; - DictValue sc = lp.get("scales"); - DictValue zp = lp.get("zeropoints"); - - for (int i = 0; i < sc.size(); i++) - { - scales.push_back(sc.get(i)); - zeropoints.push_back(zp.get(i)); - } - } -} - -void Net::setPreferableBackend(int backendId) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG(backendId); - - if (backendId == DNN_BACKEND_DEFAULT) - backendId = (Backend)PARAM_DNN_BACKEND_DEFAULT; - - if (impl->netWasQuantized && backendId != DNN_BACKEND_OPENCV) - { - CV_LOG_WARNING(NULL, "DNN: Only default backend supports quantized networks"); - backendId = DNN_BACKEND_OPENCV; - } - -#ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE) - backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; -#endif - - if( impl->preferableBackend != backendId ) - { - impl->preferableBackend = backendId; - impl->clear(); - } -} - -void Net::setPreferableTarget(int targetId) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG(targetId); - - if (impl->netWasQuantized && targetId != DNN_TARGET_CPU && - targetId != 
DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) - { - CV_LOG_WARNING(NULL, "DNN: Only CPU and OpenCL/OpenCL FP16 target is supported by quantized networks"); - targetId = DNN_TARGET_CPU; - } - - if( impl->preferableTarget != targetId ) - { - impl->preferableTarget = targetId; - if (IS_DNN_OPENCL_TARGET(targetId)) - { -#ifndef HAVE_OPENCL -#ifdef HAVE_INF_ENGINE - if (impl->preferableBackend == DNN_BACKEND_OPENCV) -#else - if (impl->preferableBackend == DNN_BACKEND_DEFAULT || - impl->preferableBackend == DNN_BACKEND_OPENCV) -#endif // HAVE_INF_ENGINE - impl->preferableTarget = DNN_TARGET_CPU; -#else - bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16"); - if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16) - impl->preferableTarget = DNN_TARGET_OPENCL; -#endif - } - impl->clear(); - } -} - -void Net::setInputsNames(const std::vector &inputBlobNames) -{ - CV_TRACE_FUNCTION(); - - impl->netInputLayer->setNames(inputBlobNames); -} - -void Net::setInputShape(const String &inputName, const MatShape& shape) -{ - CV_TRACE_FUNCTION(); - - impl->netInputLayer->setInputShape(inputName, shape); -} - -void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - LayerPin pin; - pin.lid = 0; - pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name); - - if (!pin.valid()) - CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); - - Mat blob_ = blob.getMat(); // can't use InputArray directly due MatExpr stuff - MatShape blobShape = shape(blob_); - - if (pin.lid == 0) - { - CV_Assert(!impl->netInputLayer.empty()); - const DataLayer& netInputLayer = *impl->netInputLayer.get(); - if (!netInputLayer.shapes.empty()) - { - CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), ""); - const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid]; 
- if (!inputShapeLimitation.empty()) - { - CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), ""); -#if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0 - const size_t dims = inputShapeLimitation.size(); - for (size_t dim = 0; dim < dims; dim++) - { - if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1) - continue; // don't limit batch - CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], ""); - } -#endif - } - } - } - - LayerData &ld = impl->layers[pin.lid]; - const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size()); - ld.outputBlobs.resize(numInputs); - ld.outputBlobsWrappers.resize(numInputs); - impl->netInputLayer->inputsData.resize(numInputs); - impl->netInputLayer->scaleFactors.resize(numInputs); - impl->netInputLayer->means.resize(numInputs); - - MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]); - bool oldShape = prevShape == blobShape; - - blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]); - if (!oldShape) - ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid]; - - if (!ld.outputBlobsWrappers[pin.oid].empty()) - { - ld.outputBlobsWrappers[pin.oid]->setHostDirty(); - } - impl->netInputLayer->scaleFactors[pin.oid] = scalefactor; - impl->netInputLayer->means[pin.oid] = mean; - impl->netWasAllocated = impl->netWasAllocated && oldShape; -} - -Mat Net::getParam(int layer, int numParam) const -{ - LayerData &ld = impl->getLayerData(layer); - std::vector &layerBlobs = ld.getLayerInstance()->blobs; - CV_Assert(numParam < (int)layerBlobs.size()); - return layerBlobs[numParam]; -} - -void Net::setParam(int layer, int numParam, const Mat &blob) -{ - LayerData &ld = impl->getLayerData(layer); - - std::vector &layerBlobs = ld.getLayerInstance()->blobs; - CV_Assert(numParam < (int)layerBlobs.size()); - //we don't make strong checks, use this function carefully - layerBlobs[numParam] = blob; -} - -int Net::getLayerId(const String &layer) const -{ - return impl->getLayerId(layer); 
-} - -static -string dumpLayerParameterSize(const string& name, const LayerParams& lp) -{ - std::ostringstream out(name, std::ios::ate); - DictValue param = lp.get(name); - switch (param.size()) - { - case 1: out << " : "; break; - case 2: out << " (HxW): "; break; - case 3: out << " (DxHxW): "; break; - default: - CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size())); - out << ": "; - } - for (size_t i = 0; i < param.size(); i++) - { - if (i > 0) - out << " x "; - out << param.get(i); - } - return out.str(); -} - -String Net::dump() -{ - CV_Assert(!empty()); - - bool hasInput = !impl->netInputLayer->inputsData.empty(); - - if (hasInput) - { - if (!impl->netWasAllocated) - impl->setUpNet(); - } - - return impl->dump(); -} - -string Net::Impl::dump() const -{ - bool hasInput = !netInputLayer->inputsData.empty(); - - std::ostringstream out; - const std::map& map = layers; - - Backend prefBackend = (Backend)preferableBackend; - std::vector > skippedLayers; - std::vector skipId; - std::vector allLayers(map.size(), -1); - int idPrev = -1; - Ptr prevNode; - for (std::map::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit) - { - std::map >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend); - if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || - itBackend->second.empty()) - { - if (rit->second.skip) - skipId.push_back(rit->first); - else if (!skipId.empty()) - { - if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty()) - skipId.push_back(rit->first); - else if (idPrev != -1) - skipId.push_back(idPrev); - - std::sort(skipId.begin(), skipId.end()); - for (int i = 0; i < skipId.size(); i++) { - allLayers[skipId[i]] = skippedLayers.size(); - } - skippedLayers.push_back(skipId); - skipId.clear(); - } - } - else - { - if (itBackend->second == prevNode) - skipId.push_back(idPrev); - else if (!skipId.empty()) - { - skipId.push_back(idPrev); - 
std::sort(skipId.begin(), skipId.end()); - for (int i = 0; i < skipId.size(); i++) { - allLayers[skipId[i]] = skippedLayers.size(); - } - skippedLayers.push_back(skipId); - skipId.clear(); - } - idPrev = rit->first; - prevNode = itBackend->second; - } - } - std::vector colors = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff"}; - string backend; - switch (prefBackend) - { - case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break; - case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break; - case DNN_BACKEND_INFERENCE_ENGINE: // fallthru - case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: // fallthru - case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break; - case DNN_BACKEND_OPENCV: backend = "OCV/"; break; - case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break; - case DNN_BACKEND_CUDA: backend = "CUDA/"; break; - case DNN_BACKEND_WEBNN: backend = "WEBNN/"; break; - // don't use default: - } - out << "digraph G {\n"; - // Add nodes - for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) - { - const LayerData& ld = it->second; - string name = ld.params.name; - std::vector clusterIds(1, it->first); - if (allLayers[it->first] == -1 && !name.empty()) - { - out << "\t\"" << name << "\" [label=\""; - } - else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0]) - { - continue; - } - else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0] - { - int cluster = allLayers[it->first]; - out << "\t\"" << "cluster_" << cluster << "\" [label=\"{"; - clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster - } - for (int i = 0; i < clusterIds.size(); i++) - { - CV_DbgAssert(map.find(clusterIds[i]) != map.end()); - const LayerParams& lp = map.find(clusterIds[i])->second.params; - if (!lp.name.empty()) { - if (i > 0) { - out << " | "; - } - out << lp.name << "\\n" << lp.type << "\\n"; // align center - if (lp.has("kernel_size")) - { - 
string kernel = dumpLayerParameterSize("kernel_size", lp); - out << kernel; - out << "\\l"; // align left - } else if (lp.has("kernel_h") && lp.has("kernel_w")) { - DictValue h = lp.get("kernel_h"); - DictValue w = lp.get("kernel_w"); - out << "kernel (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("stride")) { - string stride = dumpLayerParameterSize("stride", lp); - out << stride; - out << "\\l"; // align left - } else if (lp.has("stride_h") && lp.has("stride_w")) { - DictValue h = lp.get("stride_h"); - DictValue w = lp.get("stride_w"); - out << "stride (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("dilation")) { - string dilation = dumpLayerParameterSize("dilation", lp); - out << dilation; - out << "\\l"; // align left - } else if (lp.has("dilation_h") && lp.has("dilation_w")) { - DictValue h = lp.get("dilation_h"); - DictValue w = lp.get("dilation_w"); - out << "dilation (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("pad")) { - DictValue pad = lp.get("pad"); - out << "pad "; - switch (pad.size()) - { - case 1: out << ": " << pad; break; - case 2: - out << "(HxW): (" << pad.get(0) << " x " << pad.get(1) << ")"; - break; - case 4: - out << "(HxW): (" << pad.get(0) << ", " << pad.get(2) - << ") x (" << pad.get(1) << ", " << pad.get(3) << ")"; - break; - case 6: - out << "(DxHxW): (" << pad.get(0) << ", " << pad.get(3) - << ") x (" << pad.get(1) << ", " << pad.get(4) - << ") x (" << pad.get(2) << ", " << pad.get(5) << ")"; - break; - default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size())); - } - out << "\\l"; // align left - } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) { - DictValue l = lp.get("pad_l"); - DictValue t = lp.get("pad_t"); - DictValue r = lp.get("pad_r"); - DictValue b = lp.get("pad_b"); - out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")"; - out << "\\l"; // align 
left - } - else if (lp.has("pooled_w") || lp.has("pooled_h")) { - DictValue h = lp.get("pooled_h"); - DictValue w = lp.get("pooled_w"); - out << "pad pooled (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("pool")) { - out << "pool: " << lp.get("pool"); - out << "\\l"; // align left - } - if (lp.has("global_pooling")) { - out << "global_pooling: " << lp.get("global_pooling"); - out << "\\l"; // align left - } - if (lp.has("group")) { - out << "group: " << lp.get("group"); - out << "\\l"; // align left - } - } - } - if (!ld.outputBlobs.empty()) - { - out << "output: " << ld.outputBlobs[0].size; - out << "\\l"; // align left - } - - Ptr layerBackend; - std::map >::const_iterator ibn = ld.backendNodes.find(prefBackend); - if (ibn != ld.backendNodes.end()) - layerBackend = ibn->second; - out << (!layerBackend.empty() ? backend : "OCV/"); - int colorId = 0; - const Target target = ld.layerInstance.empty() - ? DNN_TARGET_CPU - : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type - switch (target) - { - case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break; - case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break; - case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break; - case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break; - case DNN_TARGET_HDDL: out << "HDDL"; colorId = 8; break; - case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break; - case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break; - case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break; - case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break; - // don't use default: - } - CV_Assert(colorId < colors.size()); - out << "\\n"; // align center - out << ((clusterIds.size() == 1)? "\" " : " }\" "); - out << "fillcolor=\"" << colors[colorId] << "\" "; - out << "style=filled "; - out << "shape=" << ((clusterIds.size() == 1)? 
"box" : "record") << "]\n"; - } - out << '\n'; - // Add edges - int inputsSize = hasInput ? netInputLayer->outNames.size() : 0; - for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) - { - const LayerData& ld = it->second; - if (allLayers[it->first] == -1) // node - { - for (int i = 0; i < ld.consumers.size(); i++) - { - int outId = ld.consumers[i].lid; - if (it == map.begin() && inputsSize > 1) - out << "\t\"" << ld.name << "_" << i << "\"" << " -> "; - else - out << "\t\"" << ld.name << "\"" << " -> "; - if (allLayers[outId] == -1) // node - { - CV_DbgAssert(map.find(outId) != map.end()); - out << "\"" << map.find(outId)->second.name << "\"\n"; - } - else // cluster - { - out << "\"" << "cluster_" << allLayers[outId] << "\"\n"; - } - } - } - else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster - { - for (int i = 0; i < ld.consumers.size(); i++) - { - int outId = ld.consumers[i].lid; - if (allLayers[outId] == -1) // node - { - CV_DbgAssert(map.find(outId) != map.end()); - out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> "; - out << "\"" << map.find(outId)->second.name << "\"\n"; - } - else if (allLayers[outId] != allLayers[it->first]) { // another cluster - out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> "; - out << "\"" << "cluster_" << allLayers[outId] << "\"\n"; - } - } - } - } - out << "}\n"; - return out.str(); -} - -void Net::dumpToFile(const String& path) { - std::ofstream file(path.c_str()); - file << dump(); - file.close(); -} - -Ptr Net::getLayer(int layerId) const -{ - LayerData &ld = impl->getLayerData(layerId); - return ld.getLayerInstance(); -} -Ptr Net::getLayer(const LayerId& layerId) const -{ - LayerData &ld = impl->getLayerData(layerId); - return ld.getLayerInstance(); -} - -std::vector > Net::getLayerInputs(int layerId) const -{ - LayerData &ld = impl->getLayerData(layerId); - - std::vector > inputLayers; - 
inputLayers.reserve(ld.inputBlobsId.size()); - for (int i = 0; i < ld.inputBlobsId.size(); ++i) { - inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid)); - } - return inputLayers; -} - -std::vector Net::getLayerNames() const -{ - CV_TRACE_FUNCTION(); - - std::vector res; - res.reserve(impl->layers.size()); - - Impl::MapIdToLayerData::const_iterator it; - for (it = impl->layers.begin(); it != impl->layers.end(); it++) - { - if (it->second.id) //skip Data layer - res.push_back(it->second.name); - } - - return res; -} - -bool Net::empty() const -{ - return impl->layers.size() <= 1; //first layer is default Data layer -} - -std::vector Net::getUnconnectedOutLayers() const -{ - CV_TRACE_FUNCTION(); - CV_Assert(impl); - - std::vector layersIds; - - // registerOutput() flow - const std::map& outputNameToId = impl->outputNameToId; - if (!outputNameToId.empty()) - { - for (std::map::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it) - { - layersIds.push_back(it->second); - } - return layersIds; - } - - Impl::MapIdToLayerData::const_iterator it; - for (it = impl->layers.begin(); it != impl->layers.end(); it++) - { - int lid = it->first; - const LayerData &ld = it->second; - - if (ld.requiredOutputs.size() == 0) - layersIds.push_back(lid); - } - - return layersIds; -} - -std::vector Net::getUnconnectedOutLayersNames() const -{ - std::vector ids = getUnconnectedOutLayers(); - const size_t n = ids.size(); - std::vector names(n); - for (size_t i = 0; i < n; ++i) - { - names[i] = impl->layers[ids[i]].name; - } - return names; -} - -void Net::getLayersShapes(const ShapesVec& netInputShapes, - std::vector& layersIds, - std::vector& inLayersShapes, - std::vector& outLayersShapes) const -{ - layersIds.clear(); - inLayersShapes.clear(); - outLayersShapes.clear(); - - Impl::LayersShapesMap inOutShapes; - impl->getLayersShapes(netInputShapes, inOutShapes); - - for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); - it != inOutShapes.end(); 
it++) - { - layersIds.push_back(it->first); - inLayersShapes.push_back(it->second.in); - outLayersShapes.push_back(it->second.out); - } -} - -void Net::getLayersShapes(const MatShape& netInputShape, - std::vector& layerIds, - std::vector& inLayersShapes, - std::vector& outLayersShapes) const -{ - getLayersShapes(ShapesVec(1, netInputShape), - layerIds, inLayersShapes, outLayersShapes); -} - -void Net::getLayerShapes(const MatShape& netInputShape, - const int layerId, - ShapesVec& inLayerShapes, - ShapesVec& outLayerShapes) const -{ - getLayerShapes(ShapesVec(1, netInputShape), - layerId, inLayerShapes, outLayerShapes); - -} - -void Net::getLayerShapes(const ShapesVec& netInputShapes, - const int layerId, - ShapesVec& inLayerShapes, - ShapesVec& outLayerShapes) const -{ - LayerShapes shapes; - impl->getLayerShapes(netInputShapes, layerId, shapes); - inLayerShapes = shapes.in; - outLayerShapes = shapes.out; -} - -int64 Net::getFLOPS(const std::vector& netInputShapes) const -{ - CV_TRACE_FUNCTION(); - - int64 flops = 0; - std::vector ids; - std::vector > inShapes, outShapes; - getLayersShapes(netInputShapes, ids, inShapes, outShapes); - CV_Assert(inShapes.size() == outShapes.size()); - CV_Assert(inShapes.size() == ids.size()); - - for(int i = 0; i < ids.size(); i++) - { - flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], - outShapes[i]); - } - - return flops; -} - -int64 Net::getFLOPS(const MatShape& netInputShape) const -{ - return getFLOPS(std::vector(1, netInputShape)); -} - -int64 Net::getFLOPS(const int layerId, - const std::vector& netInputShapes) const -{ - Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId); - CV_Assert(layer != impl->layers.end()); - - LayerShapes shapes; - impl->getLayerShapes(netInputShapes, layerId, shapes); - - return const_cast(layer->second).getLayerInstance()->getFLOPS(shapes.in, shapes.out); -} - -int64 Net::getFLOPS(const int layerId, - const MatShape& netInputShape) const -{ - return 
getFLOPS(layerId, std::vector(1, netInputShape)); -} - -void Net::getLayerTypes(std::vector& layersTypes) const -{ - layersTypes.clear(); - - std::map layers; - for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin(); - it != impl->layers.end(); it++) - { - if (layers.find(it->second.type) == layers.end()) - layers[it->second.type] = 0; - layers[it->second.type]++; - } - - for (std::map::const_iterator it = layers.begin(); - it != layers.end(); it++) - { - layersTypes.push_back(it->first); - } -} - -int Net::getLayersCount(const String& layerType) const -{ - int count = 0; - for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin(); - it != impl->layers.end(); it++) - { - if (it->second.type == layerType) - count++; - } - return count; -} - -void Net::getMemoryConsumption(const int layerId, - const std::vector& netInputShapes, - size_t& weights, size_t& blobs) const -{ - CV_TRACE_FUNCTION(); - - Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId); - CV_Assert(layer != impl->layers.end()); - - weights = blobs = 0; - - for(int i = 0; i < layer->second.params.blobs.size(); i++) - { - const Mat& weightsBlob = layer->second.params.blobs[i]; - weights += weightsBlob.total()*weightsBlob.elemSize(); - } - - ShapesVec inLayerShapes, outLayerShapes; - getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes); - size_t elemSize = (impl->netWasQuantized) ? 
sizeof(char) : sizeof(float); - for(int i = 0; i < outLayerShapes.size(); i++) - { - blobs += total(outLayerShapes[i]) * elemSize; - } -} - -void Net::getMemoryConsumption(const std::vector& netInputShapes, - size_t& weights, size_t& blobs) const -{ - CV_TRACE_FUNCTION(); - - std::vector layerIds; - std::vector w, b; - getMemoryConsumption(netInputShapes, layerIds, w, b); - - weights = blobs = 0; - for(int i = 0; i < layerIds.size(); i++) - { - weights += w[i]; - blobs += b[i]; - } -} - -void Net::getMemoryConsumption(const int layerId, - const MatShape& netInputShape, - size_t& weights, size_t& blobs) const -{ - getMemoryConsumption(layerId, std::vector(1, netInputShape), - weights, blobs); -} - -void Net::getMemoryConsumption(const MatShape& netInputShape, - size_t& weights, size_t& blobs) const -{ - getMemoryConsumption(std::vector(1, netInputShape), - weights, blobs); -} - -void Net::getMemoryConsumption(const std::vector& netInputShapes, - std::vector& layerIds, std::vector& weights, - std::vector& blobs) const -{ - CV_TRACE_FUNCTION(); - - layerIds.clear(); - weights.clear(); - blobs.clear(); - - std::vector > inLayerShapes, outLayerShapes; - - getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); - size_t elemSize = (impl->netWasQuantized) ? 
sizeof(char) : sizeof(float); - for(int i = 0; i < layerIds.size(); i++) - { - int w = 0, b = 0; - Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerIds[i]); - CV_Assert(layer != impl->layers.end()); - - for(int j = 0; j < layer->second.params.blobs.size(); j++) - { - const Mat& weightsBlob = layer->second.params.blobs[j]; - w += weightsBlob.total()*weightsBlob.elemSize(); - } - - for(int j = 0; j < outLayerShapes[i].size(); j++) - { - b += total(outLayerShapes[i][j]) * elemSize; - } - - weights.push_back(w); - blobs.push_back(b); - } -} - -void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, - std::vector& weights, std::vector& blobs) const -{ - getMemoryConsumption(std::vector(1, netInputShape), layerIds, - weights, blobs); -} - -void Net::enableFusion(bool fusion) -{ - if( impl->fusion != fusion ) - { - impl->fusion = fusion; - impl->clear(); - } -} - -void Net::setHalideScheduler(const String& scheduler) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str()); - - impl->halideConfigFile = scheduler; -} - -int64 Net::getPerfProfile(std::vector& timings) -{ - timings = std::vector(impl->layersTimings.begin() + 1, impl->layersTimings.end()); - int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0); - return total; -} - -////////////////////////////////////////////////////////////////////////// - -Layer::Layer() { preferableTarget = DNN_TARGET_CPU; } - -Layer::Layer(const LayerParams ¶ms) - : blobs(params.blobs), name(params.name), type(params.type) -{ - preferableTarget = DNN_TARGET_CPU; -} - -void Layer::setParamsFrom(const LayerParams ¶ms) -{ - blobs = params.blobs; - name = params.name; - type = params.type; -} - -int Layer::inputNameToIndex(String) -{ - return -1; -} - -int Layer::outputNameToIndex(const String&) -{ - return 0; -} - -bool Layer::supportBackend(int backendId) -{ - return backendId == DNN_BACKEND_OPENCV; -} - -Ptr Layer::initCUDA( - void*, - 
const std::vector>&, - const std::vector>&) -{ - CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initVkCom(const std::vector > &) -{ - CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initHalide(const std::vector > &) -{ - CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initNgraph(const std::vector > & inputs, const std::vector >& nodes) -{ - CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initWebnn(const std::vector > & inputs, const std::vector >& nodes) -{ - CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -void Layer::applyHalideScheduler(Ptr& node, const std::vector &inputs, - const std::vector &outputs, int targetId) const -{ -#ifdef HAVE_HALIDE - CV_TRACE_FUNCTION(); - - Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), - xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile"); - Halide::Func& top = node.dynamicCast()->funcs.back(); - - int outW, outH, outC, outN; - getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN); - - if (targetId == DNN_TARGET_CPU) - { - if (outW == 1 && outH == 1) - { - if (outC + outN == 1) - return; - - if (outC > 8) - top.split(c, co, ci, 8) - .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) - .parallel(tile) - .vectorize(ci, 8); - else - top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile) - .parallel(tile); - } - else - { - if (outH > 2) - { - top.reorder(x, c, y) - .split(y, yo, yi, 2) - .fuse(yo, n, tile) - .parallel(tile) - .unroll(yi) - .vectorize(x, outW >= 16 ? 16 : outW); - } - } - } - else if (targetId == DNN_TARGET_OPENCL) - { - if (outW == 1 && outH == 1) - { - int c_split = outC > 8 ? 
(outC > 16 ? 8 : 4) : outC; - top.split(c, co, ci, c_split) - .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) - .gpu_blocks(tile) - .gpu_threads(ci); - } - else - { - int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW; - int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH; - // Supported vectorization widths: 2, 3, 4, 8, 16 - int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC); - top.split(x, xo, xi, x_split).split(y, yo, yi, y_split) - .split(c, co, ci, c_split) - .gpu_blocks(xo, yo, co) - .gpu_threads(xi, yi) - .reorder(xi, yi, ci, xo, yo, co) - .vectorize(ci); - } - } - else - CV_Error(Error::StsNotImplemented, "Unknown target identifier"); -#endif // HAVE_HALIDE -} - -Ptr Layer::tryAttach(const Ptr& node) -{ - return Ptr(); -} - -bool Layer::setActivation(const Ptr&) { return false; } -bool Layer::tryFuse(Ptr&) { return false; } -void Layer::getScaleShift(Mat& scale, Mat& shift) const -{ - scale = Mat(); - shift = Mat(); -} - -void Layer::getScaleZeropoint(float& scale, int& zeropoint) const -{ - scale = 1.f; - zeropoint = 0; -} - -void Layer::unsetAttached() -{ - setActivation(Ptr()); -} - -template -static void vecToPVec(const std::vector &v, std::vector &pv) -{ - pv.resize(v.size()); - for (size_t i = 0; i < v.size(); i++) - pv[i] = const_cast(&v[i]); -} - -void Layer::finalize(const std::vector &inputs, std::vector &outputs) -{ - CV_TRACE_FUNCTION(); - this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs); -} - -void Layer::finalize(const std::vector &input, std::vector &output) -{ - CV_UNUSED(input);CV_UNUSED(output); -} - -void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) -{ - CV_TRACE_FUNCTION(); - std::vector inputs, outputs; - inputs_arr.getMatVector(inputs); - outputs_arr.getMatVector(outputs); - - std::vector inputsp; - vecToPVec(inputs, inputsp); - this->finalize(inputsp, outputs); -} - -std::vector Layer::finalize(const std::vector &inputs) -{ - CV_TRACE_FUNCTION(); 
- - std::vector outputs; - this->finalize(inputs, outputs); - return outputs; -} - -void Layer::forward(std::vector &input, std::vector &output, std::vector &internals) -{ - // We kept this method for compatibility. DNN calls it now only to support users' implementations. -} - -void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); -} - -void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S) - { - std::vector inputs; - std::vector outputs; - std::vector internals; - - std::vector orig_inputs; - std::vector orig_outputs; - std::vector orig_internals; - - inputs_arr.getUMatVector(orig_inputs); - outputs_arr.getUMatVector(orig_outputs); - internals_arr.getUMatVector(orig_internals); - - inputs.resize(orig_inputs.size()); - for (size_t i = 0; i < orig_inputs.size(); i++) - convertFp16(orig_inputs[i], inputs[i]); - - outputs.resize(orig_outputs.size()); - for (size_t i = 0; i < orig_outputs.size(); i++) - outputs[i].create(shape(orig_outputs[i]), CV_32F); - - internals.resize(orig_internals.size()); - for (size_t i = 0; i < orig_internals.size(); i++) - internals[i].create(shape(orig_internals[i]), CV_32F); - - forward(inputs, outputs, internals); - - for (size_t i = 0; i < outputs.size(); i++) - convertFp16(outputs[i], orig_outputs[i]); - - // sync results back - outputs_arr.assign(orig_outputs); - internals_arr.assign(orig_internals); - return; - } - std::vector inpvec; - std::vector outputs; - std::vector internals; - - inputs_arr.getMatVector(inpvec); - outputs_arr.getMatVector(outputs); - internals_arr.getMatVector(internals); - - 
std::vector inputs(inpvec.size()); - for (int i = 0; i < inpvec.size(); i++) - inputs[i] = &inpvec[i]; - - this->forward(inputs, outputs, internals); - - // sync results back - outputs_arr.assign(outputs); - internals_arr.assign(internals); -} - -void Layer::run(const std::vector &inputs, std::vector &outputs, std::vector &internals) -{ - CV_TRACE_FUNCTION(); - - this->finalize(inputs, outputs); - this->forward(inputs, outputs, internals); -} - -bool Layer::tryQuantize(const std::vector > &scales, - const std::vector > &zeropoints, LayerParams& params) -{ - return false; -} - -Layer::~Layer() {} - -bool Layer::getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const -{ - CV_Assert(inputs.size()); - outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]); - return false; -} - -bool Layer::updateMemoryShapes(const std::vector &inputs) -{ - return true; -} -////////////////////////////////////////////////////////////////////////// - -Mutex& getLayerFactoryMutex() -{ - static Mutex* volatile instance = NULL; - if (instance == NULL) - { - cv::AutoLock lock(getInitializationMutex()); - if (instance == NULL) - instance = new Mutex(); - } - return *instance; -} - -static LayerFactory_Impl& getLayerFactoryImpl_() -{ - static LayerFactory_Impl impl; - return impl; -} - -LayerFactory_Impl& getLayerFactoryImpl() -{ - static LayerFactory_Impl* volatile instance = NULL; - if (instance == NULL) - { - cv::AutoLock lock(getLayerFactoryMutex()); - if (instance == NULL) - { - instance = &getLayerFactoryImpl_(); - initializeLayerFactory(); - } - } - return *instance; -} - -void LayerFactory::registerLayer(const String &type, Constructor constructor) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - cv::AutoLock lock(getLayerFactoryMutex()); - LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); - - if (it != getLayerFactoryImpl().end()) - { - if 
(it->second.back() == constructor) - CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered"); - it->second.push_back(constructor); - } - getLayerFactoryImpl().insert(std::make_pair(type, std::vector(1, constructor))); -} - -void LayerFactory::unregisterLayer(const String &type) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - cv::AutoLock lock(getLayerFactoryMutex()); - - LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); - if (it != getLayerFactoryImpl().end()) - { - if (it->second.size() > 1) - it->second.pop_back(); - else - getLayerFactoryImpl().erase(it); - } -} - -bool LayerFactory::isLayerRegistered(const std::string& type) -{ - cv::AutoLock lock(getLayerFactoryMutex()); - auto& registeredLayers = getLayerFactoryImpl(); - return registeredLayers.find(type) != registeredLayers.end(); -} - -Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& params) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - cv::AutoLock lock(getLayerFactoryMutex()); - LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type); - - if (it != getLayerFactoryImpl().end()) - { - CV_Assert(!it->second.empty()); - return it->second.back()(params); - } - else - { - return Ptr(); //NULL - } -} - -BackendNode::BackendNode(int backendId) : backendId(backendId) {} - -BackendNode::~BackendNode() {}; - -BackendWrapper::BackendWrapper(int backendId, int targetId) - : backendId(backendId), targetId(targetId) {} - -BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m) -{ - CV_Error(Error::StsNotImplemented, - "Constructor of backend wrapper must be implemented"); -} - -BackendWrapper::BackendWrapper(const Ptr& base, const MatShape& shape) -{ - CV_Error(Error::StsNotImplemented, - "Constructor of backend wrapper must be implemented"); -} - -BackendWrapper::~BackendWrapper() {} - -Net readNet(const String& _model, const String& _config, const String& _framework) 
-{ - String framework = toLowerCase(_framework); - String model = _model; - String config = _config; - const std::string modelExt = model.substr(model.rfind('.') + 1); - const std::string configExt = config.substr(config.rfind('.') + 1); - if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || - modelExt == "prototxt" || configExt == "prototxt") - { - if (modelExt == "prototxt" || configExt == "caffemodel") - std::swap(model, config); - return readNetFromCaffe(config, model); - } - if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || - modelExt == "pbtxt" || configExt == "pbtxt") - { - if (modelExt == "pbtxt" || configExt == "pb") - std::swap(model, config); - return readNetFromTensorflow(model, config); - } - if (framework == "torch" || modelExt == "t7" || modelExt == "net" || - configExt == "t7" || configExt == "net") - { - return readNetFromTorch(model.empty() ? config : model); - } - if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || - modelExt == "cfg" || configExt == "cfg") - { - if (modelExt == "cfg" || configExt == "weights") - std::swap(model, config); - return readNetFromDarknet(config, model); - } - if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || - modelExt == "xml" || configExt == "xml") - { - if (modelExt == "xml" || configExt == "bin") - std::swap(model, config); - return readNetFromModelOptimizer(config, model); - } - if (framework == "onnx" || modelExt == "onnx") - { - return readNetFromONNX(model); - } - CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + - model + (config.empty() ? 
"" : ", " + config)); -} - -Net readNet(const String& _framework, const std::vector& bufferModel, - const std::vector& bufferConfig) -{ - String framework = toLowerCase(_framework); - if (framework == "caffe") - return readNetFromCaffe(bufferConfig, bufferModel); - else if (framework == "tensorflow") - return readNetFromTensorflow(bufferModel, bufferConfig); - else if (framework == "darknet") - return readNetFromDarknet(bufferConfig, bufferModel); - else if (framework == "torch") - CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); - else if (framework == "dldt") - return readNetFromModelOptimizer(bufferConfig, bufferModel); - CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); -} - -Net readNetFromModelOptimizer(const String &xml, const String &bin) -{ - return Net::readFromModelOptimizer(xml, bin); -} - -Net readNetFromModelOptimizer(const std::vector& bufferCfg, const std::vector& bufferModel) -{ - return Net::readFromModelOptimizer(bufferCfg, bufferModel); -} - -Net readNetFromModelOptimizer( - const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, - const uchar* bufferWeightsPtr, size_t bufferWeightsSize -) -{ - return Net::readFromModelOptimizer( - bufferModelConfigPtr, bufferModelConfigSize, - bufferWeightsPtr, bufferWeightsSize - ); -} - -CV__DNN_INLINE_NS_END -}} // namespace diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index 6350a4e168..ae4d9c295e 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -19,7 +19,44 @@ void initializeLayerFactory(); extern bool DNN_DIAGNOSTICS_RUN; extern bool DNN_SKIP_REAL_IMPORT; -namespace detail { +// +// dnn_params.cpp +// + +/// Network dump level +size_t getParam_DNN_NETWORK_DUMP(); + +/// This parameter is useful to run with valgrind memory errors detection +bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS(); + +#ifdef HAVE_OPENCL +bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES(); +#endif + 
+int getParam_DNN_BACKEND_DEFAULT(); + +// Additional checks (slowdowns execution!) +bool getParam_DNN_CHECK_NAN_INF(); +bool getParam_DNN_CHECK_NAN_INF_DUMP(); +bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR(); + + +inline namespace detail { + +typedef std::vector ShapesVec; + +struct LayerShapes +{ + ShapesVec in, out, internal; + // No guarantees that layer which support in-place computations + // will be computed in-place (input.data_ptr == output.data_ptr). + // If layer said that it could work in-place and layers after it + // no longer use input blob, we'll set output = input. + bool supportInPlace; + LayerShapes() {supportInPlace = false;} +}; + + #define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn)) class NotImplemented : public Layer @@ -82,8 +119,6 @@ struct NetImplBase } // namespace detail -typedef std::vector ShapesVec; - static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string()) { std::ostringstream ss; diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp new file mode 100644 index 0000000000..48e89c6fac --- /dev/null +++ b/modules/dnn/src/dnn_params.cpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include "dnn_common.hpp" +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +size_t getParam_DNN_NETWORK_DUMP() +{ + static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0); + return DNN_NETWORK_DUMP; +} + +// this option is useful to run with valgrind memory errors detection +bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS() +{ + static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false); + return DNN_DISABLE_MEMORY_OPTIMIZATIONS; +} + +#ifdef HAVE_OPENCL +bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() +{ + static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false); + return DNN_OPENCL_ALLOW_ALL_DEVICES; +} +#endif + +int getParam_DNN_BACKEND_DEFAULT() +{ + static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", +#ifdef HAVE_INF_ENGINE + (size_t)DNN_BACKEND_INFERENCE_ENGINE +#else + (size_t)DNN_BACKEND_OPENCV +#endif + ); + return PARAM_DNN_BACKEND_DEFAULT; +} + +// Additional checks (slowdowns execution!) 
+bool getParam_DNN_CHECK_NAN_INF() +{ + static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false); + return DNN_CHECK_NAN_INF; +} +bool getParam_DNN_CHECK_NAN_INF_DUMP() +{ + static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); + return DNN_CHECK_NAN_INF_DUMP; +} +bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR() +{ + static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); + return DNN_CHECK_NAN_INF_RAISE_ERROR; +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/dnn_read.cpp b/modules/dnn/src/dnn_read.cpp new file mode 100644 index 0000000000..931170722b --- /dev/null +++ b/modules/dnn/src/dnn_read.cpp @@ -0,0 +1,93 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Net readNet(const String& _model, const String& _config, const String& _framework) +{ + String framework = toLowerCase(_framework); + String model = _model; + String config = _config; + const std::string modelExt = model.substr(model.rfind('.') + 1); + const std::string configExt = config.substr(config.rfind('.') + 1); + if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || modelExt == "prototxt" || configExt == "prototxt") + { + if (modelExt == "prototxt" || configExt == "caffemodel") + std::swap(model, config); + return readNetFromCaffe(config, model); + } + if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || modelExt == "pbtxt" || configExt == "pbtxt") + { + if (modelExt == "pbtxt" || configExt == "pb") + std::swap(model, config); + return readNetFromTensorflow(model, config); + } + if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net") + { + return readNetFromTorch(model.empty() ? config : model); + } + if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || modelExt == "cfg" || configExt == "cfg") + { + if (modelExt == "cfg" || configExt == "weights") + std::swap(model, config); + return readNetFromDarknet(config, model); + } + if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || modelExt == "xml" || configExt == "xml") + { + if (modelExt == "xml" || configExt == "bin") + std::swap(model, config); + return readNetFromModelOptimizer(config, model); + } + if (framework == "onnx" || modelExt == "onnx") + { + return readNetFromONNX(model); + } + CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + model + (config.empty() ? 
"" : ", " + config)); +} + +Net readNet(const String& _framework, const std::vector& bufferModel, + const std::vector& bufferConfig) +{ + String framework = toLowerCase(_framework); + if (framework == "caffe") + return readNetFromCaffe(bufferConfig, bufferModel); + else if (framework == "tensorflow") + return readNetFromTensorflow(bufferModel, bufferConfig); + else if (framework == "darknet") + return readNetFromDarknet(bufferConfig, bufferModel); + else if (framework == "torch") + CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); + else if (framework == "dldt") + return readNetFromModelOptimizer(bufferConfig, bufferModel); + CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); +} + +Net readNetFromModelOptimizer(const String& xml, const String& bin) +{ + return Net::readFromModelOptimizer(xml, bin); +} + +Net readNetFromModelOptimizer(const std::vector& bufferCfg, const std::vector& bufferModel) +{ + return Net::readFromModelOptimizer(bufferCfg, bufferModel); +} + +Net readNetFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize) +{ + return Net::readFromModelOptimizer( + bufferModelConfigPtr, bufferModelConfigSize, + bufferWeightsPtr, bufferWeightsSize); +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp new file mode 100644 index 0000000000..aa4a6eadf1 --- /dev/null +++ b/modules/dnn/src/dnn_utils.cpp @@ -0,0 +1,158 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Mat blobFromImage(InputArray image, double scalefactor, const Size& size, + const Scalar& mean, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + Mat blob; + blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth); + return blob; +} + +void blobFromImage(InputArray image, OutputArray blob, double scalefactor, + const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + std::vector images(1, image.getMat()); + blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); +} + +Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size, + const Scalar& mean, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + Mat blob; + blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); + return blob; +} + +void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor, + Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); + if (ddepth == CV_8U) + { + CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth"); + CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); + } + + std::vector images; + images_.getMatVector(images); + CV_Assert(!images.empty()); + for (size_t i = 0; i < images.size(); i++) + { + Size imgSize = images[i].size(); + if (size == Size()) + size = imgSize; + if (size != imgSize) + { + if (crop) + { + float resizeFactor = std::max(size.width / (float)imgSize.width, + size.height / (float)imgSize.height); + resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); + Rect crop(Point(0.5 * (images[i].cols - size.width), + 0.5 * (images[i].rows - size.height)), + size); + images[i] = 
images[i](crop); + } + else + resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + } + if (images[i].depth() == CV_8U && ddepth == CV_32F) + images[i].convertTo(images[i], CV_32F); + Scalar mean = mean_; + if (swapRB) + std::swap(mean[0], mean[2]); + + images[i] -= mean; + images[i] *= scalefactor; + } + + size_t nimages = images.size(); + Mat image0 = images[0]; + int nch = image0.channels(); + CV_Assert(image0.dims == 2); + if (nch == 3 || nch == 4) + { + int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; + blob_.create(4, sz, ddepth); + Mat blob = blob_.getMat(); + Mat ch[4]; + + for (size_t i = 0; i < nimages; i++) + { + const Mat& image = images[i]; + CV_Assert(image.depth() == blob_.depth()); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); + CV_Assert(image.size() == image0.size()); + + for (int j = 0; j < nch; j++) + ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j)); + if (swapRB) + std::swap(ch[0], ch[2]); + split(image, ch); + } + } + else + { + CV_Assert(nch == 1); + int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; + blob_.create(4, sz, ddepth); + Mat blob = blob_.getMat(); + + for (size_t i = 0; i < nimages; i++) + { + const Mat& image = images[i]; + CV_Assert(image.depth() == blob_.depth()); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 1)); + CV_Assert(image.size() == image0.size()); + + image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0))); + } + } +} + +void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) +{ + CV_TRACE_FUNCTION(); + + // A blob is a 4 dimensional matrix in floating point precision + // blob_[0] = batchSize = nbOfImages + // blob_[1] = nbOfChannels + // blob_[2] = height + // blob_[3] = width + CV_Assert(blob_.depth() == CV_32F); + CV_Assert(blob_.dims == 4); + + images_.create(cv::Size(1, blob_.size[0]), blob_.depth()); + + std::vector vectorOfChannels(blob_.size[1]); + for (int n = 0; n < blob_.size[0]; ++n) 
+ { + for (int c = 0; c < blob_.size[1]; ++c) + { + vectorOfChannels[c] = getPlane(blob_, n, c); + } + cv::merge(vectorOfChannels, images_.getMatRef(n)); + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/layer.cpp b/modules/dnn/src/layer.cpp new file mode 100644 index 0000000000..ee5c255d57 --- /dev/null +++ b/modules/dnn/src/layer.cpp @@ -0,0 +1,247 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Layer::Layer() { preferableTarget = DNN_TARGET_CPU; } + +Layer::Layer(const LayerParams& params) + : blobs(params.blobs) + , name(params.name) + , type(params.type) +{ + preferableTarget = DNN_TARGET_CPU; +} + +void Layer::setParamsFrom(const LayerParams& params) +{ + blobs = params.blobs; + name = params.name; + type = params.type; +} + +int Layer::inputNameToIndex(String) +{ + return -1; +} + +int Layer::outputNameToIndex(const String&) +{ + return 0; +} + +bool Layer::supportBackend(int backendId) +{ + return backendId == DNN_BACKEND_OPENCV; +} + +Ptr Layer::initCUDA( + void*, + const std::vector>&, + const std::vector>&) +{ + CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initVkCom(const std::vector>&) +{ + CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initHalide(const std::vector>&) +{ + CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initNgraph(const std::vector>& inputs, const std::vector>& nodes) +{ + CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initWebnn(const 
std::vector>& inputs, const std::vector>& nodes) +{ + CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::tryAttach(const Ptr& node) +{ + return Ptr(); +} + +bool Layer::setActivation(const Ptr&) { return false; } +bool Layer::tryFuse(Ptr&) { return false; } +void Layer::getScaleShift(Mat& scale, Mat& shift) const +{ + scale = Mat(); + shift = Mat(); +} + +void Layer::getScaleZeropoint(float& scale, int& zeropoint) const +{ + scale = 1.f; + zeropoint = 0; +} + +void Layer::unsetAttached() +{ + setActivation(Ptr()); +} + +template +static void vecToPVec(const std::vector& v, std::vector& pv) +{ + pv.resize(v.size()); + for (size_t i = 0; i < v.size(); i++) + pv[i] = const_cast(&v[i]); +} + +void Layer::finalize(const std::vector& inputs, std::vector& outputs) +{ + CV_TRACE_FUNCTION(); + this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs); +} + +void Layer::finalize(const std::vector& input, std::vector& output) +{ + CV_UNUSED(input); + CV_UNUSED(output); +} + +void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) +{ + CV_TRACE_FUNCTION(); + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + std::vector inputsp; + vecToPVec(inputs, inputsp); + this->finalize(inputsp, outputs); +} + +std::vector Layer::finalize(const std::vector& inputs) +{ + CV_TRACE_FUNCTION(); + + std::vector outputs; + this->finalize(inputs, outputs); + return outputs; +} + +void Layer::forward(std::vector& input, std::vector& output, std::vector& internals) +{ + // We kept this method for compatibility. DNN calls it now only to support users' implementations. 
+} + +void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); +} + +void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S) + { + std::vector inputs; + std::vector outputs; + std::vector internals; + + std::vector orig_inputs; + std::vector orig_outputs; + std::vector orig_internals; + + inputs_arr.getUMatVector(orig_inputs); + outputs_arr.getUMatVector(orig_outputs); + internals_arr.getUMatVector(orig_internals); + + inputs.resize(orig_inputs.size()); + for (size_t i = 0; i < orig_inputs.size(); i++) + convertFp16(orig_inputs[i], inputs[i]); + + outputs.resize(orig_outputs.size()); + for (size_t i = 0; i < orig_outputs.size(); i++) + outputs[i].create(shape(orig_outputs[i]), CV_32F); + + internals.resize(orig_internals.size()); + for (size_t i = 0; i < orig_internals.size(); i++) + internals[i].create(shape(orig_internals[i]), CV_32F); + + forward(inputs, outputs, internals); + + for (size_t i = 0; i < outputs.size(); i++) + convertFp16(outputs[i], orig_outputs[i]); + + // sync results back + outputs_arr.assign(orig_outputs); + internals_arr.assign(orig_internals); + return; + } + std::vector inpvec; + std::vector outputs; + std::vector internals; + + inputs_arr.getMatVector(inpvec); + outputs_arr.getMatVector(outputs); + internals_arr.getMatVector(internals); + + std::vector inputs(inpvec.size()); + for (int i = 0; i < inpvec.size(); i++) + inputs[i] = &inpvec[i]; + + this->forward(inputs, outputs, internals); + + // sync results back + outputs_arr.assign(outputs); + internals_arr.assign(internals); +} + +void Layer::run(const 
std::vector& inputs, std::vector& outputs, std::vector& internals) +{ + CV_TRACE_FUNCTION(); + + this->finalize(inputs, outputs); + this->forward(inputs, outputs, internals); +} + +bool Layer::tryQuantize(const std::vector>& scales, + const std::vector>& zeropoints, LayerParams& params) +{ + return false; +} + +Layer::~Layer() {} + +bool Layer::getMemoryShapes(const std::vector& inputs, + const int requiredOutputs, + std::vector& outputs, + std::vector& internals) const +{ + CV_Assert(inputs.size()); + outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]); + return false; +} + +bool Layer::updateMemoryShapes(const std::vector& inputs) +{ + return true; +} + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/layer_factory.cpp b/modules/dnn/src/layer_factory.cpp new file mode 100644 index 0000000000..5c80cd09ad --- /dev/null +++ b/modules/dnn/src/layer_factory.cpp @@ -0,0 +1,111 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include + +#include // getLayerFactoryImpl + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +Mutex& getLayerFactoryMutex() +{ + static Mutex* volatile instance = NULL; + if (instance == NULL) + { + cv::AutoLock lock(getInitializationMutex()); + if (instance == NULL) + instance = new Mutex(); + } + return *instance; +} + +static LayerFactory_Impl& getLayerFactoryImpl_() +{ + static LayerFactory_Impl impl; + return impl; +} + +LayerFactory_Impl& getLayerFactoryImpl() +{ + static LayerFactory_Impl* volatile instance = NULL; + if (instance == NULL) + { + cv::AutoLock lock(getLayerFactoryMutex()); + if (instance == NULL) + { + instance = &getLayerFactoryImpl_(); + initializeLayerFactory(); + } + } + return *instance; +} + +void LayerFactory::registerLayer(const String& type, Constructor constructor) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + cv::AutoLock lock(getLayerFactoryMutex()); + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); + + if (it != getLayerFactoryImpl().end()) + { + if (it->second.back() == constructor) + CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered"); + it->second.push_back(constructor); + } + getLayerFactoryImpl().insert(std::make_pair(type, std::vector(1, constructor))); +} + +void LayerFactory::unregisterLayer(const String& type) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + cv::AutoLock lock(getLayerFactoryMutex()); + + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); + if (it != getLayerFactoryImpl().end()) + { + if (it->second.size() > 1) + it->second.pop_back(); + else + getLayerFactoryImpl().erase(it); + } +} + +bool LayerFactory::isLayerRegistered(const std::string& type) +{ + cv::AutoLock lock(getLayerFactoryMutex()); + auto& registeredLayers = getLayerFactoryImpl(); + return registeredLayers.find(type) != registeredLayers.end(); +} + +Ptr 
LayerFactory::createLayerInstance(const String& type, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + cv::AutoLock lock(getLayerFactoryMutex()); + LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type); + + if (it != getLayerFactoryImpl().end()) + { + CV_Assert(!it->second.empty()); + return it->second.back()(params); + } + else + { + return Ptr(); // NULL + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/layer_internals.hpp b/modules/dnn/src/layer_internals.hpp new file mode 100644 index 0000000000..9ded3543e1 --- /dev/null +++ b/modules/dnn/src/layer_internals.hpp @@ -0,0 +1,335 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__ +#define __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__ + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN +inline namespace detail { + +struct LayerPin +{ + int lid; + int oid; + + LayerPin(int layerId = -1, int outputId = -1) + : lid(layerId) + , oid(outputId) + {} + + bool valid() const + { + return (lid >= 0 && oid >= 0); + } + + bool equal(const LayerPin& r) const + { + return (lid == r.lid && oid == r.oid); + } + + bool operator<(const LayerPin& r) const + { + return lid < r.lid || (lid == r.lid && oid < r.oid); + } + + bool operator==(const LayerPin& r) const + { + return lid == r.lid && oid == r.oid; + } +}; + +struct LayerData +{ + LayerData() + : id(-1) + , dtype(CV_32F) + , skip(false) + , flag(0) + {} + LayerData(int _id, const String& _name, const String& _type, const int& _dtype, LayerParams& _params) + : id(_id) + , name(_name) + , type(_type) + , dtype(_dtype) + , params(_params) + , skip(false) + , flag(0) + { + CV_TRACE_FUNCTION(); + + // add logging info + params.name = name; + params.type = type; + } + + int id; + 
String name; + String type; + int dtype; // Datatype of output blobs. + LayerParams params; + + std::vector inputBlobsId; + std::set inputLayersId; + std::set requiredOutputs; + std::vector consumers; + std::vector> outputBlobsWrappers; + std::vector> inputBlobsWrappers; + std::vector> internalBlobsWrappers; + +#ifdef HAVE_CUDA + /* output ids which must be transferred to the host in the background + * after the completion of the forward pass of the layer + */ + std::vector cudaD2HBackgroundTransfers; +#endif + + Ptr layerInstance; + std::vector outputBlobs; + std::vector inputBlobs; + std::vector internals; + // Computation nodes of implemented backends (except DEFAULT). + std::map> backendNodes; + // Flag for skip layer computation for specific backend. + bool skip; + + int flag; + + Ptr getLayerInstance() + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + if (layerInstance) + return layerInstance; + + layerInstance = LayerFactory::createLayerInstance(type, params); + if (!layerInstance) + { + CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); + } + + return layerInstance; + } +}; + + +// fake layer containing network input blobs +struct DataLayer : public Layer +{ + DataLayer() + : Layer() + { + skip = false; + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + // FIXIT: add wrapper without exception suppression + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + + bool isFP16 = outputs_arr.depth() == CV_16S; + + std::vector outputs, internals; + outputs_arr.getMatVector(outputs); + internals_arr.getMatVector(internals); + + for (int i = 0; i < inputsData.size(); ++i) + { 
+ double scale = scaleFactors[i]; + Scalar& mean = means[i]; + + CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); + if (isFP16) + CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); + else + CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); + + bool singleMean = true; + for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j) + { + singleMean = mean[j] == mean[j - 1]; + } + + if (singleMean) + { + if (isFP16) + { + Mat input_f32; + inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale); + convertFp16(input_f32, outputs[i]); + } + else + { + inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); + } + } + else + { + for (int n = 0; n < inputsData[i].size[0]; ++n) + { + for (int c = 0; c < inputsData[i].size[1]; ++c) + { + Mat inp = getPlane(inputsData[i], n, c); + Mat out = getPlane(outputs[i], n, c); + if (isFP16) + { + Mat input_f32; + inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale); + convertFp16(input_f32, out); + } + else + { + inp.convertTo(out, CV_32F, scale, -mean[c] * scale); + } + } + } + } + } + } + +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + bool isFP16 = outputs_.depth() == CV_16S; + + std::vector outputs; + outputs_.getUMatVector(outputs); + + for (int i = 0; i < inputsData.size(); ++i) + { + Mat inputData = inputsData[i]; + + double scale = scaleFactors[i]; + Scalar& mean = means[i]; + + CV_Assert(mean == Scalar() || inputData.size[1] <= 4); + if (isFP16) + CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); + else + CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); + + bool singleMean = true; + for (int j = 1; j < std::min(4, inputData.size[1]) && singleMean; ++j) + { + singleMean = mean[j] == mean[j - 1]; + } + + if (singleMean) + { + if (isFP16) + { + UMat input_i; + inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale); + convertFp16(input_i, outputs[i]); + } + else + { + 
inputData.convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); + } + } + else + { + for (int n = 0; n < inputData.size[0]; ++n) + { + for (int c = 0; c < inputData.size[1]; ++c) + { + Mat inp = getPlane(inputData, n, c); + + std::vector plane(4, Range::all()); + plane[0] = Range(n, n + 1); + plane[1] = Range(c, c + 1); + UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size); + + if (isFP16) + { + UMat input_i; + inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale); + convertFp16(input_i, out); + } + else + { + inp.convertTo(out, CV_32F, scale, -mean[c] * scale); + } + } + } + } + } + return true; + } +#endif + + int outputNameToIndex(const String& tgtName) CV_OVERRIDE + { + int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); + return (idx < (int)outNames.size()) ? idx : -1; + } + + void setNames(const std::vector& names) + { + outNames.assign(names.begin(), names.end()); + shapes.clear(); + shapes.resize(outNames.size()); + } + + void setInputShape(const String& tgtName, const MatShape& shape) + { + std::vector::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName); + CV_Check(tgtName, it != outNames.end(), "Unknown input"); + int idx = (int)(it - outNames.begin()); + + CV_Assert(idx < (int)shapes.size()); + CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed"); + shapes[idx] = shape; + } + + bool getMemoryShapes(const std::vector& inputs, + const int requiredOutputs, + std::vector& outputs, + std::vector& internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() == requiredOutputs); + outputs.assign(inputs.begin(), inputs.end()); + return false; + } + + virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + std::vector outputs; + outputs_arr.getMatVector(outputs); + + CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(), + inputsData.size() == outputs.size()); + skip = true; + for (int i = 0; skip && i 
< inputsData.size(); ++i) + { + if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar()) + skip = false; + } + } + + + std::vector outNames; + std::vector shapes; + // Preprocessing parameters for each network's input. + std::vector scaleFactors; + std::vector means; + std::vector inputsData; + bool skip; +}; // DataLayer + + +} // namespace detail +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__ diff --git a/modules/dnn/src/layers/not_implemented_layer.cpp b/modules/dnn/src/layers/not_implemented_layer.cpp index 3fd52c09d9..f661155219 100644 --- a/modules/dnn/src/layers/not_implemented_layer.cpp +++ b/modules/dnn/src/layers/not_implemented_layer.cpp @@ -8,7 +8,7 @@ namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN -namespace detail { +inline namespace detail { class NotImplementedImpl CV_FINAL : public NotImplemented { diff --git a/modules/dnn/src/legacy_backend.cpp b/modules/dnn/src/legacy_backend.cpp new file mode 100644 index 0000000000..92661abb63 --- /dev/null +++ b/modules/dnn/src/legacy_backend.cpp @@ -0,0 +1,122 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include "legacy_backend.hpp" + +#include "op_halide.hpp" +#include "op_inf_engine.hpp" +#include "ie_ngraph.hpp" +#include "op_vkcom.hpp" +#include "op_cuda.hpp" +#include "op_webnn.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +BackendNode::BackendNode(int backendId) + : backendId(backendId) +{} + +BackendNode::~BackendNode() {}; + +BackendWrapper::BackendWrapper(int backendId, int targetId) + : backendId(backendId) + , targetId(targetId) +{} + +BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m) +{ + CV_Error(Error::StsNotImplemented, + "Constructor of backend wrapper must be implemented"); +} + +BackendWrapper::BackendWrapper(const Ptr& base, const MatShape& shape) +{ + CV_Error(Error::StsNotImplemented, + "Constructor of backend wrapper must be implemented"); +} + +BackendWrapper::~BackendWrapper() {} + + + +inline namespace detail { + + +Ptr wrapMat(int backendId, int targetId, cv::Mat& m) +{ + if (backendId == DNN_BACKEND_OPENCV) + { + if (targetId == DNN_TARGET_CPU) + return Ptr(); +#ifdef HAVE_OPENCL + else if (IS_DNN_OPENCL_TARGET(targetId)) + return OpenCLBackendWrapper::create(m); +#endif + else + CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier"); + } + else if (backendId == DNN_BACKEND_HALIDE) + { + CV_Assert(haveHalide()); +#ifdef HAVE_HALIDE + return Ptr(new HalideBackendWrapper(targetId, m)); +#endif // HAVE_HALIDE + } + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; + } + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { +#ifdef HAVE_DNN_NGRAPH + return Ptr(new NgraphBackendWrapper(targetId, m)); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph"); +#endif + } + else if (backendId == DNN_BACKEND_WEBNN) + { +#ifdef HAVE_WEBNN + return Ptr(new WebnnBackendWrapper(targetId, m)); +#else + 
CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); +#endif + } + else if (backendId == DNN_BACKEND_VKCOM) + { + CV_Assert(haveVulkan()); +#ifdef HAVE_VULKAN + return Ptr(new VkComBackendWrapper(m)); +#endif // HAVE_VULKAN + } + else if (backendId == DNN_BACKEND_CUDA) + { + CV_Assert(haveCUDA()); + +#ifdef HAVE_CUDA + switch (targetId) + { + case DNN_TARGET_CUDA: + return CUDABackendWrapperFP32::create(m); + case DNN_TARGET_CUDA_FP16: + return CUDABackendWrapperFP16::create(m); + default: + CV_Assert(IS_DNN_CUDA_TARGET(targetId)); + } +#endif + } + else + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + return Ptr(); // TODO Error? +} // wrapMat() + + +} // namespace detail +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/legacy_backend.hpp b/modules/dnn/src/legacy_backend.hpp new file mode 100644 index 0000000000..e9ca3fecb3 --- /dev/null +++ b/modules/dnn/src/legacy_backend.hpp @@ -0,0 +1,339 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__ +#define __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__ + +#include "layer_internals.hpp" // LayerPin LayerData DataLayer + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN +inline namespace detail { + + +#ifdef HAVE_OPENCL +class OpenCLBackendWrapper : public BackendWrapper +{ +public: + OpenCLBackendWrapper(Mat& m) + : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) + { + m.copyTo(umat); + host = &m; + hostDirty = false; + } + + OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) + : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) + { + Ptr base = baseBuffer.dynamicCast(); + CV_Assert(!base.empty()); + + host = &m; + + int shape[] = { 1, (int)base->umat.total() }; + umat = base->umat.reshape(1, 2, &shape[0]) + .colRange(0, host->total()) + .reshape(1, host->dims, &host->size[0]); + hostDirty = false; + } + + static Ptr create(Mat& m) + { + return Ptr(new OpenCLBackendWrapper(m)); + } + + static Ptr create(const Ptr& baseBuffer, Mat& m) + { + return Ptr(new OpenCLBackendWrapper(baseBuffer, m)); + } + + static std::vector getUMatVector(const std::vector>& wrappers) + { + const int numWrappers = wrappers.size(); + std::vector mats(wrappers.size()); + for (int i = 0; i < numWrappers; ++i) + { + Ptr umatWrapper = wrappers[i].dynamicCast(); + CV_Assert(!umatWrapper.empty()); + umatWrapper->copyToDevice(); + mats[i] = umatWrapper->umat; + } + return mats; + } + + // Replaces all umats in wrappers to specific ones. + static void update(const std::vector>& wrappers, + const std::vector& umats) + { + CV_Assert(wrappers.size() == umats.size()); + for (int i = 0, n = umats.size(); i < n; ++i) + { + Ptr umatWrapper = wrappers[i].dynamicCast(); + CV_Assert(!umatWrapper.empty()); + umatWrapper->umat = umats[i]; + } + } + + ~OpenCLBackendWrapper() {} + + // Copies data from device to a host memory. 
+ virtual void copyToHost() CV_OVERRIDE + { + umat.copyTo(*host); + } + + virtual void setHostDirty() CV_OVERRIDE + { + hostDirty = true; + }; + + void copyToDevice() + { + if (hostDirty) + { + host->copyTo(umat); + hostDirty = false; + } + } + +private: + UMat umat; + Mat* host; + bool hostDirty; +}; // OpenCLBackendWrapper +#endif // HAVE_OPENCL + + +struct BlobManager +{ +public: + // Increase references counter to layer output. + void addReference(const LayerPin& lp) + { + std::map::iterator it = refCounter.find(lp); + if (it == refCounter.end()) + refCounter[lp] = 1; + else + it->second += 1; + } + + void addReferences(const std::vector& pins) + { + for (int i = 0; i < pins.size(); i++) + { + addReference(pins[i]); + } + } + + // Returns number of references to allocated memory that used in specific + // layer blob. + int numReferences(const LayerPin& lp) + { + std::map::const_iterator mapIt = reuseMap.find(lp); + CV_Assert(mapIt != reuseMap.end()); + LayerPin memHost = mapIt->second; + + std::map::const_iterator refIt = refCounter.find(memHost); + CV_Assert(refIt != refCounter.end()); + return refIt->second; + } + + // Reuse data allocated in inside the blob. + void reuse(const LayerPin& host, const LayerPin& user) + { + CV_Assert(reuseMap.find(user) == reuseMap.end()); + CV_Assert(reuseMap.find(host) != reuseMap.end()); + LayerPin memHost = reuseMap[host]; + reuseMap[user] = memHost; + if (refCounter.find(memHost) != refCounter.end()) + { + std::map::iterator userRefIt = refCounter.find(user); + if (userRefIt != refCounter.end()) + { + refCounter[memHost] += userRefIt->second; + refCounter.erase(userRefIt); + } + else + refCounter[memHost] += 1; + } + } + + // Decrease references counter to allocated memory inside specific blob. 
+ void releaseReference(const LayerPin& lp) + { + std::map::const_iterator mapIt = reuseMap.find(lp); + CV_Assert(mapIt != reuseMap.end()); + + std::map::iterator refIt = refCounter.find(mapIt->second); + CV_Assert(refIt != refCounter.end()); + CV_Assert(refIt->second > 0); + refIt->second -= 1; + } + + void releaseReferences(const std::vector& pins) + { + for (int i = 0; i < pins.size(); i++) + { + releaseReference(pins[i]); + } + } + + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, const int& dtype) + { + if (!getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS()) + { + Mat bestBlob; + LayerPin bestBlobPin; + + std::map::const_iterator hostIt; + std::map::const_iterator refIt; + + const int targetTotal = total(shape); + int bestBlobTotal = INT_MAX; + + for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) + { + refIt = refCounter.find(hostIt->first); + // Use only blobs that had references before because if not, + // it might be used as output. + if (refIt != refCounter.end() && refIt->second == 0) + { + const Mat& unusedBlob = hostIt->second; + if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal && unusedBlob.type() == dtype) + { + bestBlobPin = hostIt->first; + bestBlob = unusedBlob; + bestBlobTotal = unusedBlob.total(); + } + } + } + if (!bestBlob.empty()) + { + reuse(bestBlobPin, lp); + dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); + return; + } + } + + { + // if dst already has been allocated with total(shape) elements, + // it won't be recreated and pointer of dst.data remains the same. 
+ dst.create(shape, dtype); + addHost(lp, dst); + } + } + + void allocateBlobsForLayer(LayerData& ld, const LayerShapes& layerShapes, + std::vector& pinsForInternalBlobs) + { + CV_TRACE_FUNCTION(); + + pinsForInternalBlobs.clear(); + + std::vector&outputBlobs = ld.outputBlobs, + &internalBlobs = ld.internals; + + const ShapesVec &outShapes = layerShapes.out, + internalShapes = layerShapes.internal; + + outputBlobs.resize(std::max((size_t)1, outShapes.size())); // layer produce at least one output blob + internalBlobs.resize(internalShapes.size()); + + CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); + + // Check that layer could work in-place. + bool inPlace = false; + if (layerShapes.supportInPlace) + { + if (ld.inputBlobs.size() == 1) + { + // Get number of references to the input memory. + int numRef = numReferences(ld.inputBlobsId[0]); + // If current layer is one and only customer of this blob. + inPlace = numRef == 1; + } + } + + ShapesVec shapes(outShapes); + shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); + std::vector blobs; + for (int i = 0; i < outputBlobs.size(); i++) + { + blobs.push_back(&outputBlobs[i]); + } + + for (int i = 0; i < internalBlobs.size(); i++) + { + blobs.push_back(&internalBlobs[i]); + if (total(internalShapes[i])) + { + pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); + } + } + + addReferences(pinsForInternalBlobs); + + std::map> idxSizes; + for (int i = 0; i < shapes.size(); i++) + { + idxSizes[total(shapes[i])].push_back(i); + } + + std::map>::reverse_iterator it; + for (it = idxSizes.rbegin(); it != idxSizes.rend(); it++) + { + for (int j = 0; j < it->second.size(); j++) + { + int index = it->second[j]; + if (total(shapes[index])) + { + LayerPin blobPin(ld.id, index); + if (index < outShapes.size() && inPlace) + { + CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); + ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); + 
reuse(ld.inputBlobsId[0], blobPin); + } + else + reuseOrCreate(shapes[index], blobPin, *blobs[index], ld.dtype); + } + } + } + } + + // Clear internal state. Calls before an every reallocation. + void reset() + { + CV_TRACE_FUNCTION(); + + refCounter.clear(); + reuseMap.clear(); + memHosts.clear(); + } + +private: + // Register allocated memory. + void addHost(const LayerPin& lp, const Mat& mat) + { + CV_Assert(memHosts.find(lp) == memHosts.end()); + reuseMap[lp] = lp; + memHosts[lp] = mat; + } + + std::map refCounter; + // Maps pin to origin blob (for whom memory was allocated firstly). + // For origin blobs key == value. + std::map reuseMap; + std::map memHosts; +}; // BlobManager + + +Ptr wrapMat(int backendId, int targetId, cv::Mat& m); + + +} // namespace detail +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__ diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index bc8709d22e..45e0872a99 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -1500,4 +1500,4 @@ int TextDetectionModel_DB::getMaxCandidates() const } -}} // namespace +}} // namespace diff --git a/modules/dnn/src/net.cpp b/modules/dnn/src/net.cpp new file mode 100644 index 0000000000..901101b1e0 --- /dev/null +++ b/modules/dnn/src/net.cpp @@ -0,0 +1,414 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +Net::Net() + : impl(makePtr()) +{ +} + +Net::~Net() +{ +} + +int Net::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->addLayer(name, type, dtype, params); +} + +int Net::addLayer(const String& name, const String& type, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + return addLayer(name, type, CV_32F, params); +} + +int Net::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->addLayerToPrev(name, type, dtype, params); +} + +int Net::addLayerToPrev(const String& name, const String& type, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + return addLayerToPrev(name, type, CV_32F, params); +} + +void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + impl->connect(outLayerId, outNum, inpLayerId, inpNum); +} + +void Net::connect(String _outPin, String _inPin) +{ + CV_TRACE_FUNCTION(); + + CV_Assert(impl); + LayerPin outPin = impl->getPinByAlias(_outPin); + LayerPin inpPin = impl->getPinByAlias(_inPin); + + CV_Assert(outPin.valid() && inpPin.valid()); + + impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); +} + +int Net::registerOutput(const std::string& outputName, int layerId, int outputPort) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->registerOutput(outputName, layerId, outputPort); +} + +Mat Net::forward(const String& outputName) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputName); +} + +AsyncArray Net::forwardAsync(const String& outputName) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forwardAsync(outputName); +} + +void Net::forward(OutputArrayOfArrays outputBlobs, const 
String& outputName) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputBlobs, outputName); +} + +void Net::forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputBlobs, outBlobNames); +} + +void Net::forward(std::vector>& outputBlobs, + const std::vector& outBlobNames) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputBlobs, outBlobNames); +} + +// FIXIT drop from inference API +Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->quantize(calibData, inputsDtype, outputsDtype); +} + +// FIXIT drop from inference API +void Net::getInputDetails(std::vector& scales, std::vector& zeropoints) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->getInputDetails(scales, zeropoints); +} + +// FIXIT drop from inference API +void Net::getOutputDetails(std::vector& scales, std::vector& zeropoints) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->getOutputDetails(scales, zeropoints); +} + +void Net::setPreferableBackend(int backendId) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG(backendId); + CV_Assert(impl); + return impl->setPreferableBackend(backendId); +} + +void Net::setPreferableTarget(int targetId) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG(targetId); + CV_Assert(impl); + return impl->setPreferableTarget(targetId); +} + +void Net::setInputsNames(const std::vector& inputBlobNames) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->setInputsNames(inputBlobNames); +} + +void Net::setInputShape(const String& inputName, const MatShape& shape) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->setInputShape(inputName, shape); +} + +void Net::setInput(InputArray blob, 
const String& name, double scalefactor, const Scalar& mean) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_Assert(impl); + return impl->setInput(blob, name, scalefactor, mean); +} + +Mat Net::getParam(int layer, int numParam) const +{ + CV_Assert(impl); + return impl->getParam(layer, numParam); +} + +void Net::setParam(int layer, int numParam, const Mat& blob) +{ + CV_Assert(impl); + return impl->setParam(layer, numParam, blob); +} + +int Net::getLayerId(const String& layer) const +{ + CV_Assert(impl); + return impl->getLayerId(layer); +} + +String Net::dump() +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->dump(true); +} + +void Net::dumpToFile(const String& path) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + std::ofstream file(path.c_str()); + file << dump(); + file.close(); +} + +Ptr Net::getLayer(int layerId) const +{ + CV_Assert(impl); + return impl->getLayer(layerId); +} +Ptr Net::getLayer(const LayerId& layerId) const +{ + CV_Assert(impl); + return impl->getLayer(layerId); +} + +std::vector> Net::getLayerInputs(int layerId) const +{ + CV_Assert(impl); + return impl->getLayerInputs(layerId); +} + +std::vector Net::getLayerNames() const +{ + CV_Assert(impl); + return impl->getLayerNames(); +} + +bool Net::empty() const +{ + CV_Assert(impl); + return impl->empty(); +} + +// FIXIT drop "unconnected" API +std::vector Net::getUnconnectedOutLayers() const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getUnconnectedOutLayers(); +} + +// FIXIT drop "unconnected" API +std::vector Net::getUnconnectedOutLayersNames() const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getUnconnectedOutLayersNames(); +} + +void Net::getLayersShapes(const ShapesVec& netInputShapes, + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) const +{ + CV_Assert(impl); + return impl->getLayersShapes(netInputShapes, layersIds, inLayersShapes, 
outLayersShapes); +} + +void Net::getLayersShapes(const MatShape& netInputShape, + std::vector& layerIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) const +{ + getLayersShapes(ShapesVec(1, netInputShape), + layerIds, inLayersShapes, outLayersShapes); +} + +void Net::getLayerShapes(const MatShape& netInputShape, + const int layerId, + ShapesVec& inLayerShapes, + ShapesVec& outLayerShapes) const +{ + getLayerShapes(ShapesVec(1, netInputShape), + layerId, inLayerShapes, outLayerShapes); +} + +void Net::getLayerShapes(const ShapesVec& netInputShapes, + const int layerId, + ShapesVec& inLayerShapes, + ShapesVec& outLayerShapes) const +{ + CV_Assert(impl); + LayerShapes shapes; + impl->getLayerShapes(netInputShapes, layerId, shapes); + inLayerShapes = shapes.in; + outLayerShapes = shapes.out; +} + +int64 Net::getFLOPS(const std::vector& netInputShapes) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getFLOPS(netInputShapes); +} + +int64 Net::getFLOPS(const MatShape& netInputShape) const +{ + return getFLOPS(std::vector(1, netInputShape)); +} + +int64 Net::getFLOPS(const int layerId, + const std::vector& netInputShapes) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getFLOPS(layerId, netInputShapes); +} + +int64 Net::getFLOPS(const int layerId, + const MatShape& netInputShape) const +{ + return getFLOPS(layerId, std::vector(1, netInputShape)); +} + +void Net::getLayerTypes(std::vector& layersTypes) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getLayerTypes(layersTypes); +} + +int Net::getLayersCount(const String& layerType) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getLayersCount(layerType); +} + +void Net::getMemoryConsumption(const int layerId, + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getMemoryConsumption(layerId, netInputShapes, weights, blobs); +} + +void 
Net::getMemoryConsumption(const std::vector& netInputShapes, + size_t& weights, size_t& blobs) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getMemoryConsumption(netInputShapes, weights, blobs); +} + +void Net::getMemoryConsumption(const int layerId, + const MatShape& netInputShape, + size_t& weights, size_t& blobs) const +{ + getMemoryConsumption(layerId, std::vector(1, netInputShape), + weights, blobs); +} + +void Net::getMemoryConsumption(const MatShape& netInputShape, + size_t& weights, size_t& blobs) const +{ + getMemoryConsumption(std::vector(1, netInputShape), + weights, blobs); +} + +void Net::getMemoryConsumption(const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getMemoryConsumption(netInputShapes, layerIds, weights, blobs); +} + +void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, + std::vector& weights, std::vector& blobs) const +{ + getMemoryConsumption(std::vector(1, netInputShape), layerIds, + weights, blobs); +} + +// FIXIT return old value or add get method +void Net::enableFusion(bool fusion) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->enableFusion(fusion); +} + +void Net::setHalideScheduler(const String& scheduler) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str()); + CV_Assert(impl); + return impl->setHalideScheduler(scheduler); +} + +int64 Net::getPerfProfile(std::vector& timings) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getPerfProfile(timings); +} + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp new file mode 100644 index 0000000000..24f9c8cb2c --- /dev/null +++ b/modules/dnn/src/net_impl.cpp @@ -0,0 +1,2087 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +static int g_networkId = 0; + + +detail::NetImplBase::NetImplBase() + : networkId(CV_XADD(&g_networkId, 1)) + , networkDumpCounter(0) + , dumpLevel(getParam_DNN_NETWORK_DUMP()) +{ + // nothing +} + + +std::string detail::NetImplBase::getDumpFileNameBase() const +{ + std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++); + return dumpFileNameBase; +} + + +Net::Impl::Impl() +{ + // allocate fake net input layer + netInputLayer = Ptr(new DataLayer()); + LayerData& inpl = layers.insert(make_pair(0, LayerData())).first->second; + inpl.id = 0; + netInputLayer->name = inpl.name = "_input"; + inpl.type = "__NetInputLayer__"; + inpl.layerInstance = netInputLayer; + layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); + + lastLayerId = 0; + netWasAllocated = false; + netWasQuantized = false; + fusion = true; + isAsync = false; + preferableBackend = DNN_BACKEND_DEFAULT; + preferableTarget = DNN_TARGET_CPU; + skipInfEngineInit = false; + hasDynamicShapes = false; +} + + +bool Net::Impl::empty() const +{ + return layers.size() <= 1; // first layer is default Data layer +} + + +void Net::Impl::clear() +{ + CV_TRACE_FUNCTION(); + + MapIdToLayerData::iterator it; + for (it = layers.begin(); it != layers.end(); it++) + { + if (it->second.id != 0) + { + it->second.inputBlobs.clear(); + it->second.outputBlobs.clear(); + it->second.internals.clear(); + } + it->second.skip = false; + // it->second.consumers.clear(); + Ptr currLayer = it->second.layerInstance; + + if (currLayer.empty()) + continue; + + currLayer->unsetAttached(); + } + netWasAllocated = false; + layersTimings.clear(); +} + + +void Net::Impl::setUpNet(const std::vector& blobsToKeep_) +{ + 
CV_TRACE_FUNCTION(); + + if (dumpLevel && networkDumpCounter == 0) + { + dumpNetworkToFile(); + } + + if (preferableBackend == DNN_BACKEND_DEFAULT) + preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT(); +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) + preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam(); +#endif + + CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16); + CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL); +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + CV_Assert( + (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16 || + preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || + preferableTarget == DNN_TARGET_FPGA + ); + } +#endif +#ifdef HAVE_WEBNN + if (preferableBackend == DNN_BACKEND_WEBNN) + { + CV_Assert(preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL); + } +#endif + CV_Assert(preferableBackend != DNN_BACKEND_VKCOM || + preferableTarget == DNN_TARGET_VULKAN); + CV_Assert(preferableBackend != DNN_BACKEND_CUDA || + IS_DNN_CUDA_TARGET(preferableTarget)); + if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) + { + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) +#ifndef HAVE_OPENCL + { + CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU."); + preferableTarget = DNN_TARGET_CPU; + } +#else + { + if (!getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()) + { + // Current implementation is only valid for GPU (#11494) + 
if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU) + { + CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU."); + preferableTarget = DNN_TARGET_CPU; + } + else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel()) + { + CV_LOG_WARNING(NULL, + "DNN: OpenCL target with fp16 precision is not supported " + "with current OpenCL device (tested with Intel GPUs only), " + "switching to OpenCL with fp32 precision."); + preferableTarget = DNN_TARGET_OPENCL; + } + } + } +#endif + if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan()) + { + preferableBackend = DNN_BACKEND_OPENCV; + preferableTarget = DNN_TARGET_CPU; + } + + if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA()) + { +#ifdef HAVE_CUDA + CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU"); +#else + CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU"); +#endif + preferableBackend = DNN_BACKEND_OPENCV; + preferableTarget = DNN_TARGET_CPU; + } + + clear(); + + if (hasDynamicShapes) + { + updateLayersShapes(); + } + + this->blobsToKeep = blobsToKeep_; + + allocateLayers(blobsToKeep_); + + MapIdToLayerData::iterator it = layers.find(0); + CV_Assert(it != layers.end()); + it->second.skip = netInputLayer->skip; + + initBackend(blobsToKeep_); + + if (!netWasAllocated) + { +#ifdef HAVE_HALIDE + if (preferableBackend == DNN_BACKEND_HALIDE) + compileHalide(); +#else + CV_Assert(preferableBackend != DNN_BACKEND_HALIDE); +#endif + } + + netWasAllocated = true; + + if (dumpLevel) + { + dumpNetworkToFile(); + } + } +} + + +Ptr Net::Impl::getLayer(int layerId) const +{ + LayerData& ld = getLayerData(layerId); + return ld.getLayerInstance(); +} + + +Ptr Net::Impl::getLayer(const LayerId& layerId) const +{ + LayerData& ld = getLayerData(layerId); + return ld.getLayerInstance(); +} + + +int Net::Impl::getLayerId(const String& layerName) const +{ + 
std::map::const_iterator it = layerNameToId.find(layerName); + return (it != layerNameToId.end()) ? it->second : -1; +} + + +int Net::Impl::getLayerId(int id) const +{ + MapIdToLayerData::const_iterator it = layers.find(id); + return (it != layers.end()) ? id : -1; +} + + +int Net::Impl::getLayerId(DictValue& layerDesc) const +{ + if (layerDesc.isInt()) + return getLayerId(layerDesc.get()); + else if (layerDesc.isString()) + return getLayerId(layerDesc.get()); + + CV_Assert(layerDesc.isInt() || layerDesc.isString()); + return -1; +} + + +String Net::Impl::getLayerName(int id) const +{ + MapIdToLayerData::const_iterator it = layers.find(id); + return (it != layers.end()) ? it->second.name : "(unknown layer)"; +} + + +LayerData& Net::Impl::getLayerData(int id) const +{ + MapIdToLayerData::const_iterator it = layers.find(id); + + if (it == layers.end()) + CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); + + return const_cast(it->second); +} + + +LayerData& Net::Impl::getLayerData(const String& layerName) const +{ + int id = getLayerId(layerName); + + if (id < 0) + CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found"); + + return getLayerData(id); +} + + +LayerData& Net::Impl::getLayerData(const DictValue& layerDesc) const +{ + CV_Assert(layerDesc.isInt() || layerDesc.isString()); + if (layerDesc.isInt()) + return getLayerData(layerDesc.get()); + else /*if (layerDesc.isString())*/ + return getLayerData(layerDesc.get()); +} + + +/*static*/ +void Net::Impl::addLayerInput(LayerData& ld, int inNum, LayerPin from) +{ + if ((int)ld.inputBlobsId.size() <= inNum) + { + ld.inputBlobsId.resize(inNum + 1); + } + else + { + LayerPin storedFrom = ld.inputBlobsId[inNum]; + if (storedFrom.valid() && !storedFrom.equal(from)) + CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected", + inNum, ld.name.c_str())); + } + + ld.inputBlobsId[inNum] = from; +} + + +int 
Net::Impl::resolvePinOutputName(LayerData& ld, const String& outName) const +{ + if (outName.empty()) + return 0; + return ld.getLayerInstance()->outputNameToIndex(outName); +} + + +LayerPin Net::Impl::getPinByAlias(const String& layerName) const +{ + LayerPin pin; + pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); + + if (pin.lid >= 0) + pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName); + + return pin; +} + + +std::vector Net::Impl::getLayerOutPins(const String& layerName) const +{ + int lid = (layerName.empty()) ? 0 : getLayerId(layerName); + + MapIdToLayerData::const_iterator it = layers.find(lid); + if (it == layers.end()) + CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid)); + const size_t nOutputs = it->second.outputBlobs.size(); + + std::vector pins; + for (int i = 0; i < nOutputs; i++) + { + pins.push_back(LayerPin(lid, i)); + } + + return pins; +} + + +// FIXIT remove dtype +int Net::Impl::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + int id = getLayerId(name); + if (id >= 0) + { + if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") + { + CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); + return -1; + } + else + { + LayerData& ld = layers.find(id)->second; + ld.type = type; + ld.params = params; + return -1; + } + } + + id = ++lastLayerId; + layerNameToId.insert(std::make_pair(name, id)); + layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); + if (params.get("has_dynamic_shapes", false)) + hasDynamicShapes = true; + + if (dtype == CV_8S) + netWasQuantized = true; + + return id; +} + + +int Net::Impl::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + int prvLid = lastLayerId; + int newLid = addLayer(name, type, dtype, params); + connect(prvLid, 0, newLid, 0); + return newLid; +} + + +void Net::Impl::connect(int outLayerId, int outNum, int inLayerId, int inNum) +{ + 
CV_Assert(outLayerId < inLayerId); + LayerData& ldOut = getLayerData(outLayerId); + LayerData& ldInp = getLayerData(inLayerId); + + addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); + ldOut.requiredOutputs.insert(outNum); + ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); + + CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")"); +} + + +int Net::Impl::registerOutput(const std::string& outputName, int layerId, int outputPort) +{ + int checkLayerId = getLayerId(outputName); + if (checkLayerId >= 0) + { + if (checkLayerId == layerId) + { + if (outputPort == 0) + { + // layer name correlates with its output name + CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked"); + outputNameToId.insert(std::make_pair(outputName, layerId)); + return checkLayerId; + } + } + CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort)); + } +#if 0 // TODO + if (outputPort == 0) + // make alias only, need to adopt getUnconnectedOutLayers() call +#endif + LayerParams outputLayerParams; + outputLayerParams.name = outputName; + outputLayerParams.type = "Identity"; + int dtype = CV_32F; // FIXIT remove + int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams); + connect(layerId, outputPort, outputLayerId, 0); + CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort); + outputNameToId.insert(std::make_pair(outputName, outputLayerId)); + return outputLayerId; +} + + +void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes) +{ + CV_TRACE_FUNCTION(); + + LayerData& ld = layers[lid]; + + // already allocated + if (ld.flag) + return; + + size_t ninputs = ld.inputBlobsId.size(); +#if 0 + 
printf("layer %s:", ld.name.c_str()); + for (size_t i = 0; i < ninputs; i++) + { + int inp_lid = ld.inputBlobsId[i].lid; + LayerData &inp_ld = layers[inp_lid]; + int inp_outputs = (int)inp_ld.outputBlobs.size(); + std::cout << " " << inp_ld.name << "(" << inp_outputs; + + for( int j = 0; j < inp_outputs; j++ ) + { + std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size; + } + std::cout << ")"; + } + printf("\n"); +#endif + + // determine parent layers + for (size_t i = 0; i < ninputs; i++) + ld.inputLayersId.insert(ld.inputBlobsId[i].lid); + + // allocate parents + for (std::set::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) + allocateLayer(*i, layersShapes); + + // bind inputs + if (ld.id == 0) // DataLayer + { + ninputs = netInputLayer->inputsData.size(); + ld.inputBlobsWrappers.resize(ninputs); + for (size_t i = 0; i < ninputs; i++) + ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]); + } + else + { + ld.inputBlobs.resize(ninputs); + ld.inputBlobsWrappers.resize(ninputs); + for (size_t i = 0; i < ninputs; i++) + { + LayerPin from = ld.inputBlobsId[i]; + CV_Assert(from.valid()); + CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid); + ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid]; + ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid]; + } + } + + LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); + + CV_Assert(layerShapesIt != layersShapes.end()); + + if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F) + ld.dtype = CV_16S; + + std::vector pinsForInternalBlobs; + blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); + ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); + for (int i = 0; i < ld.outputBlobs.size(); ++i) + ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); + + /* CUDA backend has its own system for internal 
blobs; we don't need these */ + ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size()); + for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i) + ld.internalBlobsWrappers[i] = wrap(ld.internals[i]); + + Ptr layerPtr = ld.getLayerInstance(); + { + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + { + inps[i] = *ld.inputBlobs[i]; + } + layerPtr->finalize(inps, ld.outputBlobs); + layerPtr->preferableTarget = preferableTarget; +#if 0 + std::cout << "\toutputs:"; + size_t noutputs = ld.outputBlobs.size(); + for (size_t j = 0; j < noutputs; j++) + { + std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; + } + std::cout << "\n"; +#endif + } + + // After allocation of layer, we decrease counters to it's input blobs. + blobManager.releaseReferences(ld.inputBlobsId); + blobManager.releaseReferences(pinsForInternalBlobs); + + ld.flag = 1; +} + + +void Net::Impl::allocateLayers(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + it->second.flag = 0; + + CV_Assert(!layers[0].outputBlobs.empty()); + ShapesVec inputShapes; + for (int i = 0; i < layers[0].outputBlobs.size(); i++) + { + Mat& inp = layers[0].outputBlobs[i]; + CV_Assert(inp.total()); + if (preferableBackend == DNN_BACKEND_OPENCV && + preferableTarget == DNN_TARGET_OPENCL_FP16 && + layers[0].dtype == CV_32F) + { + layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); + } + inputShapes.push_back(shape(inp)); + } + LayersShapesMap layersShapes; + getLayersShapes(inputShapes, layersShapes); + + blobManager.reset(); + backendWrappers.clear(); + + for (auto& layer : layers) + { + auto& ld = layer.second; + ld.inputBlobsWrappers.clear(); + ld.outputBlobsWrappers.clear(); + ld.internalBlobsWrappers.clear(); + } + + // Fake references to input blobs. 
+ for (int i = 0; i < layers[0].outputBlobs.size(); ++i) + blobManager.addReference(LayerPin(0, i)); + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) + { + const LayerData& ld = it->second; + blobManager.addReferences(ld.inputBlobsId); + } + + for (int i = 0; i < blobsToKeep_.size(); i++) + { + blobManager.addReference(blobsToKeep_[i]); + } + + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) + { + int lid = it->first; + allocateLayer(lid, layersShapes); + } + + layersTimings.resize(lastLayerId + 1, 0); + fuseLayers(blobsToKeep_); +} + + +void Net::Impl::forwardLayer(LayerData& ld) +{ + CV_TRACE_FUNCTION(); + + Ptr layer = ld.layerInstance; + + if (!ld.skip) + { + TickMeter tm; + tm.start(); + +#ifndef HAVE_VULKAN + std::map>::const_iterator it = ld.backendNodes.find(preferableBackend); +#else + std::map>::iterator it = ld.backendNodes.find(preferableBackend); +#endif + if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) + { + if (isAsync) + CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode"); + + if (!layer->supportBackend(DNN_BACKEND_OPENCV)) + CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend", + ld.name.c_str(), ld.type.c_str())); + +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) + { + std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); + std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + std::vector umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers); + layer->forward(umat_inputBlobs, + umat_outputBlobs, + umat_internalBlobs); + if (getParam_DNN_CHECK_NAN_INF()) + { + bool fail = false; + for (size_t i = 0; i < umat_outputBlobs.size(); ++i) + { + UMat& u = umat_outputBlobs[i]; + Mat 
m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + if (!checkRange(m)) + { + std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + else if (!checkRange(m, true, NULL, -1e6, 1e6)) + { + std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + } + if (fail) + { + for (size_t i = 0; i < umat_inputBlobs.size(); ++i) + { + UMat& u = umat_inputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < umat_outputBlobs.size(); ++i) + { + UMat& u = umat_outputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < umat_internalBlobs.size(); ++i) + { + UMat& u = umat_internalBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "INTERNAL " << i << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl; + } + if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR()) + CV_Assert(!fail); + } + } + OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs); + } + else +#endif + { + for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i) + { + 
if (!ld.inputBlobsWrappers[i].empty()) + ld.inputBlobsWrappers[i]->copyToHost(); + } + + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + { + inps[i] = *ld.inputBlobs[i]; + } + layer->forward(inps, ld.outputBlobs, ld.internals); + + if (getParam_DNN_CHECK_NAN_INF()) + { + bool fail = false; + for (size_t i = 0; i < ld.outputBlobs.size(); ++i) + { + const Mat& m = ld.outputBlobs[i]; + if (!checkRange(m)) + { + std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + else if (!checkRange(m, true, NULL, -1e6, 1e6)) + { + std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + } + if (fail) + { + for (size_t i = 0; i < ld.inputBlobs.size(); ++i) + { + const Mat* pM = ld.inputBlobs[i]; + if (!pM) + { + std::cout << "INPUT " << i << " is NULL" << std::endl; + continue; + } + const Mat& m = *pM; + std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < ld.outputBlobs.size(); ++i) + { + const Mat& m = ld.outputBlobs[i]; + std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < ld.internals.size(); ++i) + { + const Mat& m = ld.internals[i]; + std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR()) + CV_Assert(!fail); + } + } + + for (int i = 0, n = 
ld.outputBlobsWrappers.size(); i < n; ++i) + { + if (!ld.outputBlobsWrappers[i].empty()) + ld.outputBlobsWrappers[i]->setHostDirty(); + } + } + } + else + { + Ptr node = it->second; + CV_Assert(!node.empty()); + if (preferableBackend == DNN_BACKEND_CUDA) + { + CV_Assert(haveCUDA()); + +#ifdef HAVE_CUDA + Ptr cudaNode = node.dynamicCast(); + CV_Assert(!cudaNode.empty()); + + cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace); + + for (auto id : ld.cudaD2HBackgroundTransfers) + { + auto wrapper = ld.outputBlobsWrappers[id].dynamicCast(); + wrapper->copyToHostInBackground(); + } +#endif + } + else if (preferableBackend == DNN_BACKEND_HALIDE) + { + forwardHalide(ld.outputBlobsWrappers, node); + } +#ifdef HAVE_INF_ENGINE + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + forwardNgraph(ld.outputBlobsWrappers, node, isAsync); + } +#endif + else if (preferableBackend == DNN_BACKEND_WEBNN) + { + forwardWebnn(ld.outputBlobsWrappers, node, isAsync); + } +#ifdef HAVE_VULKAN + else if (preferableBackend == DNN_BACKEND_VKCOM) + { + try + { + forwardVkCom(ld.outputBlobsWrappers, node); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what()); + it->second = Ptr(); + forwardLayer(ld); + } + } +#endif + else + { + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + } + } + + tm.stop(); + int64 t = tm.getTimeTicks(); + layersTimings[ld.id] = (t > 0) ? 
t : t + 1; // zero for skipped layers only + } + else + { + layersTimings[ld.id] = 0; + } + + ld.flag = 1; +} + + +void Net::Impl::forwardToLayer(LayerData& ld, bool clearFlags) +{ + CV_TRACE_FUNCTION(); + + if (clearFlags) + { + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + it->second.flag = 0; + } + + // already was forwarded + if (ld.flag) + return; + + // forward parents + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) + { + LayerData& ld = it->second; + if (ld.flag) + continue; + forwardLayer(ld); + } + + // forward itself + forwardLayer(ld); + +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + cudaInfo->context.stream.synchronize(); +#endif +} + + +Mat Net::Impl::forward(const String& outputName) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + String layerName = outputName; + + if (layerName.empty()) + { + std::vector layerNames = getLayerNames(); + CV_Assert(!layerNames.empty()); + layerName = layerNames.back(); + } + + std::vector pins(1, getPinByAlias(layerName)); + setUpNet(pins); + forwardToLayer(getLayerData(layerName)); + + return getBlob(layerName); +} + + +AsyncArray Net::Impl::forwardAsync(const String& outputName) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + +#ifdef CV_CXX11 + String layerName = outputName; + + if (layerName.empty()) + { + std::vector layerNames = getLayerNames(); + CV_Assert(!layerNames.empty()); + layerName = layerNames.back(); + } + + std::vector pins(1, getPinByAlias(layerName)); + setUpNet(pins); + + if (preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only"); + + isAsync = true; + forwardToLayer(getLayerData(layerName)); + isAsync = false; + + return getBlobAsync(layerName); +#else + CV_Error(Error::StsNotImplemented, "DNN: 
Asynchronous forward requires build with enabled C++11"); +#endif // CV_CXX11 +} + + +void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputName) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + String layerName = outputName; + + if (layerName.empty()) + { + std::vector layerNames = getLayerNames(); + CV_Assert(!layerNames.empty()); + layerName = layerNames.back(); + } + + std::vector pins(1, getPinByAlias(layerName)); + setUpNet(pins); + forwardToLayer(getLayerData(layerName)); + + LayerPin pin = getPinByAlias(layerName); + LayerData& ld = layers[pin.lid]; + + if (outputBlobs.isUMat()) + { + getBlob(layerName).copyTo(outputBlobs); + } + else if (outputBlobs.isMat()) + { + outputBlobs.assign(getBlob(layerName)); + } + else if (outputBlobs.isMatVector()) + { + if (preferableTarget != DNN_TARGET_CPU) + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + CV_Assert(!ld.outputBlobsWrappers[i].empty()); + ld.outputBlobsWrappers[i]->copyToHost(); + } + } + if (ld.outputBlobs[0].depth() == CV_16S) + { + std::vector& outputvec = *(std::vector*)outputBlobs.getObj(); + outputvec.resize(ld.outputBlobs.size()); + for (int i = 0; i < outputvec.size(); i++) + convertFp16(ld.outputBlobs[i], outputvec[i]); + } + else + { + // Output depth can be CV_32F or CV_8S + std::vector& outputvec = *(std::vector*)outputBlobs.getObj(); + outputvec = ld.outputBlobs; + } + } + else if (outputBlobs.isUMatVector()) + { + std::vector& outputvec = *(std::vector*)outputBlobs.getObj(); + +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) + { + if (preferableTarget == DNN_TARGET_OPENCL) + outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + else if (preferableTarget == DNN_TARGET_OPENCL_FP16) + { + std::vector out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + outputvec.resize(out_vec.size()); + for (int i = 0; i < 
out_vec.size(); i++) + convertFp16(out_vec[i], outputvec[i]); + } + } + else +#endif + { + outputvec.resize(ld.outputBlobs.size()); + for (int i = 0; i < outputvec.size(); ++i) + ld.outputBlobs[i].copyTo(outputvec[i]); + } + } +} + + +void Net::Impl::forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + std::vector pins; + for (int i = 0; i < outBlobNames.size(); i++) + { + pins.push_back(getPinByAlias(outBlobNames[i])); + } + + setUpNet(pins); + + LayerPin out = getLatestLayerPin(pins); + + forwardToLayer(getLayerData(out.lid)); + + std::vector matvec; + for (int i = 0; i < pins.size(); i++) + { + matvec.push_back(getBlob(pins[i])); + } + + outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1); // allocate vector + outputBlobs.assign(matvec); +} + + +void Net::Impl::forward(std::vector>& outputBlobs, + const std::vector& outBlobNames) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + std::vector pins; + for (int i = 0; i < outBlobNames.size(); i++) + { + pins.push_back(getPinByAlias(outBlobNames[i])); + } + + setUpNet(pins); + + LayerPin out = getLatestLayerPin(pins); + + forwardToLayer(getLayerData(out.lid)); + + outputBlobs.resize(outBlobNames.size()); + for (int i = 0; i < outBlobNames.size(); i++) + { + std::vector lp = getLayerOutPins(outBlobNames[i]); + outputBlobs[i].resize(lp.size()); + for (int j = 0; j < lp.size(); j++) + { + outputBlobs[i][j] = getBlob(lp[j]); + } + } +} + + +void Net::Impl::getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) +{ + CV_CheckGE(id, 0, ""); + CV_CheckLT(id, (int)layers.size(), ""); + LayerData& layerData = layers[id]; + std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = inOutShapes[id]; + + if (id == 0 && layerShapes.in[0].empty()) + { + if (!layerData.outputBlobs.empty()) + { + ShapesVec shapes; + for (int i = 0; i < 
layerData.outputBlobs.size(); i++) + { + Mat& inp = layerData.outputBlobs[i]; + CV_Assert(!inp.empty()); + shapes.push_back(shape(inp)); + } + layerShapes.in = shapes; + } + else + { + const std::vector& inputShapes = netInputLayer->shapes; + bool none = true; + for (size_t i = 0; i < inputShapes.size(); i++) + { + if (!inputShapes[i].empty()) + { + none = false; + break; + } + } + if (none) + { + layerShapes.out.clear(); + return; + } + else + { + layerShapes.in = inputShapes; + } + } + } + + if (layerShapes.in.empty()) + { + for (int i = 0; i < inputLayerIds.size(); i++) + { + int layerId = inputLayerIds[i].lid; + LayersShapesMap::const_iterator it = inOutShapes.find(layerId); + if (it == inOutShapes.end() || it->second.out.empty()) + { + getLayerShapesRecursively(layerId, inOutShapes); + } + const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; + layerShapes.in.push_back(shape); + } + } + const ShapesVec& is = layerShapes.in; + ShapesVec& os = layerShapes.out; + ShapesVec& ints = layerShapes.internal; + int requiredOutputs = layerData.requiredOutputs.size(); + Ptr l = layerData.getLayerInstance(); + CV_Assert(l); + bool layerSupportInPlace = false; + try + { + layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." 
<< + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size()); + for (size_t i = 0; i < is.size(); ++i) + { + CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); + } + for (size_t i = 0; i < os.size(); ++i) + { + CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); + } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } + CV_LOG_ERROR(NULL, "Exception message: " << e.what()); + throw; + } + layerShapes.supportInPlace = layerSupportInPlace; + + try + { + for (int i = 0; i < ints.size(); i++) + CV_CheckGT(total(ints[i]), 0, ""); + + for (int i = 0; i < os.size(); i++) + CV_CheckGT(total(os[i]), 0, ""); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." << + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size() << + " inplace=" << layerSupportInPlace); + for (size_t i = 0; i < is.size(); ++i) + { + CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); + } + for (size_t i = 0; i < os.size(); ++i) + { + CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); + } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } + CV_LOG_ERROR(NULL, "Exception message: " << e.what()); + throw; + } +} + +void Net::Impl::getLayersShapes( + const ShapesVec& netInputShapes, + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) /*const*/ +{ + layersIds.clear(); + inLayersShapes.clear(); + outLayersShapes.clear(); + + Impl::LayersShapesMap inOutShapes; + getLayersShapes(netInputShapes, inOutShapes); + + for 
(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); + it != inOutShapes.end(); it++) + { + layersIds.push_back(it->first); + inLayersShapes.push_back(it->second.in); + outLayersShapes.push_back(it->second.out); + } +} + + +void Net::Impl::getLayersShapes(const ShapesVec& netInputShapes, + LayersShapesMap& inOutShapes) +{ + inOutShapes.clear(); + + inOutShapes[0].in = netInputShapes; // insert shape for first input layer + for (MapIdToLayerData::const_iterator it = layers.begin(); + it != layers.end(); it++) + { + getLayerShapesRecursively(it->first, inOutShapes); + } +} + +void Net::Impl::getLayerShapes(const ShapesVec& netInputShapes, + const int layerId, + LayerShapes& shapes) +{ + LayersShapesMap inOutShapes; + inOutShapes[0].in = netInputShapes; // insert shape for first input layer + getLayerShapesRecursively(layerId, inOutShapes); + shapes = inOutShapes[layerId]; +} + +void Net::Impl::updateLayersShapes() +{ + CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size()); + CV_Assert(netInputLayer); + DataLayer& inputLayer = *netInputLayer; + LayerData& inputLayerData = layers[0]; + CV_Assert(inputLayerData.layerInstance.get() == &inputLayer); + CV_Assert(!inputLayerData.outputBlobs.empty()); + ShapesVec inputShapes; + for (int i = 0; i < inputLayerData.outputBlobs.size(); i++) + { + Mat& inp = inputLayerData.outputBlobs[i]; + CV_Assert(!inp.empty()); + if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation + preferableTarget == DNN_TARGET_OPENCL_FP16 && + inputLayerData.dtype == CV_32F) + { + inp.create(inp.dims, inp.size, CV_16S); + } + inputShapes.push_back(shape(inp)); + } + CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); + LayersShapesMap layersShapes; + layersShapes[0].in = inputShapes; + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + int layerId = it->first; + LayerData& layerData = it->second; + const std::vector& inputLayerIds = 
layerData.inputBlobsId; + LayerShapes& layerShapes = layersShapes[layerId]; + CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); + if (layerShapes.in.empty()) + { + for (int i = 0; i < inputLayerIds.size(); i++) + { + const LayerPin& inputPin = inputLayerIds[i]; + int inputLayerId = inputPin.lid; + CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); + LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId); + if (inputIt == layersShapes.end() || inputIt->second.out.empty()) + { + getLayerShapesRecursively(inputLayerId, layersShapes); + } + const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid]; + layerShapes.in.push_back(shape); + } + layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in); + } + CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes")); + CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes")); + CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes")); + } + CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE"); +} + + +LayerPin Net::Impl::getLatestLayerPin(const std::vector& pins) const +{ + return *std::max_element(pins.begin(), pins.end()); +} + +Mat Net::Impl::getBlob(const LayerPin& pin) const +{ + CV_TRACE_FUNCTION(); + + if (!pin.valid()) + CV_Error(Error::StsObjectNotFound, "Requested blob not found"); + + MapIdToLayerData::const_iterator it = layers.find(pin.lid); + if (it == layers.end()) + CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid)); + + const LayerData& ld = it->second; + if ((size_t)pin.oid >= ld.outputBlobs.size()) + { + CV_Error(Error::StsOutOfRange, 
format("Layer \"%s\" produce only %zu outputs, " + "the #%d was requested", + ld.name.c_str(), ld.outputBlobs.size(), pin.oid)); + } + if (preferableTarget != DNN_TARGET_CPU) + { + CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); + // Transfer data to CPU if it's require. + ld.outputBlobsWrappers[pin.oid]->copyToHost(); + } + + if (ld.outputBlobs[pin.oid].depth() == CV_16S) + { + Mat output_blob; + convertFp16(ld.outputBlobs[pin.oid], output_blob); + return output_blob; + } + else + return ld.outputBlobs[pin.oid]; +} + +Mat Net::Impl::getBlob(String outputName) const +{ + return getBlob(getPinByAlias(outputName)); +} + + +AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin) +{ + CV_TRACE_FUNCTION(); +#ifdef HAVE_INF_ENGINE + if (!pin.valid()) + CV_Error(Error::StsObjectNotFound, "Requested blob not found"); + + LayerData& ld = layers[pin.lid]; + if ((size_t)pin.oid >= ld.outputBlobs.size()) + { + CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " + "the #%d was requested", + ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid)); + } + if (preferableTarget != DNN_TARGET_CPU) + { + CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); + // Transfer data to CPU if it's require. 
+ ld.outputBlobsWrappers[pin.oid]->copyToHost(); + } + CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + + Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); + return std::move(wrapper->futureMat); +#else + CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required"); +#endif // HAVE_INF_ENGINE +} + + +AsyncArray Net::Impl::getBlobAsync(String outputName) +{ + return getBlobAsync(getPinByAlias(outputName)); +} + + +void Net::Impl::setInputsNames(const std::vector& inputBlobNames) +{ + CV_Assert(netInputLayer); + netInputLayer->setNames(inputBlobNames); +} + + +void Net::Impl::setInputShape(const String& inputName, const MatShape& shape) +{ + CV_Assert(netInputLayer); + netInputLayer->setInputShape(inputName, shape); +} + + +void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) +{ + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + LayerPin pin; + pin.lid = 0; + pin.oid = resolvePinOutputName(getLayerData(pin.lid), name); + + if (!pin.valid()) + CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); + + Mat blob_ = blob.getMat(); // can't use InputArray directly due MatExpr stuff + MatShape blobShape = shape(blob_); + + if (pin.lid == 0) + { + CV_Assert(!netInputLayer.empty()); + const DataLayer& netInputLayer = *(this->netInputLayer); + if (!netInputLayer.shapes.empty()) + { + CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), ""); + const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid]; + if (!inputShapeLimitation.empty()) + { + CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), ""); +#if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0 + const size_t dims = inputShapeLimitation.size(); + for (size_t dim = 0; dim < dims; dim++) + { + if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1) + continue; // don't limit batch + CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], ""); + } 
+#endif + } + } + } + + LayerData& ld = layers[pin.lid]; + const int numInputs = std::max(pin.oid + 1, (int)ld.requiredOutputs.size()); + ld.outputBlobs.resize(numInputs); + ld.outputBlobsWrappers.resize(numInputs); + netInputLayer->inputsData.resize(numInputs); + netInputLayer->scaleFactors.resize(numInputs); + netInputLayer->means.resize(numInputs); + + MatShape prevShape = shape(netInputLayer->inputsData[pin.oid]); + bool oldShape = prevShape == blobShape; + + blob_.copyTo(netInputLayer->inputsData[pin.oid]); + if (!oldShape) + ld.outputBlobs[pin.oid] = netInputLayer->inputsData[pin.oid]; + + if (!ld.outputBlobsWrappers[pin.oid].empty()) + { + ld.outputBlobsWrappers[pin.oid]->setHostDirty(); + } + netInputLayer->scaleFactors[pin.oid] = scalefactor; + netInputLayer->means[pin.oid] = mean; + netWasAllocated = netWasAllocated && oldShape; +} + + +Mat Net::Impl::getParam(int layer, int numParam) const +{ + LayerData& ld = getLayerData(layer); + std::vector& layerBlobs = ld.getLayerInstance()->blobs; + CV_Assert(numParam < (int)layerBlobs.size()); + return layerBlobs[numParam]; +} + +void Net::Impl::setParam(int layer, int numParam, const Mat& blob) +{ + LayerData& ld = getLayerData(layer); + + std::vector& layerBlobs = ld.getLayerInstance()->blobs; + CV_Assert(numParam < (int)layerBlobs.size()); + // we don't make strong checks, use this function carefully + layerBlobs[numParam] = blob; +} + + +static +string dumpLayerParameterSize(const string& name, const LayerParams& lp) +{ + std::ostringstream out(name, std::ios::ate); + DictValue param = lp.get(name); + switch (param.size()) + { + case 1: out << " : "; break; + case 2: out << " (HxW): "; break; + case 3: out << " (DxHxW): "; break; + default: + CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size())); + out << ": "; + } + for (size_t i = 0; i < param.size(); i++) + { + if (i > 0) + out << " x "; + out << param.get(i); + } + return out.str(); +} + +string 
Net::Impl::dump(bool forceAllocation) const +{ + bool hasInput = !netInputLayer->inputsData.empty(); + if (forceAllocation) + { + if (!netWasAllocated) + const_cast(this)->setUpNet(); + } + + std::ostringstream out; + const std::map& map = layers; + + Backend prefBackend = (Backend)preferableBackend; + std::vector> skippedLayers; + std::vector skipId; + std::vector allLayers(map.size(), -1); + int idPrev = -1; + Ptr prevNode; + for (std::map::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit) + { + std::map>::const_iterator itBackend = rit->second.backendNodes.find(prefBackend); + if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || itBackend->second.empty()) + { + if (rit->second.skip) + skipId.push_back(rit->first); + else if (!skipId.empty()) + { + if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty()) + skipId.push_back(rit->first); + else if (idPrev != -1) + skipId.push_back(idPrev); + + std::sort(skipId.begin(), skipId.end()); + for (int i = 0; i < skipId.size(); i++) + { + allLayers[skipId[i]] = skippedLayers.size(); + } + skippedLayers.push_back(skipId); + skipId.clear(); + } + } + else + { + if (itBackend->second == prevNode) + skipId.push_back(idPrev); + else if (!skipId.empty()) + { + skipId.push_back(idPrev); + std::sort(skipId.begin(), skipId.end()); + for (int i = 0; i < skipId.size(); i++) + { + allLayers[skipId[i]] = skippedLayers.size(); + } + skippedLayers.push_back(skipId); + skipId.clear(); + } + idPrev = rit->first; + prevNode = itBackend->second; + } + } + std::vector colors = { "#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff" }; + string backend; + switch (prefBackend) + { + case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break; + case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break; + case DNN_BACKEND_INFERENCE_ENGINE: // fallthru + case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: // fallthru + case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: 
backend = "OpenVINO/"; break; + case DNN_BACKEND_OPENCV: backend = "OCV/"; break; + case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break; + case DNN_BACKEND_CUDA: backend = "CUDA/"; break; + case DNN_BACKEND_WEBNN: + backend = "WEBNN/"; + break; + // don't use default: + } + out << "digraph G {\n"; + // Add nodes + for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) + { + const LayerData& ld = it->second; + string name = ld.params.name; + std::vector clusterIds(1, it->first); + if (allLayers[it->first] == -1 && !name.empty()) + { + out << "\t\"" << name << "\" [label=\""; + } + else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0]) + { + continue; + } + else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0] + { + int cluster = allLayers[it->first]; + out << "\t\"" + << "cluster_" << cluster << "\" [label=\"{"; + clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster + } + for (int i = 0; i < clusterIds.size(); i++) + { + CV_DbgAssert(map.find(clusterIds[i]) != map.end()); + const LayerParams& lp = map.find(clusterIds[i])->second.params; + if (!lp.name.empty()) + { + if (i > 0) + { + out << " | "; + } + out << lp.name << "\\n" + << lp.type << "\\n"; // align center + if (lp.has("kernel_size")) + { + string kernel = dumpLayerParameterSize("kernel_size", lp); + out << kernel; + out << "\\l"; // align left + } + else if (lp.has("kernel_h") && lp.has("kernel_w")) + { + DictValue h = lp.get("kernel_h"); + DictValue w = lp.get("kernel_w"); + out << "kernel (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("stride")) + { + string stride = dumpLayerParameterSize("stride", lp); + out << stride; + out << "\\l"; // align left + } + else if (lp.has("stride_h") && lp.has("stride_w")) + { + DictValue h = lp.get("stride_h"); + DictValue w = lp.get("stride_w"); + out << "stride (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if 
(lp.has("dilation")) + { + string dilation = dumpLayerParameterSize("dilation", lp); + out << dilation; + out << "\\l"; // align left + } + else if (lp.has("dilation_h") && lp.has("dilation_w")) + { + DictValue h = lp.get("dilation_h"); + DictValue w = lp.get("dilation_w"); + out << "dilation (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("pad")) + { + DictValue pad = lp.get("pad"); + out << "pad "; + switch (pad.size()) + { + case 1: out << ": " << pad; break; + case 2: + out << "(HxW): (" << pad.get(0) << " x " << pad.get(1) << ")"; + break; + case 4: + out << "(HxW): (" << pad.get(0) << ", " << pad.get(2) + << ") x (" << pad.get(1) << ", " << pad.get(3) << ")"; + break; + case 6: + out << "(DxHxW): (" << pad.get(0) << ", " << pad.get(3) + << ") x (" << pad.get(1) << ", " << pad.get(4) + << ") x (" << pad.get(2) << ", " << pad.get(5) << ")"; + break; + default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size())); + } + out << "\\l"; // align left + } + else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) + { + DictValue l = lp.get("pad_l"); + DictValue t = lp.get("pad_t"); + DictValue r = lp.get("pad_r"); + DictValue b = lp.get("pad_b"); + out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")"; + out << "\\l"; // align left + } + else if (lp.has("pooled_w") || lp.has("pooled_h")) + { + DictValue h = lp.get("pooled_h"); + DictValue w = lp.get("pooled_w"); + out << "pad pooled (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("pool")) + { + out << "pool: " << lp.get("pool"); + out << "\\l"; // align left + } + if (lp.has("global_pooling")) + { + out << "global_pooling: " << lp.get("global_pooling"); + out << "\\l"; // align left + } + if (lp.has("group")) + { + out << "group: " << lp.get("group"); + out << "\\l"; // align left + } + } + } + if (!ld.outputBlobs.empty()) + { + out << "output: " << ld.outputBlobs[0].size; + out << 
"\\l"; // align left + } + + Ptr layerBackend; + std::map>::const_iterator ibn = ld.backendNodes.find(prefBackend); + if (ibn != ld.backendNodes.end()) + layerBackend = ibn->second; + out << (!layerBackend.empty() ? backend : "OCV/"); + int colorId = 0; + const Target target = ld.layerInstance.empty() + ? DNN_TARGET_CPU + : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type + switch (target) + { + case DNN_TARGET_CPU: + out << "CPU"; + colorId = layerBackend.empty() ? 0 : 5; + break; + case DNN_TARGET_OPENCL: + out << "OCL"; + colorId = 1; + break; + case DNN_TARGET_OPENCL_FP16: + out << "OCL_FP16"; + colorId = 2; + break; + case DNN_TARGET_MYRIAD: + out << "MYRIAD"; + colorId = 3; + break; + case DNN_TARGET_HDDL: + out << "HDDL"; + colorId = 8; + break; + case DNN_TARGET_VULKAN: + out << "VULKAN"; + colorId = 7; + break; + case DNN_TARGET_FPGA: + out << "FPGA"; + colorId = 4; + break; + case DNN_TARGET_CUDA: + out << "CUDA"; + colorId = 5; + break; + case DNN_TARGET_CUDA_FP16: + out << "CUDA_FP16"; + colorId = 6; + break; + // don't use default: + } + CV_Assert(colorId < colors.size()); + out << "\\n"; // align center + out << ((clusterIds.size() == 1) ? "\" " : " }\" "); + out << "fillcolor=\"" << colors[colorId] << "\" "; + out << "style=filled "; + out << "shape=" << ((clusterIds.size() == 1) ? "box" : "record") << "]\n"; + } + out << '\n'; + // Add edges + int inputsSize = hasInput ? 
netInputLayer->outNames.size() : 0; + for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) + { + const LayerData& ld = it->second; + if (allLayers[it->first] == -1) // node + { + for (int i = 0; i < ld.consumers.size(); i++) + { + int outId = ld.consumers[i].lid; + if (it == map.begin() && inputsSize > 1) + out << "\t\"" << ld.name << "_" << i << "\"" + << " -> "; + else + out << "\t\"" << ld.name << "\"" + << " -> "; + if (allLayers[outId] == -1) // node + { + CV_DbgAssert(map.find(outId) != map.end()); + out << "\"" << map.find(outId)->second.name << "\"\n"; + } + else // cluster + { + out << "\"" + << "cluster_" << allLayers[outId] << "\"\n"; + } + } + } + else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster + { + for (int i = 0; i < ld.consumers.size(); i++) + { + int outId = ld.consumers[i].lid; + if (allLayers[outId] == -1) // node + { + CV_DbgAssert(map.find(outId) != map.end()); + out << "\t\"" + << "cluster_" << allLayers[it->first] << "\"" + << " -> "; + out << "\"" << map.find(outId)->second.name << "\"\n"; + } + else if (allLayers[outId] != allLayers[it->first]) + { // another cluster + out << "\t\"" + << "cluster_" << allLayers[it->first] << "\"" + << " -> "; + out << "\"" + << "cluster_" << allLayers[outId] << "\"\n"; + } + } + } + } + out << "}\n"; + return out.str(); +} + + +void Net::Impl::dumpNetworkToFile() const +{ +#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP + string dumpFileNameBase = getDumpFileNameBase(); + string dumpFileName = dumpFileNameBase + ".dot"; + try + { + string dumpStr = dump(); + std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary); + out << dumpStr; + } + catch (const std::exception& e) + { + std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); + out << "Exception: " << e.what() << std::endl; + } + catch (...) 
+ { + std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); + out << "Can't dump: unknown exception" << std::endl; + } +#endif +} + + +std::vector> Net::Impl::getLayerInputs(int layerId) const +{ + LayerData& ld = getLayerData(layerId); + + std::vector> inputLayers; + inputLayers.reserve(ld.inputBlobsId.size()); + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid)); + } + return inputLayers; +} + +std::vector Net::Impl::getLayerNames() const +{ + std::vector res; + res.reserve(layers.size()); + + Impl::MapIdToLayerData::const_iterator it; + for (it = layers.begin(); it != layers.end(); it++) + { + if (it->second.id) // skip Data layer + res.push_back(it->second.name); + } + + return res; +} + + +// FIXIT drop "unconnected" API +std::vector Net::Impl::getUnconnectedOutLayers() const +{ + std::vector layersIds; + + // registerOutput() flow + if (!outputNameToId.empty()) + { + for (std::map::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it) + { + layersIds.push_back(it->second); + } + return layersIds; + } + + Impl::MapIdToLayerData::const_iterator it; + for (it = layers.begin(); it != layers.end(); it++) + { + int lid = it->first; + const LayerData& ld = it->second; + + if (ld.requiredOutputs.size() == 0) + layersIds.push_back(lid); + } + + return layersIds; +} + + +// FIXIT drop "unconnected" API +std::vector Net::Impl::getUnconnectedOutLayersNames() /*const*/ +{ + std::vector ids = getUnconnectedOutLayers(); + const size_t n = ids.size(); + std::vector names(n); + for (size_t i = 0; i < n; ++i) + { + names[i] = layers[ids[i]].name; + } + return names; +} + + +int64 Net::Impl::getFLOPS(const std::vector& netInputShapes) /*const*/ +{ + int64 flops = 0; + std::vector ids; + std::vector> inShapes, outShapes; + getLayersShapes(netInputShapes, ids, inShapes, outShapes); + CV_Assert(inShapes.size() == outShapes.size()); + CV_Assert(inShapes.size() == ids.size()); + + for 
(int i = 0; i < ids.size(); i++) + { + flops += layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]); + } + + return flops; +} + + +int64 Net::Impl::getFLOPS( + const int layerId, + const std::vector& netInputShapes) /*const*/ +{ + Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId); + CV_Assert(layer != layers.end()); + + LayerShapes shapes; + getLayerShapes(netInputShapes, layerId, shapes); + + return const_cast(layer->second).getLayerInstance()->getFLOPS(shapes.in, shapes.out); +} + + +void Net::Impl::getMemoryConsumption( + const int layerId, + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/ +{ + Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId); + CV_Assert(layer != layers.end()); + + weights = blobs = 0; + + for (int i = 0; i < layer->second.params.blobs.size(); i++) + { + const Mat& weightsBlob = layer->second.params.blobs[i]; + weights += weightsBlob.total() * weightsBlob.elemSize(); + } + + LayerShapes shapes; + getLayerShapes(netInputShapes, layerId, shapes); + const ShapesVec& outLayerShapes = shapes.out; + + // FIXIT netWasQuantized check is not enough - per layer check should be done + size_t elemSize = netWasQuantized ? 
sizeof(char) : sizeof(float); + for (int i = 0; i < outLayerShapes.size(); i++) + { + blobs += total(outLayerShapes[i]) * elemSize; + } +} + + +void Net::Impl::getMemoryConsumption( + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/ +{ + std::vector layerIds; + std::vector w, b; + getMemoryConsumption(netInputShapes, layerIds, w, b); + + weights = blobs = 0; + for (int i = 0; i < layerIds.size(); i++) + { + weights += w[i]; + blobs += b[i]; + } +} + + +int64 Net::Impl::getPerfProfile(std::vector& timings) const +{ + timings = std::vector(layersTimings.begin() + 1, layersTimings.end()); + int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0); + return total; +} + +void Net::Impl::getMemoryConsumption( + const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) /*const*/ +{ + layerIds.clear(); + weights.clear(); + blobs.clear(); + + std::vector> inLayerShapes, outLayerShapes; + + getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); + // FIXIT netWasQuantized check is not enough - per layer check should be done + size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float); + for (int i = 0; i < layerIds.size(); i++) + { + int w = 0, b = 0; + Impl::MapIdToLayerData::const_iterator layer = layers.find(layerIds[i]); + CV_Assert(layer != layers.end()); + + for (int j = 0; j < layer->second.params.blobs.size(); j++) + { + const Mat& weightsBlob = layer->second.params.blobs[j]; + w += weightsBlob.total() * weightsBlob.elemSize(); + } + + for (int j = 0; j < outLayerShapes[i].size(); j++) + { + b += total(outLayerShapes[i][j]) * elemSize; + } + + weights.push_back(w); + blobs.push_back(b); + } +} + + +// TODO drop? 
+void Net::Impl::getLayerTypes(std::vector& layersTypes) const +{ + layersTypes.clear(); + + std::map layers_type_map; + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) + { + if (layers_type_map.find(it->second.type) == layers_type_map.end()) + layers_type_map[it->second.type] = 0; + layers_type_map[it->second.type]++; + } + + for (std::map::const_iterator it = layers_type_map.begin(); it != layers_type_map.end(); it++) + { + layersTypes.push_back(it->first); + } +} + + +// TODO drop? +int Net::Impl::getLayersCount(const String& layerType) const +{ + int count = 0; + for (Impl::MapIdToLayerData::const_iterator it = layers.begin(); + it != layers.end(); it++) + { + if (it->second.type == layerType) + count++; + } + return count; +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_impl.hpp b/modules/dnn/src/net_impl.hpp new file mode 100644 index 0000000000..022e2374ca --- /dev/null +++ b/modules/dnn/src/net_impl.hpp @@ -0,0 +1,261 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef __OPENCV_DNN_SRC_NET_IMPL_HPP__ +#define __OPENCV_DNN_SRC_NET_IMPL_HPP__ + +#include "op_halide.hpp" +#include "op_inf_engine.hpp" +#include "ie_ngraph.hpp" +#include "op_vkcom.hpp" +#include "op_cuda.hpp" +#include "op_webnn.hpp" + +#include +#include +#include + +#include + +#include + +#include "layer_internals.hpp" // LayerPin LayerData DataLayer + +#include "legacy_backend.hpp" // wrapMat BlobManager OpenCLBackendWrapper + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +using std::make_pair; +using std::string; + +// NB: Implementation is divided between of multiple .cpp files +struct Net::Impl : public detail::NetImplBase +{ + typedef std::map LayersShapesMap; + typedef std::map MapIdToLayerData; + + Impl(); + + Ptr netInputLayer; + std::vector blobsToKeep; + MapIdToLayerData layers; + std::map layerNameToId; + std::map outputNameToId; // use registerOutput() to populate outputs + BlobManager blobManager; + int preferableBackend; + int preferableTarget; + String halideConfigFile; + bool skipInfEngineInit; + bool hasDynamicShapes; + // Map host data to backend specific wrapper. 
+ std::map> backendWrappers; + + int lastLayerId; + + bool netWasAllocated; + bool netWasQuantized; + bool fusion; + bool isAsync; + std::vector layersTimings; + + + bool empty() const; + void setPreferableBackend(int backendId); + void setPreferableTarget(int targetId); + + // FIXIT use inheritance + Ptr wrap(Mat& host); + + + void clear(); + + void setUpNet(const std::vector& blobsToKeep_ = std::vector()); + + + Ptr getLayer(int layerId) const; + Ptr getLayer(const LayerId& layerId) const; + + int getLayerId(const String& layerName) const; + + int getLayerId(int id) const; + + int getLayerId(DictValue& layerDesc) const; + + String getLayerName(int id) const; + + LayerData& getLayerData(int id) const; + + LayerData& getLayerData(const String& layerName) const; + + LayerData& getLayerData(const DictValue& layerDesc) const; + + static void addLayerInput(LayerData& ld, int inNum, LayerPin from); + + int resolvePinOutputName(LayerData& ld, const String& outName) const; + + LayerPin getPinByAlias(const String& layerName) const; + + std::vector getLayerOutPins(const String& layerName) const; + + // FIXIT remove dtype + int addLayer(const String& name, const String& type, const int& dtype, LayerParams& params); + + int addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params); + + + void connect(int outLayerId, int outNum, int inLayerId, int inNum); + + int registerOutput(const std::string& outputName, int layerId, int outputPort); + + // FIXIT drop "unconnected" API + std::vector getUnconnectedOutLayers() const; + std::vector getUnconnectedOutLayersNames() /*const*/; + + + void setInputsNames(const std::vector& inputBlobNames); + void setInputShape(const String& inputName, const MatShape& shape); + void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean); + Mat getParam(int layer, int numParam) const; + void setParam(int layer, int numParam, const Mat& blob); + std::vector> getLayerInputs(int layerId) 
const; + std::vector getLayerNames() const; + + + // TODO drop? + void getLayerTypes(std::vector& layersTypes) const; + int getLayersCount(const String& layerType) const; + + + // FIXIT use inheritance + void initBackend(const std::vector& blobsToKeep_); + + void setHalideScheduler(const String& scheduler); +#ifdef HAVE_HALIDE + void compileHalide(); + void initHalideBackend(); +#endif + +#ifdef HAVE_DNN_NGRAPH + void addNgraphOutputs(LayerData& ld); + void initNgraphBackend(const std::vector& blobsToKeep_); +#endif + +#ifdef HAVE_WEBNN + void addWebnnOutputs(LayerData& ld); + void initWebnnBackend(const std::vector& blobsToKeep_); +#endif + +#ifdef HAVE_VULKAN + void initVkComBackend(); +#endif + +#ifdef HAVE_CUDA + struct CudaInfo_t + { + CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_) + : context(std::move(ctxt)) + , d2h_stream(std::move(d2h_stream_)) + {} + cuda4dnn::csl::CSLContext context; + cuda4dnn::csl::Stream d2h_stream; + cuda4dnn::csl::Workspace workspace; + }; + + std::unique_ptr cudaInfo; + + void initCUDABackend(const std::vector& blobsToKeep_); +#endif + + void allocateLayer(int lid, const LayersShapesMap& layersShapes); + + // TODO add getter + void enableFusion(bool fusion_); + + void fuseLayers(const std::vector& blobsToKeep_); + + void allocateLayers(const std::vector& blobsToKeep_); + + void forwardLayer(LayerData& ld); + + void forwardToLayer(LayerData& ld, bool clearFlags = true); + + Mat forward(const String& outputName); + AsyncArray forwardAsync(const String& outputName); + void forward(OutputArrayOfArrays outputBlobs, const String& outputName); + void forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames); + void forward(std::vector>& outputBlobs, + const std::vector& outBlobNames); + + + void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes); + + void getLayersShapes( + const ShapesVec& netInputShapes, + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& 
outLayersShapes) /*const*/; + + void getLayersShapes(const ShapesVec& netInputShapes, + LayersShapesMap& inOutShapes); + + void getLayerShapes(const ShapesVec& netInputShapes, + const int layerId, + LayerShapes& shapes); + + void updateLayersShapes(); + + int64 getFLOPS(const std::vector& netInputShapes) /*const*/; + int64 getFLOPS( + const int layerId, + const std::vector& netInputShapes) /*const*/; + + void getMemoryConsumption( + const int layerId, + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/; + void getMemoryConsumption( + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/; + void getMemoryConsumption( + const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) /*const*/; + int64 getPerfProfile(std::vector& timings) const; + + // TODO drop + LayerPin getLatestLayerPin(const std::vector& pins) const; + + Mat getBlob(const LayerPin& pin) const; + + Mat getBlob(String outputName) const; + +#ifdef CV_CXX11 + AsyncArray getBlobAsync(const LayerPin& pin); + + AsyncArray getBlobAsync(String outputName); +#endif // CV_CXX11 + +#ifdef HAVE_INF_ENGINE + static + Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); +#endif + + string dump(bool forceAllocation = false) const; + + void dumpNetworkToFile() const; + + // FIXIT drop from inference API + Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) /*const*/; + void getInputDetails(std::vector& scales, std::vector& zeropoints) /*const*/; + void getOutputDetails(std::vector& scales, std::vector& zeropoints) /*const*/; + +}; // Net::Impl + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // __OPENCV_DNN_SRC_NET_IMPL_HPP__ diff --git a/modules/dnn/src/net_impl_backend.cpp b/modules/dnn/src/net_impl_backend.cpp new file mode 100644 index 0000000000..4de4fb595a --- /dev/null +++ b/modules/dnn/src/net_impl_backend.cpp @@ -0,0 +1,200 @@ +// This file is part of 
OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" +#include "legacy_backend.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Ptr Net::Impl::wrap(Mat& host) +{ + if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) + return Ptr(); + + MatShape shape(host.dims); + for (int i = 0; i < host.dims; ++i) + shape[i] = host.size[i]; + + void* data = host.data; + if (backendWrappers.find(data) != backendWrappers.end()) + { + Ptr baseBuffer = backendWrappers[data]; + if (preferableBackend == DNN_BACKEND_OPENCV) + { +#ifdef HAVE_OPENCL + CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget)); + return OpenCLBackendWrapper::create(baseBuffer, host); +#else + CV_Error(Error::StsInternal, ""); +#endif + } + else if (preferableBackend == DNN_BACKEND_HALIDE) + { + CV_Assert(haveHalide()); +#ifdef HAVE_HALIDE + return Ptr(new HalideBackendWrapper(baseBuffer, shape)); +#endif + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + return wrapMat(preferableBackend, preferableTarget, host); + } + else if (preferableBackend == DNN_BACKEND_WEBNN) + { +#ifdef HAVE_WEBNN + return wrapMat(preferableBackend, preferableTarget, host); +#endif + } + else if (preferableBackend == DNN_BACKEND_VKCOM) + { +#ifdef HAVE_VULKAN + return Ptr(new VkComBackendWrapper(baseBuffer, host)); +#endif + } + else if (preferableBackend == DNN_BACKEND_CUDA) + { + CV_Assert(haveCUDA()); +#ifdef HAVE_CUDA + switch (preferableTarget) + { + case DNN_TARGET_CUDA: + return CUDABackendWrapperFP32::create(baseBuffer, shape); + case DNN_TARGET_CUDA_FP16: + return CUDABackendWrapperFP16::create(baseBuffer, shape); + default: + 
CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); + } +#endif + } + else + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + } + + Ptr wrapper = wrapMat(preferableBackend, preferableTarget, host); + backendWrappers[data] = wrapper; + return wrapper; +} + + +void Net::Impl::initBackend(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + if (preferableBackend == DNN_BACKEND_OPENCV) + { + CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget)); + } + else if (preferableBackend == DNN_BACKEND_HALIDE) + { +#ifdef HAVE_HALIDE + initHalideBackend(); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Halide"); +#endif + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { +#ifdef HAVE_DNN_NGRAPH + initNgraphBackend(blobsToKeep_); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO"); +#endif + } + else if (preferableBackend == DNN_BACKEND_WEBNN) + { +#ifdef HAVE_WEBNN + initWebnnBackend(blobsToKeep_); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); +#endif + } + else if (preferableBackend == DNN_BACKEND_VKCOM) + { +#ifdef HAVE_VULKAN + initVkComBackend(); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Vulkan"); +#endif + } + else if (preferableBackend == DNN_BACKEND_CUDA) + { +#ifdef HAVE_CUDA + initCUDABackend(blobsToKeep_); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of CUDA/CUDNN"); +#endif + } + else + { + CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend)); + } +} + + +void Net::Impl::setPreferableBackend(int backendId) +{ + if (backendId == DNN_BACKEND_DEFAULT) + backendId = (Backend)getParam_DNN_BACKEND_DEFAULT(); + + if (netWasQuantized && backendId != DNN_BACKEND_OPENCV) + { + CV_LOG_WARNING(NULL, "DNN: 
Only default backend supports quantized networks"); + backendId = DNN_BACKEND_OPENCV; + } + +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; +#endif + + if (preferableBackend != backendId) + { + preferableBackend = backendId; + clear(); + } +} + +void Net::Impl::setPreferableTarget(int targetId) +{ + if (netWasQuantized && targetId != DNN_TARGET_CPU && + targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) + { + CV_LOG_WARNING(NULL, "DNN: Only CPU and OpenCL/OpenCL FP16 target is supported by quantized networks"); + targetId = DNN_TARGET_CPU; + } + + if (preferableTarget != targetId) + { + preferableTarget = targetId; + if (IS_DNN_OPENCL_TARGET(targetId)) + { +#ifndef HAVE_OPENCL +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_OPENCV) +#else + if (preferableBackend == DNN_BACKEND_DEFAULT || + preferableBackend == DNN_BACKEND_OPENCV) +#endif // HAVE_INF_ENGINE + preferableTarget = DNN_TARGET_CPU; +#else + bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16"); + if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16) + preferableTarget = DNN_TARGET_OPENCL; +#endif + } + clear(); + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp new file mode 100644 index 0000000000..c8d79c2959 --- /dev/null +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -0,0 +1,607 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include "net_impl.hpp" + +#ifdef HAVE_CUDA +#include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers +#endif + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::enableFusion(bool fusion_) +{ + if (fusion != fusion_) + { + fusion = fusion_; + clear(); + } +} + + +#if 0 +#define printf_(args) printf args +#else +#define printf_(args) +#endif + + +void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + + if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV && + preferableBackend != DNN_BACKEND_CUDA && + preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) + return; + +#if 0 // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return; +#endif + + // scan through all the layers. If there is convolution layer followed by the activation layer, + // we try to embed this activation into the convolution and disable separate execution of the activation + + // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)" + std::set pinsToKeep(blobsToKeep_.begin(), + blobsToKeep_.end()); + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) + { + int lid = it->first; + LayerData& ld = layers[lid]; + if (ld.skip) + { + printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); + continue; + } + printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); + + // the optimization #1. try to fuse batch norm, scaling and/or activation layers + // with the current layer if they follow it. Normally, the are fused with the convolution layer, + // but some of them (like activation) may be fused with fully-connected, elemwise (+) and + // some other layers. 
+ Ptr& currLayer = ld.layerInstance; + if (ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0) + { + LayerData* nextData = &layers[ld.consumers[0].lid]; + LayerPin lpNext(ld.consumers[0].lid, 0); + while (nextData) + { +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0) + { + CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type); + break; + } +#endif + /* we use `tryFuse` member of convolution layer to fuse eltwise later + * it's not intended to be fused here; hence, we stop when we encounter eltwise + */ + if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise") + break; + Ptr nextLayer = nextData->layerInstance; + if (currLayer->tryFuse(nextLayer)) + { + printf_(("\tfused with %s\n", nextLayer->name.c_str())); + nextData->skip = true; + ld.outputBlobs = layers[lpNext.lid].outputBlobs; + ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; + if (nextData->consumers.size() == 1) + { + int nextLayerId = nextData->consumers[0].lid; + nextData = &layers[nextLayerId]; + lpNext = LayerPin(nextLayerId, 0); + } + else + { + nextData = 0; + break; + } + } + else + break; + } + + if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) + continue; // Go to the next layer. + + // TODO: OpenCL target support more fusion styles. 
+ if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && + (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && + ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" && + ld.layerInstance->type != "Concat")) ) + continue; + + if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget) + && ld.layerInstance->type != "Convolution" + && ld.layerInstance->type != "Concat") + continue; + + while (nextData) + { + // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh + if (IS_DNN_OPENCL_TARGET(preferableTarget) && + nextData->type != "ReLU" && + nextData->type != "ChannelsPReLU" && + nextData->type != "ReLU6" && + nextData->type != "TanH" && + nextData->type != "Power") + break; + + Ptr nextActivLayer = nextData->layerInstance.dynamicCast(); + if (nextActivLayer.empty()) + break; + + if (currLayer->setActivation(nextActivLayer)) + { + printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); + nextData->skip = true; + ld.outputBlobs = layers[lpNext.lid].outputBlobs; + ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; + if (nextData->consumers.size() == 1) + { + int nextLayerId = nextData->consumers[0].lid; + nextData = &layers[nextLayerId]; + lpNext = LayerPin(nextLayerId, 0); + } + else + { + nextData = 0; + break; + } + } + else + break; + } + + // OpenCL: fuse convolution layer followed by eltwise + relu + // CUDA: fuse convolution layer followed by eltwise (and optional activation) + while (nextData && + (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && + ld.layerInstance->type == "Convolution" + ) // semantic of 'if' + { + Ptr nextEltwiseLayer = nextData->layerInstance.dynamicCast(); + if (nextEltwiseLayer.empty()) + break; + +#ifdef HAVE_CUDA + // CUDA backend supports fusion with eltwise sum (without variable channels) + if (IS_DNN_CUDA_TARGET(preferableTarget) && 
!nextEltwiseLayer.empty()) + { + // we create a temporary backend node for eltwise layer to obtain the eltwise configuration + cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init + const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers); + auto eltwiseNode = node.dynamicCast(); + + // broadcasting not supported in fused ops + auto required_shape = shape(nextData->outputBlobs[0]); + for (int i = 0; i < nextData->inputBlobs.size(); i++) + { + if (shape(*nextData->inputBlobs[i]) != required_shape) + { + eltwiseNode.reset(); + break; + } + } + + // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used. + // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors. + if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty()) + break; + } +#endif + + if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0) + break; + if (nextData->inputBlobsId.size() != 2) + break; + + if (IS_DNN_OPENCL_TARGET(preferableTarget)) + { + if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) == "sum") + { + if (nextData->params.has("coeff")) + { + DictValue paramCoeff = nextData->params.get("coeff"); + int n = paramCoeff.size(); + bool isCoeffOneOne = (n == 2); + for (int i = 0; isCoeffOneOne && i < n; i++) + { + float c = paramCoeff.get(i); + isCoeffOneOne &= (c == 1.0f); + } + if (!isCoeffOneOne) + { + CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only"); + break; + } + } + } + else + { + CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get("operation")); + break; + } + } + + { + LayerData *eltwiseData = nextData; + + // Eltwise layer has two inputs. 
We need to determine which + // is a base convolution layer and which could be used as it's bias. + LayerData* biasLayerData = 0; + for (int i = 0; i < 2; ++i) + { + LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid]; + CV_Assert(downLayerData); + while (downLayerData->skip) + { + if (downLayerData->inputBlobsId.size() == 1) + downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; + else + { + downLayerData = 0; + break; + } + } + if (downLayerData && ld.id == downLayerData->id) + { + biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid]; + break; + } + } + CV_Assert(biasLayerData); + { + // fuse eltwise + activation layer + // bias must already be computed to fuse => bias layer must appear before convolution + if (biasLayerData->id < ld.id) + { + /* we can fuse activation if: + * => activation layer that follows is the only consumer of eltwise output + * => activation layer does not process multiple inputs + * => we do not require to keep the output of eltwise + */ + Ptr nextFusabeleActivLayer; + if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0) + { + nextData = &layers[eltwiseData->consumers[0].lid]; + lpNext = LayerPin(eltwiseData->consumers[0].lid, 0); + CV_Assert(nextData); + if (nextData->outputBlobs.size() == 1) + nextFusabeleActivLayer = nextData->layerInstance.dynamicCast(); + } + else + { + // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise + nextData = 0; + } + + // the requirements of OCV OpenCL backend and CUDA backend are different + // we need to check them separately; hence, the fuse variables + bool fuse_eltwise = false, fuse_activation = false; + + Ptr activ_power; + if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() && + nextData && + (!nextData->type.compare("ReLU") || + !nextData->type.compare("ChannelsPReLU") || + (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast()) && 
activ_power->scale == 1.0f) + ) && + currLayer->setActivation(nextFusabeleActivLayer)) + { + fuse_eltwise = true; + fuse_activation = true; + } + + if (IS_DNN_CUDA_TARGET(preferableTarget)) + { + /* supported fusion options: + * => convolution + eltwise + * => activation(convolution) + eltwise + * > convolution + activation would have been fused already; we have to fuse eltwise + * => activation(convolution + eltwise) + * > fuse eltwise and then activation + */ + auto layer = nextEltwiseLayer.staticCast(); + if (currLayer->tryFuse(layer)) + { + fuse_eltwise = true; /* eltwise was successfully fused */ + if (!nextFusabeleActivLayer.empty() && nextData) + { + if ((!nextData->type.compare("ReLU") || + !nextData->type.compare("ReLU6") || + !nextData->type.compare("Power") || + !nextData->type.compare("TanH") || + !nextData->type.compare("Sigmoid") || + !nextData->type.compare("Swish") || + !nextData->type.compare("Mish")) && + currLayer->setActivation(nextFusabeleActivLayer)) + { + // activation was fused + fuse_activation = true; + } + } + } + } + + CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */ + if(fuse_eltwise && fuse_activation) + { + CV_Assert(nextData); + CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); + ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str())); + eltwiseData->skip = true; + nextData->skip = true; + // This optimization for cases like + // some_layer conv + // | | + // +-- eltwise --+ + // | + // activ + // This way all the element-wise computations + // (i.e. some_layer+conv or some_layer*conv) + // would be done at [conv] layer. So we need to + // replace [conv]'s output blob to [eltwise]'s one + // considering that [activ] is an in-place layer. + // Also we need to move all the consumers' references. 
+ // To prevent memory collisions (i.e. when input of + // [conv] and output of [eltwise] is the same blob) + // we allocate a new blob. + CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); + ld.outputBlobs[0] = ld.outputBlobs[0].clone(); + ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); + + eltwiseData->outputBlobs = ld.outputBlobs; + nextData->outputBlobs = ld.outputBlobs; + eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; + nextData->outputBlobsWrappers = ld.outputBlobsWrappers; + + // Move references of [activ] layer consumers to the newly allocated blob. + for (int i = 0; i < nextData->consumers.size(); ++i) + { + LayerData& consumer = layers[nextData->consumers[i].lid]; + for (int j = 0; j < consumer.inputBlobsId.size(); ++j) + { + if (consumer.inputBlobsId[j].lid == lpNext.lid) + { + consumer.inputBlobs[j] = &ld.outputBlobs[0]; + consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; + break; + } + } + } + } + else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise) + { + CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); + CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); + ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + eltwiseData->skip = true; + // This optimization is for cases like + // some_layer conv (maybe fused with activ) + // | | + // +-- eltwise --+ + // + // This way all the element-wise computations + // (i.e. some_layer+conv or some_layer*conv) + // would be done at [conv] layer. So we need to + // replace [conv]'s output blob to [eltwise]'s one. + // Also we need to move all the consumers' references. + // To prevent memory collisions (i.e. when input of + // [conv] and output of [eltwise] is the same blob) + // we allocate a new blob. 
+ CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); + ld.outputBlobs[0] = ld.outputBlobs[0].clone(); + ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); + + eltwiseData->outputBlobs = ld.outputBlobs; + eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; + + // Move references of [eltwise] layer consumers to the newly allocated blob. + for (int i = 0; i < eltwiseData->consumers.size(); ++i) + { + LayerData& consumer = layers[eltwiseData->consumers[i].lid]; + for (int j = 0; j < consumer.inputBlobsId.size(); ++j) + { + if (consumer.inputBlobsId[j].lid == eltwiseData->id) + { + consumer.inputBlobs[j] = &ld.outputBlobs[0]; + consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; + break; + } + } + } + } + } + } + } + + break; + } + } + + if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) + continue; // Go to the next layer. + + // the optimization #2. if there is concat layer that concatenates channels + // from the inputs together (i.e. axis == 1) then we make the inputs of + // the concat layer to write to the concatenation output buffer + // (and so we eliminate the concatenation layer, because the channels + // are concatenated implicitly). 
+ Ptr concatLayer = ld.layerInstance.dynamicCast(); + if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 ) + { + Mat& output = ld.outputBlobs[0]; + UMat umat_output; +#ifdef HAVE_OPENCL + if (!ld.outputBlobsWrappers.empty() && + (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))) + { + size_t i, ninputs = ld.inputBlobsId.size(); + bool conv_layer = true; + for( i = 0; i < ninputs; i++ ) + { + LayerPin pin = ld.inputBlobsId[i]; + LayerData* inp_i_data = &layers[pin.lid]; + while(inp_i_data->skip && + inp_i_data->inputBlobsId.size() == 1 && + inp_i_data->consumers.size() == 1) + { + pin = inp_i_data->inputBlobsId[0]; + inp_i_data = &layers[pin.lid]; + } + conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution"); + } + if (!conv_layer) + continue; + std::vector umat_outputBlobs; + umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + umat_output = umat_outputBlobs[0]; + } +#endif + + // TODO: in general, this optimization can always be done, but + // many layers currently check that the input/output blobs are + // continuous arrays. Unfortunately, this is not true when + // the concatenation optimization is applied with batch_size > 1. + // so, for now, we only apply this optimization in the most popular + // case batch_size == 1. 
+ int axis = normalize_axis(concatLayer->axis, output.dims); + if( output.total(0, axis) == 1 ) + { + size_t i, ninputs = ld.inputBlobsId.size(); + std::vector realinputs(ninputs); + for( i = 0; i < ninputs; i++ ) + { + LayerPin pin = ld.inputBlobsId[i]; + LayerData* inp_i_data = &layers[pin.lid]; + while(inp_i_data->skip && + inp_i_data->inputBlobsId.size() == 1 && + inp_i_data->consumers.size() == 1) + { + pin = inp_i_data->inputBlobsId[0]; + inp_i_data = &layers[pin.lid]; + } + printf_(("\treal input for %s is %s\n", + layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(), + inp_i_data->getLayerInstance()->name.c_str())); + + if(inp_i_data->skip || inp_i_data->consumers.size() != 1) + break; +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA && + (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false || + (inp_i_data->layerInstance->type != "Convolution" && + inp_i_data->layerInstance->type != "Pooling" && + inp_i_data->layerInstance->type != "Resize" && + inp_i_data->layerInstance->type != "Flatten" && + inp_i_data->layerInstance->type != "Permute" && + inp_i_data->layerInstance->type != "Reorg" && + inp_i_data->layerInstance->type != "Eltwise" && + inp_i_data->layerInstance.dynamicCast().empty()))) + { + break; + } +#endif + realinputs[i] = pin; + } + + if( i >= ninputs ) + { + // Allocate new memory to prevent collisions during memory + // reusing (see https://github.com/opencv/opencv/pull/10456). 
+ output = output.clone(); +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && + IS_DNN_OPENCL_TARGET(preferableTarget)) + { + std::vector umats(1); + umat_output = umat_output.clone(); + umats[0] = umat_output; + OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats); + } +#endif + +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + ld.outputBlobsWrappers[0] = wrap(output); +#endif + std::vector chrange(output.dims, Range::all()); + int ofs = 0; + for( i = 0; i < ninputs; i++ ) + { + LayerPin pin = realinputs[i]; + LayerData* inp_i_data = &layers[pin.lid]; + int channels_i = ld.inputBlobs[i]->size[axis]; + chrange[axis] = Range(ofs, ofs + channels_i); + printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(), + pin.oid, ofs, ofs + channels_i)); + ofs += channels_i; + Mat output_slice = output(chrange); + Mat& curr_output = inp_i_data->outputBlobs[pin.oid]; + CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size); + Mat* oldPtr = &curr_output; + curr_output = output_slice; +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) + { + std::vector umats(inp_i_data->outputBlobsWrappers.size()); + umats[pin.oid] = umat_output(chrange); + OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats); + } +#endif +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + { + auto cuda_wrapper = wrap(output).dynamicCast(); + auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims); + auto new_shape = shape(output_slice); + cuda_wrapper->update(new_shape, offset); + inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast(); + } +#endif + // Layers that refer old input Mat will refer to the + // new data but the same Mat object. 
+ CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output); + } + +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + { + for (int i = 0; i < ld.consumers.size(); i++) + { + LayerData& consumer = layers[ld.consumers[i].lid]; + for (int j = 0; j < consumer.inputBlobsId.size(); j++) + { + if (consumer.inputBlobsId[j].lid == ld.id) + { + CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data); + consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; + break; + } + } + } + } +#endif + ld.skip = true; + printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str())); + } + } + } + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp new file mode 100644 index 0000000000..a546b0237d --- /dev/null +++ b/modules/dnn/src/net_openvino.cpp @@ -0,0 +1,568 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include + +#include +#include + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +#ifdef HAVE_INF_ENGINE + + +/** mark input pins as outputs from other subnetworks + * FIXIT must be done by DNN engine not ngraph. 
+ */ +void Net::Impl::addNgraphOutputs(LayerData& ld) +{ + CV_TRACE_FUNCTION(); + + CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type); + + Ptr layerNet; + auto it = ld.backendNodes.find(preferableBackend); + if (it != ld.backendNodes.end()) + { + Ptr node = it->second; + if (!node.empty()) + { + Ptr ieNode = node.dynamicCast(); + CV_Assert(!ieNode.empty()); + CV_Assert(!ieNode->net.empty()); + layerNet = ieNode->net; + } + } + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData& inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr ieInpNode = inpNode.dynamicCast(); + CV_Assert(!ieInpNode.empty()); + CV_Assert(!ieInpNode->net.empty()); + if (layerNet != ieInpNode->net) + { + CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); + ieInpNode->net->addOutput(ieInpNode); + } + } + } +} + +void Net::Impl::initNgraphBackend(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, ""); + + Ptr net; + + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) + { + const LayerData& ld = it->second; + if (ld.id == 0) + { + CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || + (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; + outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." 
+ std::to_string(i)) : outputName; + dataPtr->setName(outputName); + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; + dataPtr->setName(outputName); + } + } + } + + if (skipInfEngineInit) + { + Ptr node = layers[lastLayerId].backendNodes[preferableBackend]; + CV_Assert(!node.empty()); + + Ptr ieNode = node.dynamicCast(); + CV_Assert(!ieNode.empty()); + + CV_Assert(ieNode->net); + InfEngineNgraphNet& ienet = *ieNode->net; + ienet.reset(); + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + if (ld.id == 0) + { + for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]); + dataPtr->setName(netInputLayer->outNames[i]); + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + auto it = ienet.outputsDesc.find(ld.name); + if (it != ienet.outputsDesc.end()) + { + const InferenceEngine::TensorDesc& descriptor = it->second; + InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name); + dataPtr->setName(ld.name); + } + else + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + dataPtr->setName(ld.name); + } + } + } + ienet.addBlobs(ld.inputBlobsWrappers); + ienet.addBlobs(ld.outputBlobsWrappers); + ld.skip = true; + } + layers[lastLayerId].skip = false; + ienet.init((Target)preferableTarget); + return; + } + + bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || + openvino::checkTarget(DNN_TARGET_CPU)); + + // Build Inference Engine networks from sets of layers that support this + // backend. 
Split a whole model on several Inference Engine networks if + // some of layers are not implemented. + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + + CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ..."); + + if (ld.id == 0 && ld.skip) + { + CV_LOG_DEBUG(NULL, "DNN/IE: SKIP!"); + continue; + } + + bool fused = ld.skip; + Ptr layer = ld.layerInstance; + if (!fused && !layer->supportBackend(preferableBackend)) + { + CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); + bool customizable = ld.id != 0 && supportsCPUFallback; + + // TODO: there is a bug in Myriad plugin with custom layers shape infer. + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) + { + for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) + { + customizable = ld.inputBlobs[i]->size[0] == 1; + } + } + + // TODO: fix these workarounds + if (preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16) + customizable &= ld.type != "Concat"; + + if (preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16) + customizable &= ld.type != "Power"; + + if (preferableTarget == DNN_TARGET_OPENCL) + customizable &= ld.type != "Eltwise"; + + if (!customizable) + { + CV_LOG_DEBUG(NULL, "DNN/IE: NOT customizable!"); + addNgraphOutputs(ld); + net = Ptr(); + layer->preferableTarget = DNN_TARGET_CPU; + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData& inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr ieNode = inpNode.dynamicCast(); + CV_Assert(!ieNode.empty()); + ieNode->net->addOutput(ieNode); + } + } + continue; + } + } + ld.skip = true; // Initially skip all Inference Engine supported layers. 
+ + // Create a new network if one of inputs from different Inference Engine graph. + std::vector> inputNodes; + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) + if (inputNodes.size() == ld.inputBlobsId.size()) + { + break; + } + LayerData& inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr ieInpNode = inpNode.dynamicCast(); + CV_Assert(!ieInpNode.empty()); + CV_Assert(!ieInpNode->net.empty()); + if (ieInpNode->net == net && !fused) + { + inputNodes.push_back(inpNode); + continue; + } + } + + if (net.empty()) + { + net = Ptr(new InfEngineNgraphNet(*this)); + } + + if (!fused) + { + std::vector inputNames; + std::vector inputs; + + auto curr_pos = inpLd.consumers.begin(); + auto compare = [&ld](const LayerPin& lp) { return lp.lid == ld.id; }; + auto cons = curr_pos; + while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != + inpLd.consumers.end()) { + int cons_inp = cons->oid; + Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. 
+ dynamicCast(); + CV_Assert(!inpWrapper.empty()); + auto iter = std::find(inputNames.begin(), inputNames.end(), + inpWrapper->dataPtr->getName()); + if (iter == inputNames.end()) + { + inputNames.push_back(inpWrapper->dataPtr->getName()); + inputs.push_back(inpLd.outputBlobs[cons_inp]); + } + curr_pos = cons + 1; + } + + auto inps = net->setInputs(inputs, inputNames); + for (auto& inp : inps) + { + inputNodes.emplace_back(Ptr(new InfEngineNgraphNode(inp))); + } + } + } + + Ptr node; + if (!net.empty()) + { + if (fused) + { + bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && + ld.inputBlobs[0]->data == ld.outputBlobs[0].data; + CV_Assert(inPlace); + node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; + ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; + } + } + else + { + net = Ptr(new InfEngineNgraphNet(*this)); + } + + if (!fused) + { + CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + int lid = ld.inputBlobsId[i].lid; + int oid = ld.inputBlobsId[i].oid; + if (oid == 0 || lid == 0) + continue; + + auto ieInpNode = inputNodes[i].dynamicCast(); + const auto& ngraph_input_node = ieInpNode->node; + CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); + + // Handle parameters from other subnets. 
Output port is not used in this case + if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && + ngraph_input_node->get_output_size() == 1) + { + inputNodes[i] = Ptr(new InfEngineNgraphNode(ngraph_input_node)); + continue; + } + CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) + // FIXIT refactor ".initNgraph()" API to use Output + // WA: use Concat to emulate Identity operation with requested output port + auto oid_node = std::make_shared(ngraph::OutputVector { ngraph_input_node->output(oid) }, 0); + inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); +#elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) + inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); +#else + inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false))); +#endif + } + + if (layer->supportBackend(preferableBackend)) + { + CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size()); + node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes); +#if 0 // FIXIT doesn't work with multiple outputs (set name is applied to the same node) + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + node.dynamicCast()->setName(dataPtr->getName()); + } +#else + node.dynamicCast()->setName(layer->name); +#endif + } + else + { + CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type); + node = Ptr(new InfEngineNgraphNode(inputNodes, + ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals)); + } + } + else if (node.empty()) + { + CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass..."); + continue; + } + + ld.backendNodes[preferableBackend] = node; + + Ptr ieNode = node.dynamicCast(); + 
CV_Assert(!ieNode.empty()); + ieNode->net = net; + + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode); + break; + } + } + ieNode->net->setNodePtr(&ieNode->node); + + net->addBlobs(ld.inputBlobsWrappers); + net->addBlobs(ld.outputBlobsWrappers); + addNgraphOutputs(ld); + } + + // Initialize all networks. + for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) + { + LayerData& ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr ieNode = node.dynamicCast(); + if (ieNode.empty()) + continue; + + CV_Assert(!ieNode->net.empty()); + + if (!ieNode->net->isInitialized()) + { + ieNode->net->addOutput(ieNode); + ieNode->net->createNet((Target)preferableTarget); + ld.skip = false; + } + } +} + +//} // Net::Impl + +/*static*/ +Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet) +{ + CV_TRACE_FUNCTION(); + + CV_TRACE_REGION("register_inputs"); + + std::vector inputsNames; + std::vector inp_shapes; + for (auto& it : ieNet.getInputsInfo()) + { + inputsNames.push_back(it.first); + std::vector dims = it.second->getTensorDesc().getDims(); + inp_shapes.push_back(std::vector(dims.begin(), dims.end())); + } + + Net cvNet; + cvNet.setInputsNames(inputsNames); + + // set empty input to determine input shapes + for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id) + { + cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]); + } + + CV_TRACE_REGION_NEXT("backendNode"); + + Ptr backendNode; + { + auto fake_node = std::make_shared(ngraph::element::f32, ngraph::Shape {}); + Ptr backendNodeNGraph(new InfEngineNgraphNode(fake_node)); + backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(*(cvNet.impl), ieNet)); + backendNode = backendNodeNGraph; + } + + CV_TRACE_REGION_NEXT("register_outputs"); + + auto ngraphFunction = 
ieNet.getFunction(); + CV_Assert(ngraphFunction); + std::vector> ngraphOperations = ngraphFunction->get_ops(); + + for (auto& it : ieNet.getOutputsInfo()) + { + CV_TRACE_REGION("output"); + const auto& outputName = it.first; + + LayerParams lp; + int lid = cvNet.addLayer(it.first, "", lp); + + LayerData& ld = cvNet.impl->layers[lid]; + + { + Ptr cvLayer(new NgraphBackendLayer(ieNet)); + cvLayer->name = outputName; + cvLayer->type = "_unknown_"; + + auto process_layer = [&](const std::string& name) -> bool + { + CV_TRACE_REGION("ngraph_function"); + for (const auto& op : ngraphOperations) + { + CV_Assert(op); + if (op->get_friendly_name() == name) + { + const std::string typeName = op->get_type_info().name; + cvLayer->type = typeName; + return true; + } + } + return false; + }; + + bool found = process_layer(outputName); + if (!found) + { + auto pos = outputName.rfind('.'); // cut port number: ".0" + if (pos != std::string::npos) + { + std::string layerName = outputName.substr(0, pos); + found = process_layer(layerName); + } + } + if (!found) + CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'"); + + ld.layerInstance = cvLayer; + ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode; + } + + for (int i = 0; i < inputsNames.size(); ++i) + cvNet.connect(0, i, lid, i); + } + + CV_TRACE_REGION_NEXT("finalize"); + + cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + + cvNet.impl->skipInfEngineInit = true; + return cvNet; +} +#endif // HAVE_INF_ENGINE + +Net Net::readFromModelOptimizer(const String& xml, const String& bin) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(xml); CV_UNUSED(bin); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + InferenceEngine::Core& ie = getCore(""); + InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); + + return 
Impl::createNetworkFromModelOptimizer(ieNet); +#endif // HAVE_INF_ENGINE +} + +Net Net::readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights) +{ + CV_TRACE_FUNCTION(); + CV_Assert(!bufferModelConfig.empty()); + CV_Assert(!bufferWeights.empty()); + return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(), + bufferWeights.data(), bufferWeights.size()); +} + +Net Net::readFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize +) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr); + CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferModelConfigSize); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + InferenceEngine::Core& ie = getCore(""); + + std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); + + InferenceEngine::CNNNetwork ieNet; + try + { + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); + InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize); + + ieNet = ie.ReadNetwork(model, weights_blob); + } + catch (const std::exception& e) + { + CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); + } + + return Impl::createNetworkFromModelOptimizer(ieNet); +#endif // HAVE_INF_ENGINE +} + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_quantization.cpp b/modules/dnn/src/net_quantization.cpp new file mode 100644 index 0000000000..b8ee2d3219 --- /dev/null +++ b/modules/dnn/src/net_quantization.cpp @@ -0,0 +1,296 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +// FIXIT drop from inference API +static +void getQuantizationParams(const Mat& src, std::vector& scales, std::vector& zeropoints) +{ + const int qmin = -128; // INT8_MIN + const int qmax = 127; // INT8_MAX + + double rmin, rmax, sc, zp; + cv::minMaxIdx(src, &rmin, &rmax); + + // 0 must be present in the range [rmin, rmax] + rmin = std::min(rmin, 0.0); + rmax = std::max(rmax, 0.0); + + sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin); + zp = qmin - (rmin/sc); + + scales.push_back((float)sc); + zeropoints.push_back((int)std::round(zp)); +} + +// FIXIT drop from inference API +Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) +{ + // Net can be quantized only once. + if (netWasQuantized) + CV_Error(Error::StsBadArg, "Cannot quantize a quantized net"); + + CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S"); + CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S"); + + bool originalFusion = fusion; + int prefBackend = preferableBackend; + int prefTarget = preferableTarget; + + // Disable fusions and use CPU backend to quantize net + setPreferableBackend(DNN_BACKEND_OPENCV); + setPreferableTarget(DNN_TARGET_CPU); + enableFusion(false); + + if (calibData.isMat()) + { + setInput(calibData.getMat(), /*name=*/"", /*scalefactor=*/1.0, /*mean=*/Scalar()); + } + else if (calibData.isMatVector()) + { + std::vector calibDataVec; + calibData.getMatVector(calibDataVec); + + std::vector inpNames = netInputLayer->outNames; + CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs"); + for (int 
i = 0; i < calibDataVec.size(); i++) + setInput(calibDataVec[i], inpNames[i], /*scalefactor=*/1.0, /*mean=*/Scalar()); + } + + std::vector outNames = getUnconnectedOutLayersNames(); + std::vector pins; + for (int i = 0; i < outNames.size(); i++) + pins.push_back(getPinByAlias(outNames[i])); + setUpNet(pins); + + // Compute scales and zeropoints for all the layers + std::vector > scales; + std::vector > zeropoints; + for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData& ld = it->second; + if (!ld.skip) + { + Ptr layer = ld.layerInstance; + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + inps[i] = *ld.inputBlobs[i]; + layer->forward(inps, ld.outputBlobs, ld.internals); + } + + std::vector sc; + std::vector zp; + if (ld.type == "TanH") + { + sc.push_back(1.f/128); + zp.push_back(0); + } + else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax") + { + if (ld.params.get("log_softmax", false)) + { + sc.push_back(16.f/256); + zp.push_back(127); + } + else + { + sc.push_back(1.f/256); + zp.push_back(-128); + } + } + else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop") + { + std::vector inp_sc; std::vector inp_zp; + getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp); + sc.assign(ld.outputBlobs.size(), inp_sc[0]); + zp.assign(ld.outputBlobs.size(), inp_zp[0]); + } + else + { + for (int i = 0; i < ld.outputBlobs.size(); i++) + getQuantizationParams(ld.outputBlobs[i], sc, zp); + } + scales.push_back(sc); + zeropoints.push_back(zp); + } + + // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs + // is not needed during quantized inference. We start from the last layer and modify the layer's input scales/zeropoints + // TODO : Need a different approach. 
Current solution fails when 2 such layers have the same input layer + for (Impl::MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) + { + LayerData& ld = it->second; + // Layers with multiple outputs. Number of outputs is equal to number of inputs + if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" || + ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" || + ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" || + (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) /* ReLU with negative slope 0 */) + { + for (int i = 0; i < ld.outputBlobs.size(); i++) + { + LayerPin &pin = ld.inputBlobsId[i]; + scales[pin.lid][pin.oid] = scales[ld.id][i]; + zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i]; + } + } + // Layers with multiple inputs and single output. + else if ((ld.type == "Pooling" && toLowerCase(ld.params.get("pool", "max")) == "max") /* Max Pooling */ || + (ld.type == "Eltwise" && toLowerCase(ld.params.get("operation", "sum")) == "max") /* Elementwise max */ || + ld.type == "Concat") + { + for (int i = 0; i < ld.inputBlobsId.size(); i++) + { + LayerPin &pin = ld.inputBlobsId[i]; + scales[pin.lid][pin.oid] = scales[ld.id][0]; + zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0]; + } + } + } + + // Create a new Net and add quantized layers to it. 
+ Net dstNet_; + Net::Impl& dstNet = *(dstNet_.impl); + dstNet.netWasQuantized = true; + dstNet.setInputsNames(netInputLayer->outNames); + dstNet.setPreferableBackend(prefBackend); + dstNet.setPreferableTarget(prefTarget); + dstNet.enableFusion(originalFusion); + + for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData ld = it->second; + if (ld.id == 0) + { + LayerData &quantInpLd = dstNet.layers[0]; + quantInpLd.dtype = inputsDtype; + quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); + quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); + continue; + } + + std::vector inpPins = ld.inputBlobsId; + // Fill input and output scales/zeropoints for the layer + std::vector > inp_out_sc(2); + std::vector > inp_out_zp(2); + for (int i = 0; i < inpPins.size(); i++) + { + LayerPin &pin = inpPins[i]; + inp_out_sc[0].push_back(scales[pin.lid][pin.oid]); + inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]); + } + inp_out_sc[1] = scales[ld.id]; + inp_out_zp[1] = zeropoints[ld.id]; + + // Quantize layer + Ptr layer = ld.layerInstance; + if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params)) + { + ld.type += "Int8"; + ld.dtype = CV_8S; + } + ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size())); + ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size())); + + // Check and add quantize/dequantize node before layer + for (int i = 0; i < inpPins.size(); i++) + { + LayerPin &pin = inpPins[i]; + LayerData &inpLd = dstNet.getLayerData(getLayerName(pin.lid)); + pin.lid = inpLd.id; + if (inpLd.dtype != ld.dtype) + { + String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? 
cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid) + : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid); + // Check if quantize/dequantize node for the input layer already exists + if (dstNet.getLayerId(layerName) >= 0) + { + pin.lid = dstNet.getLayerId(layerName); + pin.oid = 0; + } + else + { + LayerParams lp; + lp.set("scales", inp_out_sc[0][i]); + lp.set("zeropoints", inp_out_zp[0][i]); + lp.name = layerName; + lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize"; + int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp); + dstNet.connect(pin.lid, pin.oid, newLid, 0); + pin.lid = newLid; pin.oid = 0; + } + } + } + + // Add quantized layer to Net and connect to its inputs. + int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params); + for( int i = 0; i < inpPins.size(); i++ ) + dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i); + + // If the layer is a output layer, add quantize/dequantize node after it based on output's data type. + if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype) + { + LayerParams lp; + lp.set("scales", inp_out_sc[1][0]); + lp.set("zeropoints", inp_out_zp[1][0]); + lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name; + lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? 
"Quantize" : "Dequantize"; + dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp); + } + } + // Restore FP32 Net's backend, target and fusion + setPreferableBackend(prefBackend); + setPreferableTarget(prefTarget); + enableFusion(originalFusion); + return dstNet_; +} + +// FIXIT drop from inference API +void Net::Impl::getInputDetails(std::vector& scales, std::vector& zeropoints) /*const*/ +{ + if (!netWasQuantized) + CV_Error(Error::StsBadFunc, "Net isn't quantized"); + + LayerParams &lp = layers[0].params; + DictValue sc = lp.get("scales"); + DictValue zp = lp.get("zeropoints"); + + for (int i = 0; i < sc.size(); i++) + { + scales.push_back(sc.get(i)); + zeropoints.push_back(zp.get(i)); + } +} + +// FIXIT drop from inference API +void Net::Impl::getOutputDetails(std::vector& scales, std::vector& zeropoints) /*const*/ +{ + if (!netWasQuantized) + CV_Error(Error::StsBadFunc, "Net isn't quantized"); + + std::vector outLayerIds = getUnconnectedOutLayers(); + for (auto &lid : outLayerIds) + { + LayerParams &lp = layers[lid].params; + DictValue sc = lp.get("scales"); + DictValue zp = lp.get("zeropoints"); + + for (int i = 0; i < sc.size(); i++) + { + scales.push_back(sc.get(i)); + zeropoints.push_back(zp.get(i)); + } + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/op_cuda.cpp b/modules/dnn/src/op_cuda.cpp new file mode 100644 index 0000000000..a1b588ecfb --- /dev/null +++ b/modules/dnn/src/op_cuda.cpp @@ -0,0 +1,106 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#ifdef HAVE_CUDA +#include "op_cuda.hpp" +#include "cuda4dnn/init.hpp" +#include "net_impl.hpp" + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::initCUDABackend(const std::vector& blobsToKeep_) +{ + CV_Assert(preferableBackend == DNN_BACKEND_CUDA); + + if (!cudaInfo) /* we need to check only once */ + cuda4dnn::checkVersions(); + + if (cuda4dnn::getDeviceCount() <= 0) + CV_Error(Error::StsError, "No CUDA capable device found."); + + if (cuda4dnn::getDevice() < 0) + CV_Error(Error::StsError, "No CUDA capable device selected."); + + if (!cuda4dnn::isDeviceCompatible()) + CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration."); + + if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) + { + CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target."); + preferableTarget = DNN_TARGET_CUDA; + } + + if (!cudaInfo) + { + cuda4dnn::csl::CSLContext context; + context.stream = cuda4dnn::csl::Stream(true); + context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream); + context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream); + + auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers + cudaInfo = std::unique_ptr(new CudaInfo_t(std::move(context), std::move(d2h_stream))); + } + + cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any + + for (auto& layer : layers) + { + auto& ld = layer.second; + if (ld.id == 0) + { + for (auto& wrapper : ld.inputBlobsWrappers) + { + auto cudaWrapper = wrapper.dynamicCast(); + cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); + } + } + + for (auto& wrapper : ld.outputBlobsWrappers) + { + auto cudaWrapper = wrapper.dynamicCast(); + cudaWrapper->setStream(cudaInfo->context.stream, 
cudaInfo->d2h_stream); + } + } + + for (auto& layer : layers) + { + auto& ld = layer.second; + auto& layerInstance = ld.layerInstance; + + if (!layerInstance->supportBackend(DNN_BACKEND_CUDA)) + { + std::ostringstream os; + os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name + << "\" of type " << ld.type << '\n'; + CV_LOG_INFO(NULL, os.str().c_str()); + continue; + } + + /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */ + auto context = cudaInfo->context; + auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers); + ld.backendNodes[DNN_BACKEND_CUDA] = node; + + auto cudaNode = node.dynamicCast(); + cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes()); + } + + if (blobsToKeep_.size() > 1) + { + for (const auto& pin : blobsToKeep_) + { + LayerData& ld = layers[pin.lid]; + ld.cudaD2HBackgroundTransfers.push_back(pin.oid); + } + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // HAVE_CUDA diff --git a/modules/dnn/src/op_halide.cpp b/modules/dnn/src/op_halide.cpp index c96971bc6a..653de36146 100644 --- a/modules/dnn/src/op_halide.cpp +++ b/modules/dnn/src/op_halide.cpp @@ -8,15 +8,135 @@ #include "precomp.hpp" #include #include "op_halide.hpp" +#include "net_impl.hpp" #ifdef HAVE_HALIDE +#include "halide_scheduler.hpp" + #include #endif // HAVE_HALIDE -namespace cv +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::setHalideScheduler(const String& scheduler) { -namespace dnn + halideConfigFile = scheduler; +} + + +#ifdef HAVE_HALIDE + + +void Net::Impl::compileHalide() { + CV_TRACE_FUNCTION(); + + CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); + + HalideScheduler scheduler(halideConfigFile); + std::vector< std::reference_wrapper > compileList; compileList.reserve(64); + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + Ptr layer = 
ld.layerInstance; + if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) + { + CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); + bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); + if (!scheduled) + { + // Use automatic scheduling provided by layer. + layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], + ld.inputBlobs, ld.outputBlobs, + preferableTarget); + } + compileList.emplace_back(ld); + } + } + std::atomic progress(0); + auto fn = ([&] () -> void + { + for (;;) + { + int id = progress.fetch_add(1); + if ((size_t)id >= compileList.size()) + return; + const LayerData& ld = compileList[id].get(); + Ptr node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second; + dnn::compileHalide(ld.outputBlobs, node, preferableTarget); + } + }); + size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency()); + num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); + std::vector threads(num_threads - 1); + for (auto& t: threads) t = std::thread(fn); + fn(); // process own tasks + for (auto& t: threads) t.join(); +} + + +void Net::Impl::initHalideBackend() +{ + CV_TRACE_FUNCTION(); + CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide()); + + // Iterator to current layer. + MapIdToLayerData::iterator it = layers.begin(); + // Iterator to base layer for fusion. In example, in case of conv+bn+relu + // it'll be a conv layer. + MapIdToLayerData::iterator baseIt = layers.begin(); + for (; it != layers.end(); it++) + { + LayerData &ldTop = it->second; + Ptr layerTop = ldTop.layerInstance; + if (!layerTop->supportBackend(preferableBackend)) + { + // Move base iterator to layer that don't support preferable + // backend to prevent fusion over layer of different backend. + baseIt = it; + continue; + } + // Try to do layers fusion. + LayerData &ldBot = baseIt->second; + Ptr layerBot = ldBot.layerInstance; + // 1. Check that bottom and top from the same backends. 
+ if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) + { + // 2. Check that current layer works in-place. + bool inPlace = ldTop.inputBlobs.size() == 1 && + ldBot.outputBlobs.size() == 1 && + ldTop.inputBlobs[0]->data == + ldBot.outputBlobs[0].data; + if (inPlace) + { + // 3. Try to attach node. + CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); + Ptr fusedNode = + layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); + if (!fusedNode.empty()) + { + ldTop.skip = true; + ldBot.backendNodes[preferableBackend] = fusedNode; + ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers; + continue; + } + } + } + // No layers fusion. + ldTop.skip = false; + ldTop.backendNodes[DNN_BACKEND_HALIDE] = + layerTop->initHalide(ldTop.inputBlobsWrappers); + baseIt = it; + } +} + + +#endif // HAVE_HALIDE +CV__DNN_INLINE_NS_END + #ifdef HAVE_HALIDE static MatShape getBufferShape(const MatShape& shape) @@ -226,5 +346,83 @@ bool haveHalide() #endif // HAVE_HALIDE } -} // namespace dnn -} // namespace cv + +CV__DNN_INLINE_NS_BEGIN + + +void Layer::applyHalideScheduler(Ptr& node, const std::vector &inputs, + const std::vector &outputs, int targetId) const +{ +#ifndef HAVE_HALIDE + CV_Error(Error::StsNotImplemented, ""); +#else + CV_TRACE_FUNCTION(); + + Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), + xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile"); + Halide::Func& top = node.dynamicCast()->funcs.back(); + + int outW, outH, outC, outN; + getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN); + + if (targetId == DNN_TARGET_CPU) + { + if (outW == 1 && outH == 1) + { + if (outC + outN == 1) + return; + + if (outC > 8) + top.split(c, co, ci, 8) + .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) + .parallel(tile) + .vectorize(ci, 8); + else + top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile) + .parallel(tile); + } + else + { + if (outH > 2) + { + top.reorder(x, c, y) + .split(y, yo, yi, 2) + .fuse(yo, n, 
tile) + .parallel(tile) + .unroll(yi) + .vectorize(x, outW >= 16 ? 16 : outW); + } + } + } + else if (targetId == DNN_TARGET_OPENCL) + { + if (outW == 1 && outH == 1) + { + int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC; + top.split(c, co, ci, c_split) + .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) + .gpu_blocks(tile) + .gpu_threads(ci); + } + else + { + int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW; + int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH; + // Supported vectorization widths: 2, 3, 4, 8, 16 + int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC); + top.split(x, xo, xi, x_split).split(y, yo, yi, y_split) + .split(c, co, ci, c_split) + .gpu_blocks(xo, yo, co) + .gpu_threads(xi, yi) + .reorder(xi, yi, ci, xo, yo, co) + .vectorize(ci); + } + } + else + CV_Error(Error::StsNotImplemented, "Unknown target identifier"); +#endif // HAVE_HALIDE +} + + +CV__DNN_INLINE_NS_END +}} // namespace diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 2899545c6d..8a27dc2221 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -254,6 +254,31 @@ cv::String getInferenceEngineCPUType() return cpu_type; } + +namespace openvino { + +bool checkTarget(Target target) +{ + // Lightweight detection + const std::vector devices = getCore("").GetAvailableDevices(); + for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) + { + if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD) + return true; + if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL) + return true; + else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA) + return true; + else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU) + return true; + else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + return true; + } + return false; +} + +} // 
namespace openvino + #else // HAVE_INF_ENGINE cv::String getInferenceEngineBackendType() diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index ed1323d7dd..856441e71d 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -73,6 +73,13 @@ void infEngineBlobsToMats(const std::vector& blobs, CV__DNN_INLINE_NS_BEGIN +namespace openvino { + +// TODO: use std::string as parameter +bool checkTarget(Target target); + +} // namespace openvino + bool isMyriadX(); bool isArmComputePlugin(); diff --git a/modules/dnn/src/op_vkcom.cpp b/modules/dnn/src/op_vkcom.cpp index a252104240..7249c27ce2 100644 --- a/modules/dnn/src/op_vkcom.cpp +++ b/modules/dnn/src/op_vkcom.cpp @@ -8,12 +8,51 @@ #include "precomp.hpp" #include #include "op_vkcom.hpp" +#include "net_impl.hpp" namespace cv { namespace dnn { #ifdef HAVE_VULKAN + +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::initVkComBackend() +{ + CV_TRACE_FUNCTION(); + CV_Assert(preferableBackend == DNN_BACKEND_VKCOM); + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData &ld = it->second; + Ptr layer = ld.layerInstance; + if (!layer->supportBackend(preferableBackend)) + { + continue; + } + + ld.skip = false; + + try + { + ld.backendNodes[DNN_BACKEND_VKCOM] = + layer->initVkCom(ld.inputBlobsWrappers); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. 
" << e.what()); + ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr(); + } + } +} + +CV__DNN_INLINE_NS_END + + +/////////////////////////////////////////////////////////////////////////////// + void copyToTensor(vkcom::Tensor &dst, const Mat &src) { CV_Assert(src.isContinuous() && src.type() == CV_32F); diff --git a/modules/dnn/src/op_webnn.cpp b/modules/dnn/src/op_webnn.cpp index 4dba55bcbe..d5c1740423 100644 --- a/modules/dnn/src/op_webnn.cpp +++ b/modules/dnn/src/op_webnn.cpp @@ -2,6 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. +#include "precomp.hpp" #include #include "op_webnn.hpp" @@ -13,10 +14,281 @@ #include +#include "net_impl.hpp" + namespace cv { namespace dnn { #ifdef HAVE_WEBNN +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::addWebnnOutputs(LayerData &ld) +{ + CV_TRACE_FUNCTION(); + + Ptr layerNet; + auto it = ld.backendNodes.find(preferableBackend); + if (it != ld.backendNodes.end()) + { + Ptr node = it->second; + if (!node.empty()) + { + Ptr webnnNode = node.dynamicCast(); + CV_Assert(!webnnNode.empty()); CV_Assert(!webnnNode->net.empty()); + layerNet = webnnNode->net; + } + } + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr webnnInpNode = inpNode.dynamicCast(); + CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); + if (layerNet != webnnInpNode->net) + { + webnnInpNode->net->addOutput(webnnInpNode->name); + webnnInpNode->net->setUnconnectedNodes(webnnInpNode); + } + } + } +} + + +void Net::Impl::initWebnnBackend(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn()); + + Ptr net; + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData &ld = it->second; + if (ld.id 
== 0) + { + CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || + (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); + std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; + outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName; + wrapper->name = outputName; + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); + std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; + wrapper->name = outputName; + } + } + } + + // Build WebNN networks from sets of layers that support this + // backend. Split a whole model on several WebNN networks if + // some of layers are not implemented. + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData &ld = it->second; + + if (ld.id == 0 && ld.skip) + continue; + + bool fused = ld.skip; + Ptr layer = ld.layerInstance; + if (!fused && !layer->supportBackend(preferableBackend)) + { + // For test use. when not using WebNN, the test case will fail + // with the following code. + CV_LOG_WARNING(NULL, "Layer " + ld.type + " name " + ld.name + " is unsupported by WebNN backend."); + + addWebnnOutputs(ld); + net = Ptr(); + layer->preferableTarget = DNN_TARGET_CPU; + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) { + Ptr webnnNode = inpNode.dynamicCast(); + CV_Assert(!webnnNode.empty()); + webnnNode->net->setUnconnectedNodes(webnnNode); + } + } + continue; + } + ld.skip = true; // Initially skip all WebNN supported layers. 
+ + // Create a new network if one of inputs from different WebNN graph. + std::vector> inputNodes; + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) + if (inputNodes.size() == ld.inputBlobsId.size()) { + break; + } + LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr webnnInpNode = inpNode.dynamicCast(); + CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); + if (webnnInpNode->net == net && !fused) { + inputNodes.push_back(inpNode); + continue; + } + } + + if (net.empty()) { + net = Ptr(new WebnnNet()); + } + + if (!fused) { + std::vector inputNames; + std::vector inputs; + + auto curr_pos = inpLd.consumers.begin(); + auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; }; + auto cons = curr_pos; + while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != + inpLd.consumers.end()) { + int cons_inp = cons->oid; + Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. 
+ dynamicCast(); + CV_Assert(!inpWrapper.empty()); + auto iter = std::find(inputNames.begin(), inputNames.end(), + inpWrapper->name); + if (iter == inputNames.end()) { + inputNames.push_back(inpWrapper->name); + inputs.push_back(inpLd.outputBlobs[cons_inp]); + } + curr_pos = cons + 1; + } + + auto inps = net->setInputs(inputs, inputNames); + for (auto& inp : inps) { + WebnnBackendNode* node = new WebnnBackendNode(inp); + node->net = net; + inputNodes.emplace_back(Ptr(node)); + } + } + } + + Ptr node; + if (!net.empty()) + { + if (fused) + { + bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && + ld.inputBlobs[0]->data == ld.outputBlobs[0].data; + CV_Assert(inPlace); + node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; + ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; + } + } + else { + net = Ptr(new WebnnNet()); + } + + if (!fused) + { + CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + int lid = ld.inputBlobsId[i].lid; + int oid = ld.inputBlobsId[i].oid; + if (oid == 0 || lid == 0) + continue; + + auto webnnInpNode = inputNodes[i].dynamicCast(); + inputNodes[i] = Ptr(new WebnnBackendNode(webnnInpNode->operand)); + } + + if (layer->supportBackend(preferableBackend)) + { + if (ld.type == "Const") { + ml::Operand fake_operand; + Ptr fake_input_node = Ptr(new WebnnBackendNode(fake_operand)); + fake_input_node->net = net; + inputNodes.push_back(fake_input_node); + } + node = layer->initWebnn(ld.inputBlobsWrappers, inputNodes); + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); + node.dynamicCast()->name = wrapper->name; + } + } + else + { + continue; + } + } + else if (node.empty()) + continue; + + ld.backendNodes[preferableBackend] = node; + + Ptr webnnNode = node.dynamicCast(); + CV_Assert(!webnnNode.empty()); + webnnNode->net = net; + + if (ld.consumers.empty()) { + 
// TF EAST_text_detection + webnnNode->net->setUnconnectedNodes(webnnNode); + } + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + webnnNode->net->addOutput(webnnNode->name); + break; + } + } + net->addBlobs(ld.inputBlobsWrappers); + net->addBlobs(ld.outputBlobsWrappers); + addWebnnOutputs(ld); + } + + // Initialize all networks. + for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) + { + LayerData &ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr webnnNode = node.dynamicCast(); + if (webnnNode.empty()) + continue; + + CV_Assert(!webnnNode->net.empty()); + + if (!webnnNode->net->isInitialized()) + { + webnnNode->net->setUnconnectedNodes(webnnNode); + webnnNode->net->createNet((Target)preferableTarget); + ld.skip = false; + } + } +} + + +CV__DNN_INLINE_NS_END + + namespace webnn { ml::Operand BuildConstant(const ml::GraphBuilder& builder, const std::vector& dimensions, diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp index 6ee693dd6b..abcd3745f9 100644 --- a/modules/dnn/src/precomp.hpp +++ b/modules/dnn/src/precomp.hpp @@ -66,6 +66,15 @@ #undef HAVE_CUDA #endif +#include +#include +#include +#include +#include +#include +#include +#include + #include #include diff --git a/modules/dnn/src/registry.cpp b/modules/dnn/src/registry.cpp new file mode 100644 index 0000000000..a802e1602b --- /dev/null +++ b/modules/dnn/src/registry.cpp @@ -0,0 +1,144 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "precomp.hpp" + +#include "op_halide.hpp" +#include "op_inf_engine.hpp" +#include "ie_ngraph.hpp" +#include "op_vkcom.hpp" +#include "op_cuda.hpp" +#include "op_webnn.hpp" + +#include "halide_scheduler.hpp" + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +class BackendRegistry +{ +public: + typedef std::vector< std::pair > BackendsList; + const BackendsList & getBackends() const { return backends; } + static BackendRegistry & getRegistry() + { + static BackendRegistry impl; + return impl; + } + + +private: + BackendRegistry() + { +#ifdef HAVE_HALIDE + backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU)); +#ifdef HAVE_OPENCL + if (cv::ocl::useOpenCL()) + backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL)); +#endif +#endif // HAVE_HALIDE + +#ifdef HAVE_INF_ENGINE + if (openvino::checkTarget(DNN_TARGET_CPU)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU)); +#endif + } + if (openvino::checkTarget(DNN_TARGET_MYRIAD)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD)); +#endif + } + if (openvino::checkTarget(DNN_TARGET_HDDL)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL)); +#endif + } +#ifdef HAVE_OPENCL + if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel()) + { + if (openvino::checkTarget(DNN_TARGET_OPENCL)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL)); +#endif + } + if (openvino::checkTarget(DNN_TARGET_OPENCL_FP16)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16)); +#endif + } + } +#endif +#endif // HAVE_INF_ENGINE + +#ifdef HAVE_WEBNN + if (haveWebnn()) + { + backends.push_back(std::make_pair(DNN_BACKEND_WEBNN, DNN_TARGET_CPU)); 
+ } +#endif // HAVE_WEBNN + +#ifdef HAVE_OPENCL + if (cv::ocl::useOpenCL()) + { + backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)); + backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)); + } +#endif + + backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); + +#ifdef HAVE_VULKAN + if (haveVulkan()) + backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN)); +#endif + +#ifdef HAVE_CUDA + if (haveCUDA()) + { + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); + } +#endif + } + + BackendsList backends; +}; + + +std::vector> getAvailableBackends() +{ + return BackendRegistry::getRegistry().getBackends(); +} + +std::vector getAvailableTargets(Backend be) +{ + if (be == DNN_BACKEND_DEFAULT) + be = (Backend)getParam_DNN_BACKEND_DEFAULT(); +#ifdef HAVE_INF_ENGINE + if (be == DNN_BACKEND_INFERENCE_ENGINE) + be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; +#endif + + std::vector result; + const BackendRegistry::BackendsList all_backends = getAvailableBackends(); + for (BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i) + { + if (i->first == be) + result.push_back(i->second); + } + return result; +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn From a120adde63abdf6bc877e1d047302302f82542ba Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 28 Feb 2022 09:42:38 +0000 Subject: [PATCH 19/84] dnn: add dnn.cpp file with information about git commits history --- modules/dnn/src/dnn.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 modules/dnn/src/dnn.cpp diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp new file mode 100644 index 0000000000..8c397b14f2 --- /dev/null +++ b/modules/dnn/src/dnn.cpp @@ -0,0 +1,10 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +// This is a stub file to provide git history information. +// Content has been moved, see PR: https://github.com/opencv/opencv/pull/21662 +// Base commit: 19926e2979ef049a89dd029e2231555db40c2776 +// Original dnn.cpp content: https://github.com/opencv/opencv/blame/19926e2979ef049a89dd029e2231555db40c2776/modules/dnn/src/dnn.cpp From 6390b50d6e2cb8463ea747a7976317ff6abc5572 Mon Sep 17 00:00:00 2001 From: Pierre Chatelier Date: Fri, 11 Mar 2022 19:07:12 +0100 Subject: [PATCH 20/84] Merge pull request #21701 from chacha21:tiff_10_12_14 Add 10-12-14bit (integer) TIFF decoding support * Add 12bit (integer) TIFF decoding support An (slow) unpacking step is inserted when the native bpp is not equal to the dst_bpp Currently, I do not know if there can be several packing flavours in TIFF data. * added tests * move sample files to opencv_extra * added 10b and 14b unpacking * fix compilation for non MSVC compilers by using more standard typedefs * yet another typdef usage change to fix buildbot Mac compilation * fixed unpacking of partial packets * fixed warnings returned by buildbot * modifications as suggested by reviewer --- modules/imgcodecs/src/grfmt_tiff.cpp | 211 ++++++++++++++++++++++++--- modules/imgcodecs/test/test_tiff.cpp | 110 ++++++++++++++ 2 files changed, 301 insertions(+), 20 deletions(-) diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index 36cf17e1e3..2941284e84 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -325,6 +325,9 @@ bool TiffDecoder::readHeader() result = true; break; } + case 10: + case 12: + case 14: case 16: { CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, ""); @@ -347,7 +350,7 @@ bool TiffDecoder::readHeader() result = 
true; break; default: - CV_Error(cv::Error::StsError, "Invalid bitsperpixel value read from TIFF header! Must be 1, 8, 16, 32 or 64."); + CV_Error(cv::Error::StsError, "Invalid bitsperpixel value read from TIFF header! Must be 1, 8, 10, 12, 14, 16, 32 or 64."); } } } @@ -437,6 +440,147 @@ static void fixOrientation(Mat &img, uint16 orientation, int dst_bpp) } } +static void _unpack10To16(const uchar* src, const uchar* srcEnd, ushort* dst, ushort* dstEnd, size_t expectedDstElements) +{ + //5*8b=4*10b : 5 src for 4 dst + constexpr const size_t packedBitsCount = 10; + constexpr const size_t packedBitsMask = ((1<(srcEnd-src)/srcElementsPerPacket), + (static_cast(dstEnd-dst)/dstElementsPerPacket) + }); + union { + uint64_t u64; + uint8_t u8[8]; + } buf = {0}; + for(size_t i = 0 ; i(buf.u64 & packedBitsMask); + buf.u64 >>= packedBitsCount; + } + dst += dstElementsPerPacket; + } + size_t remainingDstElements = std::min( + expectedDstElements-fullPacketsCount*dstElementsPerPacket, + static_cast(dstEnd-dst) + ); + bool stop = !remainingDstElements; + while(!stop) + { + for(size_t j = 0 ; j((buf.u64 >> (bitsPerPacket-(j+1)*packedBitsCount)) & packedBitsMask); + } + }//end while(!stop) +} +//end _unpack10To16() + +static void _unpack12To16(const uchar* src, const uchar* srcEnd, ushort* dst, ushort* dstEnd, size_t expectedDstElements) +{ + //3*8b=2*12b : 3 src for 2 dst + constexpr const size_t packedBitsCount = 12; + constexpr const size_t packedBitsMask = ((1<(srcEnd-src)/srcElementsPerPacket), + (static_cast(dstEnd-dst)/dstElementsPerPacket) + }); + union { + uint32_t u32; + uint8_t u8[4]; + } buf = {0}; + for(size_t i = 0 ; i(buf.u32 & packedBitsMask); + buf.u32 >>= packedBitsCount; + } + dst += dstElementsPerPacket; + } + size_t remainingDstElements = std::min( + expectedDstElements-fullPacketsCount*dstElementsPerPacket, + static_cast(dstEnd-dst) + ); + bool stop = !remainingDstElements; + while(!stop) + { + for(size_t j = 0 ; j((buf.u32 >> 
(bitsPerPacket-(j+1)*packedBitsCount)) & packedBitsMask); + } + }//end while(!stop) +} +//end _unpack12To16() + +static void _unpack14To16(const uchar* src, const uchar* srcEnd, ushort* dst, ushort* dstEnd, size_t expectedDstElements) +{ + //7*8b=4*14b : 7 src for 4 dst + constexpr const size_t packedBitsCount = 14; + constexpr const size_t packedBitsMask = ((1<(srcEnd-src)/srcElementsPerPacket), + (static_cast(dstEnd-dst)/dstElementsPerPacket) + }); + union { + uint64_t u64; + uint8_t u8[8]; + } buf = {0}; + for(size_t i = 0 ; i(buf.u64 & packedBitsMask); + buf.u64 >>= packedBitsCount; + } + dst += dstElementsPerPacket; + } + size_t remainingDstElements = std::min( + expectedDstElements-fullPacketsCount*dstElementsPerPacket, + static_cast(dstEnd-dst) + ); + bool stop = !remainingDstElements; + while(!stop) + { + for(size_t j = 0 ; j((buf.u64 >> (bitsPerPacket-(j+1)*packedBitsCount)) & packedBitsMask); + } + }//end while(!stop) +} +//end _unpack14To16() + bool TiffDecoder::readData( Mat& img ) { int type = img.type(); @@ -470,7 +614,7 @@ bool TiffDecoder::readData( Mat& img ) CV_TIFF_CHECK_CALL_DEBUG(TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &ncn)); uint16 img_orientation = ORIENTATION_TOPLEFT; CV_TIFF_CHECK_CALL_DEBUG(TIFFGetField(tif, TIFFTAG_ORIENTATION, &img_orientation)); - const int bitsPerByte = 8; + constexpr const int bitsPerByte = 8; int dst_bpp = (int)(img.elemSize1() * bitsPerByte); bool vert_flip = dst_bpp == 8 && (img_orientation == ORIENTATION_BOTRIGHT || img_orientation == ORIENTATION_RIGHTBOT || @@ -529,10 +673,15 @@ bool TiffDecoder::readData( Mat& img ) CV_Assert(ncn == img.channels()); CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP)); } - const size_t buffer_size = (bpp / bitsPerByte) * ncn * tile_height0 * tile_width0; - AutoBuffer _buffer(buffer_size); - uchar* buffer = _buffer.data(); - ushort* buffer16 = (ushort*)buffer; + const size_t src_buffer_bytes_per_row = divUp(static_cast(ncn * tile_width0 * bpp), 
static_cast(bitsPerByte)); + const size_t src_buffer_size = tile_height0 * src_buffer_bytes_per_row; + const size_t src_buffer_unpacked_bytes_per_row = divUp(static_cast(ncn * tile_width0 * dst_bpp), static_cast(bitsPerByte)); + const size_t src_buffer_unpacked_size = tile_height0 * src_buffer_unpacked_bytes_per_row; + const bool needsUnpacking = (bpp < dst_bpp); + AutoBuffer _src_buffer(src_buffer_size); + uchar* src_buffer = _src_buffer.data(); + AutoBuffer _src_buffer_unpacked(needsUnpacking ? src_buffer_unpacked_size : 0); + uchar* src_buffer_unpacked = needsUnpacking ? _src_buffer_unpacked.data() : nullptr; int tileidx = 0; for (int y = 0; y < m_height; y += (int)tile_height0) @@ -549,14 +698,14 @@ bool TiffDecoder::readData( Mat& img ) { case 8: { - uchar* bstart = buffer; + uchar* bstart = src_buffer; if (!is_tiled) { - CV_TIFF_CHECK_CALL(TIFFReadRGBAStrip(tif, y, (uint32*)buffer)); + CV_TIFF_CHECK_CALL(TIFFReadRGBAStrip(tif, y, (uint32*)src_buffer)); } else { - CV_TIFF_CHECK_CALL(TIFFReadRGBATile(tif, x, y, (uint32*)buffer)); + CV_TIFF_CHECK_CALL(TIFFReadRGBATile(tif, x, y, (uint32*)src_buffer)); // Tiles fill the buffer from the bottom up bstart += (tile_height0 - tile_height) * tile_width0 * 4; } @@ -594,28 +743,48 @@ bool TiffDecoder::readData( Mat& img ) { if (!is_tiled) { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, (uint32*)buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, (uint32*)src_buffer, src_buffer_size) >= 0); } else { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, (uint32*)buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, (uint32*)src_buffer, src_buffer_size) >= 0); } for (int i = 0; i < tile_height; i++) { + ushort* buffer16 = (ushort*)(src_buffer+i*src_buffer_bytes_per_row); + if (needsUnpacking) + { + const uchar* src_packed = src_buffer+i*src_buffer_bytes_per_row; + uchar* dst_unpacked = 
src_buffer_unpacked+i*src_buffer_unpacked_bytes_per_row; + if (bpp == 10) + _unpack10To16(src_packed, src_packed+src_buffer_bytes_per_row, + (ushort*)dst_unpacked, (ushort*)(dst_unpacked+src_buffer_unpacked_bytes_per_row), + ncn * tile_width0); + else if (bpp == 12) + _unpack12To16(src_packed, src_packed+src_buffer_bytes_per_row, + (ushort*)dst_unpacked, (ushort*)(dst_unpacked+src_buffer_unpacked_bytes_per_row), + ncn * tile_width0); + else if (bpp == 14) + _unpack14To16(src_packed, src_packed+src_buffer_bytes_per_row, + (ushort*)dst_unpacked, (ushort*)(dst_unpacked+src_buffer_unpacked_bytes_per_row), + ncn * tile_width0); + buffer16 = (ushort*)dst_unpacked; + } + if (color) { if (ncn == 1) { CV_CheckEQ(wanted_channels, 3, ""); - icvCvt_Gray2BGR_16u_C1C3R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_Gray2BGR_16u_C1C3R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1)); } else if (ncn == 3) { CV_CheckEQ(wanted_channels, 3, ""); - icvCvt_RGB2BGR_16u_C3R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_RGB2BGR_16u_C3R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1)); } @@ -623,14 +792,14 @@ bool TiffDecoder::readData( Mat& img ) { if (wanted_channels == 4) { - icvCvt_BGRA2RGBA_16u_C4R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_BGRA2RGBA_16u_C4R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1)); } else { CV_CheckEQ(wanted_channels, 3, "TIFF-16bpp: BGR/BGRA images are supported only"); - icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_BGRA2BGR_16u_C4C3R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1), 2); } @@ -646,12 +815,12 @@ bool TiffDecoder::readData( Mat& img ) if( ncn == 1 ) { memcpy(img.ptr(img_y + i, x), - buffer16 + i*tile_width0*ncn, + buffer16, tile_width*sizeof(ushort)); } else { - icvCvt_BGRA2Gray_16u_CnC1R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_BGRA2Gray_16u_CnC1R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1), ncn, 2); } @@ -665,14 +834,14 @@ bool TiffDecoder::readData( Mat& img ) { if( 
!is_tiled ) { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, src_buffer, src_buffer_size) >= 0); } else { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, src_buffer, src_buffer_size) >= 0); } - Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? CV_32S : CV_32F) : CV_64F, ncn), buffer); + Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? CV_32S : CV_32F) : CV_64F, ncn), src_buffer); Rect roi_tile(0, 0, tile_width, tile_height); Rect roi_img(x, img_y, tile_width, tile_height); if (!m_hdr && ncn == 3) @@ -691,6 +860,8 @@ bool TiffDecoder::readData( Mat& img ) } // for x } // for y } + if (bpp < dst_bpp) + img *= (1<<(dst_bpp-bpp)); fixOrientation(img, img_orientation, dst_bpp); } diff --git a/modules/imgcodecs/test/test_tiff.cpp b/modules/imgcodecs/test/test_tiff.cpp index 1c6e4a6b29..28e084d5b0 100644 --- a/modules/imgcodecs/test/test_tiff.cpp +++ b/modules/imgcodecs/test/test_tiff.cpp @@ -117,6 +117,116 @@ TEST(Imgcodecs_Tiff, decode_tile_remainder) // What about 32, 64 bit? 
} +TEST(Imgcodecs_Tiff, decode_10_12_14) +{ + /* see issue #21700 + */ + const string root = cvtest::TS::ptr()->get_data_path(); + + const double maxDiff = 256;//samples do not have the exact same values because of the tool that created them + cv::Mat tmp; + double diff = 0; + + cv::Mat img8UC1 = imread(root + "readwrite/pattern_8uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img8UC1.empty()); + ASSERT_EQ(img8UC1.type(), CV_8UC1); + + cv::Mat img8UC3 = imread(root + "readwrite/pattern_8uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img8UC3.empty()); + ASSERT_EQ(img8UC3.type(), CV_8UC3); + + cv::Mat img8UC4 = imread(root + "readwrite/pattern_8uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img8UC4.empty()); + ASSERT_EQ(img8UC4.type(), CV_8UC4); + + cv::Mat img16UC1 = imread(root + "readwrite/pattern_16uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img16UC1.empty()); + ASSERT_EQ(img16UC1.type(), CV_16UC1); + ASSERT_EQ(img8UC1.size(), img16UC1.size()); + img8UC1.convertTo(tmp, img16UC1.type(), (1U<<(16-8))); + diff = cv::norm(tmp.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img16UC3 = imread(root + "readwrite/pattern_16uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img16UC3.empty()); + ASSERT_EQ(img16UC3.type(), CV_16UC3); + ASSERT_EQ(img8UC3.size(), img16UC3.size()); + img8UC3.convertTo(tmp, img16UC3.type(), (1U<<(16-8))); + diff = cv::norm(tmp.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img16UC4 = imread(root + "readwrite/pattern_16uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img16UC4.empty()); + ASSERT_EQ(img16UC4.type(), CV_16UC4); + ASSERT_EQ(img8UC4.size(), img16UC4.size()); + img8UC4.convertTo(tmp, img16UC4.type(), (1U<<(16-8))); + diff = cv::norm(tmp.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img10UC1 = imread(root + "readwrite/pattern_10uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img10UC1.empty()); + 
ASSERT_EQ(img10UC1.type(), CV_16UC1); + ASSERT_EQ(img10UC1.size(), img16UC1.size()); + diff = cv::norm(img10UC1.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img10UC3 = imread(root + "readwrite/pattern_10uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img10UC3.empty()); + ASSERT_EQ(img10UC3.type(), CV_16UC3); + ASSERT_EQ(img10UC3.size(), img16UC3.size()); + diff = cv::norm(img10UC3.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img10UC4 = imread(root + "readwrite/pattern_10uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img10UC4.empty()); + ASSERT_EQ(img10UC4.type(), CV_16UC4); + ASSERT_EQ(img10UC4.size(), img16UC4.size()); + diff = cv::norm(img10UC4.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img12UC1 = imread(root + "readwrite/pattern_12uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img12UC1.empty()); + ASSERT_EQ(img12UC1.type(), CV_16UC1); + ASSERT_EQ(img12UC1.size(), img16UC1.size()); + diff = cv::norm(img12UC1.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img12UC3 = imread(root + "readwrite/pattern_12uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img12UC3.empty()); + ASSERT_EQ(img12UC3.type(), CV_16UC3); + ASSERT_EQ(img12UC3.size(), img16UC3.size()); + diff = cv::norm(img12UC3.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img12UC4 = imread(root + "readwrite/pattern_12uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img12UC4.empty()); + ASSERT_EQ(img12UC4.type(), CV_16UC4); + ASSERT_EQ(img12UC4.size(), img16UC4.size()); + diff = cv::norm(img12UC4.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img14UC1 = imread(root + "readwrite/pattern_14uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img14UC1.empty()); + ASSERT_EQ(img14UC1.type(), CV_16UC1); + ASSERT_EQ(img14UC1.size(), img16UC1.size()); + diff = 
cv::norm(img14UC1.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img14UC3 = imread(root + "readwrite/pattern_14uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img14UC3.empty()); + ASSERT_EQ(img14UC3.type(), CV_16UC3); + ASSERT_EQ(img14UC3.size(), img16UC3.size()); + diff = cv::norm(img14UC3.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img14UC4 = imread(root + "readwrite/pattern_14uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img14UC4.empty()); + ASSERT_EQ(img14UC4.type(), CV_16UC4); + ASSERT_EQ(img14UC4.size(), img16UC4.size()); + diff = cv::norm(img14UC4.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); +} + TEST(Imgcodecs_Tiff, decode_infinite_rowsperstrip) { const uchar sample_data[142] = { From eb067fee5532cd3271f9299c1e69054ded7df71b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 13 Mar 2022 04:08:51 +0000 Subject: [PATCH 21/84] videoio(ffmpeg): fix memory leak --- modules/videoio/src/cap_ffmpeg_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 47dc00ab04..43c555309b 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -1519,7 +1519,7 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, #if USE_AV_HW_CODECS if (sw_picture != picture) { - av_frame_unref(sw_picture); + av_frame_free(&sw_picture); } #endif return true; From 419918076e95b3669e6879abadbabe3145f6053c Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Mon, 14 Mar 2022 08:28:43 +0300 Subject: [PATCH 22/84] Changed call of NodeTypeInfo constructor --- modules/dnn/src/ie_ngraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index c646c1fe3a..ba881a8023 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ 
b/modules/dnn/src/ie_ngraph.cpp @@ -77,7 +77,7 @@ class NgraphCustomOp: public ngraph::op::Op { public: const ngraph::NodeTypeInfo& get_type_info() const override { - static constexpr ngraph::NodeTypeInfo type_info{kOpenCVLayersType, 0}; + static constexpr ngraph::NodeTypeInfo type_info{kOpenCVLayersType, static_cast(0)}; return type_info; } From e16cb8b4a23fc1b6b7b99155a0d33f523262db76 Mon Sep 17 00:00:00 2001 From: rogday Date: Mon, 14 Mar 2022 16:10:04 +0300 Subject: [PATCH 23/84] Merge pull request #21703 from rogday:transpose Add n-dimensional transpose to core * add n-dimensional transpose to core * add performance test, write sequentially and address review comments --- modules/core/include/opencv2/core.hpp | 10 ++++ modules/core/perf/perf_arithm.cpp | 24 +++++++++ modules/core/src/matrix_transform.cpp | 67 ++++++++++++++++++++++++ modules/core/test/test_arithm.cpp | 74 +++++++++++++++++++++++++++ 4 files changed, 175 insertions(+) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 70ea4f8c1f..f7807e37ec 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1739,6 +1739,16 @@ should be done separately if needed. */ CV_EXPORTS_W void transpose(InputArray src, OutputArray dst); +/** @brief Transpose for n-dimensional matrices. + * + * @note Input should be continuous single-channel matrix. + * @param src input array. + * @param order a permutation of [0,1,..,N-1] where N is the number of axes of src. + * The i’th axis of dst will correspond to the axis numbered order[i] of the input. + * @param dst output array of the same type as src. + */ +CV_EXPORTS_W void transposeND(InputArray src, const std::vector& order, OutputArray dst); + /** @brief Performs the matrix transformation of every array element. 
The function cv::transform performs the matrix transformation of every diff --git a/modules/core/perf/perf_arithm.cpp b/modules/core/perf/perf_arithm.cpp index 70e2f49210..3ac9a24639 100644 --- a/modules/core/perf/perf_arithm.cpp +++ b/modules/core/perf/perf_arithm.cpp @@ -1,4 +1,5 @@ #include "perf_precomp.hpp" +#include namespace opencv_test { @@ -393,6 +394,29 @@ PERF_TEST_P_(BinaryOpTest, reciprocal) SANITY_CHECK_NOTHING(); } + +PERF_TEST_P_(BinaryOpTest, transposeND) +{ + Size sz = get<0>(GetParam()); + int type = get<1>(GetParam()); + cv::Mat a = Mat(sz, type).reshape(1); + + std::vector order(a.dims); + std::iota(order.begin(), order.end(), 0); + std::reverse(order.begin(), order.end()); + + std::vector new_sz(a.dims); + std::copy(a.size.p, a.size.p + a.dims, new_sz.begin()); + std::reverse(new_sz.begin(), new_sz.end()); + cv::Mat b = Mat(new_sz, type); + + declare.in(a,WARMUP_RNG).out(b); + + TEST_CYCLE() cv::transposeND(a, order, b); + + SANITY_CHECK_NOTHING(); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest, testing::Combine( testing::Values(szVGA, sz720p, sz1080p), diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 727eaf7fee..05ecf450e1 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -4,6 +4,7 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" +#include "opencv2/core/detail/dispatch_helper.impl.hpp" namespace cv { @@ -282,6 +283,72 @@ void transpose( InputArray _src, OutputArray _dst ) } +void transposeND(InputArray src_, const std::vector& order, OutputArray dst_) +{ + Mat inp = src_.getMat(); + CV_Assert(inp.isContinuous()); + CV_CheckEQ(inp.channels(), 1, "Input array should be single-channel"); + CV_CheckEQ(order.size(), static_cast(inp.dims), "Number of dimensions shouldn't change"); + + auto order_ = order; + std::sort(order_.begin(), order_.end()); + for (size_t i = 0; i < order_.size(); ++i) + { + CV_CheckEQ(static_cast(order_[i]), 
i, "New order should be a valid permutation of the old one"); + } + + std::vector newShape(order.size()); + for (size_t i = 0; i < order.size(); ++i) + { + newShape[i] = inp.size[order[i]]; + } + + dst_.create(static_cast(newShape.size()), newShape.data(), inp.type()); + Mat out = dst_.getMat(); + CV_Assert(out.isContinuous()); + CV_Assert(inp.data != out.data); + + int continuous_idx = 0; + for (int i = static_cast(order.size()) - 1; i >= 0; --i) + { + if (order[i] != i) + { + continuous_idx = i + 1; + break; + } + } + + size_t continuous_size = continuous_idx == 0 ? out.total() : out.step1(continuous_idx - 1); + size_t outer_size = out.total() / continuous_size; + + std::vector steps(order.size()); + for (int i = 0; i < static_cast(steps.size()); ++i) + { + steps[i] = inp.step1(order[i]); + } + + auto* src = inp.ptr(); + auto* dst = out.ptr(); + + size_t src_offset = 0; + size_t es = out.elemSize(); + for (size_t i = 0; i < outer_size; ++i) + { + std::memcpy(dst, src + es * src_offset, es * continuous_size); + dst += es * continuous_size; + for (int j = continuous_idx - 1; j >= 0; --j) + { + src_offset += steps[j]; + if ((src_offset / steps[j]) % out.size[j] != 0) + { + break; + } + src_offset -= steps[j] * out.size[j]; + } + } +} + + #if CV_SIMD128 template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 014a0cff0a..06d295f694 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" #include "ref_reduce_arg.impl.hpp" +#include namespace opencv_test { namespace { @@ -2128,6 +2129,79 @@ TEST(Core_minMaxIdx, regression_9207_1) } +class TransposeND : public testing::TestWithParam< tuple, perf::MatType> > +{ +public: + std::vector m_shape; + int m_type; + + void SetUp() + { + std::tie(m_shape, m_type) = GetParam(); + } +}; + + +TEST_P(TransposeND, basic) +{ + Mat inp(m_shape, m_type); + randu(inp, 0, 255); + + std::vector order(m_shape.size()); + std::iota(order.begin(), order.end(), 0); + auto transposer = [&order] (const std::vector& id) + { + std::vector ret(id.size()); + for (size_t i = 0; i < id.size(); ++i) + { + ret[i] = id[order[i]]; + } + return ret; + }; + auto advancer = [&inp] (std::vector& id) + { + for (int j = static_cast(id.size() - 1); j >= 0; --j) + { + ++id[j]; + if (id[j] != inp.size[j]) + { + break; + } + id[j] = 0; + } + }; + + do + { + Mat out; + cv::transposeND(inp, order, out); + std::vector id(order.size()); + for (size_t i = 0; i < inp.total(); ++i) + { + auto new_id = transposer(id); + switch (inp.type()) + { + case CV_8UC1: + ASSERT_EQ(inp.at(id.data()), out.at(new_id.data())); + break; + case CV_32FC1: + ASSERT_EQ(inp.at(id.data()), out.at(new_id.data())); + break; + default: + FAIL() << "Unsupported type: " << inp.type(); + } + advancer(id); + } + } while (std::next_permutation(order.begin(), order.end())); +} + + +INSTANTIATE_TEST_CASE_P(Arithm, TransposeND, testing::Combine( + testing::Values(std::vector{2, 3, 4}, std::vector{5, 10}), + testing::Values(perf::MatType(CV_8UC1), CV_32FC1) +)); + + TEST(Core_minMaxIdx, regression_9207_2) { const int rows = 13; From 93353aea7050146a88032f8ed406b84307d2f3ba Mon Sep 17 00:00:00 2001 From: rogday Date: Tue, 15 Mar 2022 09:14:05 +0300 Subject: [PATCH 24/84] Merge pull request #21522 from rogday:lstm Fix LSTM support in ONNX * fix LSTM and add peephole support * disable old tests * turn lambdas into functions * more hacks for c++98 * add assertions 
* slice fixes * backport of cuda-related fixes * address review comments --- modules/dnn/src/layers/recurrent_layers.cpp | 151 +++++++++- modules/dnn/src/onnx/onnx_importer.cpp | 300 +++++++++++++++++--- modules/dnn/test/test_onnx_importer.cpp | 13 +- 3 files changed, 404 insertions(+), 60 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index f37498ed09..8c420f9f0e 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -103,7 +103,7 @@ static ActivationFunction get_activation_function(const String& activation) { class LSTMLayerImpl CV_FINAL : public LSTMLayer { - int numTimeStamps, numSamples; + int numTimeStamps, numSamples, numHidden; bool allocated; MatShape outTailShape; //shape of single output sample @@ -127,6 +127,10 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer bool useAVX2; #endif + // CUDA needs input blobs to be rearranged in a specific way, but some transformations + // in ONNXImporter are destructive, so we keep a copy. + std::vector originalBlobs; + public: LSTMLayerImpl(const LayerParams& params) @@ -140,6 +144,13 @@ public: { setParamsFrom(params); + if (params.get("is_onnx", false)) + { + // collect copies of onnx blobs + originalBlobs.insert(originalBlobs.begin(), blobs.begin(), blobs.begin() + 3); + blobs.erase(blobs.begin(), blobs.begin() + 3); + } + bidirectional = params.get("bidirectional", false); if (!blobs.empty()) { @@ -181,6 +192,7 @@ public: useCellClip = params.get("use_cell_clip", false); usePeephole = params.get("use_peephole", false); reverse = params.get("reverse", false); + numHidden = params.get("hidden_size", 1); CV_Assert(!reverse || !bidirectional); // read activations @@ -269,8 +281,21 @@ public: outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end()); outResShape.back() *= (1 + static_cast(bidirectional)); - size_t noutputs = produceCellOutput ? 
2 : 1; - outputs.assign(noutputs, outResShape); + outputs.assign(1, outResShape); + if (produceCellOutput) + { + // the producer is ONNX, so CellState is different + if (!originalBlobs.empty()) + { + int shp[] = {(1 + static_cast(bidirectional)), _numSamples, numHidden}; + MatShape newShape(shp, shp + sizeof(shp)/sizeof(shp[0])); + outputs.push_back(newShape); + } + else + { + outputs.push_back(outResShape); + } + } internals.assign(1, shape(_numSamples, _numOut)); // hInternal internals.push_back(shape(_numSamples, _numOut)); // cInternal @@ -335,14 +360,39 @@ public: outputs_arr.getMatVector(output); internals_arr.getMatVector(internals); + Mat cOut = produceCellOutput ? output[0].clone() : Mat(); + const bool needYcTransform = !originalBlobs.empty(); // if the producer is onnx const int numDirs = 1 + static_cast(bidirectional); for (int i = 0; i < numDirs; ++i) { - const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs); - const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs); - const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs); - const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs); - const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs); + Mat Wh = blobs[0]; + Mat Wx = blobs[1]; + Mat bias = blobs[2]; + Mat h_0 = blobs[3]; + Mat c_0 = blobs[4]; + Mat pI, pF, pO; + + Wh = Wh.rowRange(i * Wh.rows / numDirs, (i + 1) * Wh.rows / numDirs); + Wx = Wx.rowRange(i * Wx.rows / numDirs, (i + 1) * Wx.rows / numDirs); + bias = bias.colRange(i * bias.cols / numDirs, (i + 1) * bias.cols / numDirs); + h_0 = h_0.rowRange(i * h_0.rows / numDirs, (i + 1) * h_0.rows / numDirs); + c_0 = c_0.rowRange(i * c_0.rows / numDirs, (i + 1) * c_0.rows / numDirs); + + if (usePeephole) + { + pI = blobs[5]; + pF = blobs[6]; + pO = blobs[7]; + + pI = pI.rowRange(i * 
pI.rows / numDirs, (i + 1) * pI.rows / numDirs); + pI = pI.colRange(i * pI.cols / numDirs, (i + 1) * pI.cols / numDirs); + + pF = pF.rowRange(i * pF.rows / numDirs, (i + 1) * pF.rows / numDirs); + pF = pF.colRange(i * pF.cols / numDirs, (i + 1) * pF.cols / numDirs); + + pO = pO.rowRange(i * pO.rows / numDirs, (i + 1) * pO.rows / numDirs); + pO = pO.colRange(i * pO.cols / numDirs, (i + 1) * pO.cols / numDirs); + } int numOut = Wh.size[1]; Mat hInternal = internals[0], cInternal = internals[1], @@ -356,7 +406,12 @@ public: Mat hOutTs = output[0].reshape(1, numSamplesTotal); hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs); - Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat(); + Mat cOutTs; + if (produceCellOutput) + { + cOutTs = cOut.reshape(1, numSamplesTotal); + cOutTs = cOutTs.colRange(i * cOutTs.cols / numDirs, (i + 1) * cOutTs.cols / numDirs); + } #if CV_TRY_AVX2 || CV_TRY_AVX bool canUseAvx = gates.isContinuous() && bias.isContinuous() @@ -471,8 +526,8 @@ public: if (usePeephole) { Mat gatesIF = gates.colRange(0, 2*numOut); - gemm(cInternal, blobs[5], 1, gateI, 1, gateI); - gemm(cInternal, blobs[6], 1, gateF, 1, gateF); + gemm(cInternal, pI, 1, gateI, 1, gateI); + gemm(cInternal, pF, 1, gateF, 1, gateF); f_activation(gatesIF, gatesIF); } else @@ -495,7 +550,7 @@ public: } if (usePeephole) { - gemm(cInternal, blobs[7], 1, gateO, 1, gateO); + gemm(cInternal, pO, 1, gateO, 1, gateO); f_activation(gateO, gateO); } @@ -509,6 +564,78 @@ public: cInternal.copyTo(cOutTs.rowRange(curRowRange)); } } + + if (needYcTransform && produceCellOutput) + { + fixCellState(cOut, numDirs); + } + if (produceCellOutput) + { + cOut.copyTo(output[1]); + } + } + + void fixCellState(Mat& cOut, int numDirs) + { + // seq, batch, dirs, hidden + int shp[] = {0, numSamples, numDirs, numHidden}; + cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + + // permute to {0, 2, 1, 3}; + std::vector newShape = shape(cOut); + 
std::swap(newShape[1], newShape[2]); + cv::Mat newCellState(newShape, CV_32FC1); + const float* src = cOut.ptr(); + float* dst = newCellState.ptr(); + size_t sj = newCellState.size[3]; + size_t sk = newCellState.size[2] * sj; + size_t si = newCellState.size[1] * sk; + for (size_t i = 0; i < newCellState.size[0]; i++) + { + for (size_t j = 0; j < newCellState.size[2]; j++) + { + for (size_t k = 0; k < newCellState.size[1]; k++) + { + std::memcpy(dst, src, sizeof(float) * newCellState.size[3]); + src += cOut.size[3]; + dst += sk; + } + dst = dst + sj - si; + } + dst = dst + si - sk; + } + + cOut = newCellState; + + if (numDirs == 1) + { + // Slice: Yh = Y[-1, :, :, :] + Range ranges[] = {cv::Range(cOut.size[0] - 1, cOut.size[0]), cv::Range::all(), cv::Range::all(), cv::Range::all()}; + cOut = cOut(ranges); + // Reshape: 1x1xBxH -> 1xBxH + int shp[] = {1, numSamples, numHidden}; + cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + } + else + { + // Slice: SxDxBxH -> last sequence, first direction + Range ranges1[] = {cv::Range(cOut.size[0] - 1, cOut.size[0]), cv::Range(0, 1), cv::Range::all(), cv::Range::all()}; + Mat part1 = cOut(ranges1); + + // Slice: SxDxBxH -> first sequence, last direction + Range ranges2[] = {cv::Range(0, 1), cv::Range(cOut.size[1] - 1, cOut.size[1]), cv::Range::all(), cv::Range::all()}; + Mat part2 = cOut(ranges2); + + int shp[] = {1, part1.size[2] * part1.size[3]}; + part1 = part1.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + part2 = part2.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + + vconcat(part1, part2, cOut); + + // Reshape: 1x2xBxH -> 2xBxH + int finalShape[] = {2, numSamples, numHidden}; + cOut = cOut.reshape(1, sizeof(finalShape)/sizeof(finalShape[0]), finalShape); + } } }; diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index be0800ca14..e755226535 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -65,6 +65,14 @@ class 
ONNXImporter void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto, const std::string& input, size_t n); void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id); + void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size); + void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n); + std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n); + std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, + int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name, + const int index); + void lstm_add_transform(int num_directions, int batch_size, int hidden_size, + int index, const std::string& input_name, const std::string& output_name); public: ONNXImporter(Net& net, const char *onnxFile) @@ -1298,38 +1306,24 @@ void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::No addConstant(node_proto.output(0), layerParams.blobs[0]); } -void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +void transformBlobs(std::vector& blobs) { - opencv_onnx::NodeProto node_proto = node_proto_; - const std::string output_name = node_proto.output(0); - LayerParams lstmParams = layerParams; - lstmParams.name += "/lstm"; + Mat Wx = blobs[0]; + Mat Wh = blobs[1]; + Mat b = blobs[2]; + std::vector cudaWorkaround; + cudaWorkaround.push_back(Wx.clone()); + cudaWorkaround.push_back(Wh.clone()); + cudaWorkaround.push_back(b.clone()); - // https://pytorch.org/docs/stable/nn.html#lstm - CV_Assert(node_proto.input_size() >= 7); - Mat Wx = getBlob(node_proto, 1); - Mat Wh = getBlob(node_proto, 2); - Mat b = getBlob(node_proto, 3); - - const int numHidden = lstmParams.get("hidden_size"); + const int numHidden = Wh.size[2]; const int 
numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. const int numFeatures = Wx.size[2]; - Mat h0, c0; - if (!node_proto.input(5).empty()) { - h0 = getBlob(node_proto, 5); - h0 = h0.reshape(1, h0.size[0] * h0.size[1]); - } else { - // initial_h attribute can be empty in case of keras2onnx producer. fill it with zeros - h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); - } - if (!node_proto.input(6).empty()) { - c0 = getBlob(node_proto, 6); - c0 = c0.reshape(1, c0.size[0] * c0.size[1]); - } else { - // initial_c attribute can be empty in case of keras2onnx producer. fill it with zeros - c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); - } + Mat h0 = blobs[3]; + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + Mat c0 = blobs[4]; + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); b = b.reshape(1, b.size[0]); Mat bx = b.colRange(0, b.cols / 2); @@ -1360,31 +1354,245 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + blobs[0] = Wh; + blobs[1] = Wx; + blobs[2] = b.reshape(1, 1); + blobs[3] = h0; + blobs[4] = c0; - lstmParams.blobs.resize(5); - lstmParams.blobs[0] = Wh; - lstmParams.blobs[1] = Wx; - lstmParams.blobs[2] = b; - lstmParams.blobs[3] = h0; - lstmParams.blobs[4] = c0; + if (blobs.size() == 5) { + // so that future patch removing copies can leave all indexing as is + blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end()); + return; + } - // read direction attribute - lstmParams.set("reverse", lstmParams.get("direction", "") == "reverse"); - lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); + Mat P = blobs[5]; + blobs[5] = P.colRange(0, numHidden); + blobs[5] = blobs[5].clone().reshape(1, blobs[5].total()); // Single column. 
+ blobs[5] = Mat::diag(blobs[5]); - node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name - addLayer(lstmParams, node_proto); + blobs.push_back(P.colRange(numHidden, 2 * numHidden)); + blobs[6] = blobs[6].clone().reshape(1, blobs[6].total()); // Single column. + blobs[6] = Mat::diag(blobs[6]); - MatShape lstmShape = outShapes[node_proto.output(0)]; + blobs.push_back(P.colRange(2 * numHidden, 3 * numHidden)); + blobs[7] = blobs[7].clone().reshape(1, blobs[7].total()); // Single column. + blobs[7] = Mat::diag(blobs[7]); - // Add fake 1 as it is done in ONNX - lstmShape.insert(lstmShape.begin() + 1, 1); + // so that future patch removing copies can leave all indexing as is + blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end()); +} - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); - node_proto.set_input(0, lstmParams.name); // redirect input to LSTM - node_proto.set_output(0, output_name); // keep origin LSTM's name - addLayer(layerParams, node_proto); +void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size) +{ + MatShape blobShape(blobShape_, blobShape_ + size); + Mat blob; + if (idx < lstm_proto.input_size() && !lstm_proto.input(idx).empty()) + { + blob = getBlob(lstm_proto, idx); + CV_Assert(shape(blob) == blobShape); + } + else + { + blob = Mat(blobShape, CV_32FC1, 0.); + } + layerParams.blobs.push_back(blob); +}; + +void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n) +{ + LayerParams reshapeLp; + reshapeLp.name = cv::format("%s/reshape", input_name.c_str()); + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + + reshapeLp.set("dim", DictValue::arrayInt(layerShape, n)); + + opencv_onnx::NodeProto reshape_proto; + 
reshape_proto.add_input(input_name); + reshape_proto.add_output(output_name); + addLayer(reshapeLp, reshape_proto); +}; + +std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n) +{ + LayerParams sliceLP; + sliceLP.name = cv::format("%s/slice_%d", input_name.c_str(), index); + sliceLP.type = "Slice"; + CV_Assert(layer_id.find(sliceLP.name) == layer_id.end()); + + sliceLP.set("begin", DictValue::arrayInt(begin, n)); + sliceLP.set("end", DictValue::arrayInt(end, n)); + sliceLP.set("axis", 0); + + opencv_onnx::NodeProto slice_proto; + slice_proto.add_input(input_name); + slice_proto.add_output(sliceLP.name); + addLayer(sliceLP, slice_proto); + + return slice_proto.output(0); +}; + +std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, + int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name, + const int index) +{ + std::string reshape_output = cv::format("%s/reshape_%d", layerParams.name.c_str(), index); + + // reshape from Seq, Batch, Dirs*Hidden to Seq, Batch, Dirs, Hidden + // to not confuse reshape with dynamic first dimension, zero means 'leave unchanged' + int layerShape[] = {0, batch_size, num_directions, hidden_size}; + lstm_add_reshape(lstm_proto.output(index), reshape_output, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); + + // permute from Seq, Batch, Dirs, Hidden to Seq, Dirs, Batch, Hidden + LayerParams permuteLP; + permuteLP.name = reshape_output + "/permute"; + permuteLP.type = "Permute"; + CV_Assert(layer_id.find(permuteLP.name) == layer_id.end()); + + int order[] = {0, 2, 1, 3}; + permuteLP.set("order", DictValue::arrayInt(order, 4)); + + opencv_onnx::NodeProto permute_proto; + permute_proto.add_input(reshape_output); + permute_proto.add_output((need_y && index == 0) ? 
y_name : static_cast(permuteLP.name)); + addLayer(permuteLP, permute_proto); + + return permute_proto.output(0); +}; + +void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size, + int index, const std::string& input_name, const std::string& output_name) +{ + if (num_directions == 1) + { + // Slice: Yh = Y[-1, :, :, :] + int begin[] = {-1}, end[] = {INT_MAX}; + std::string slice_output = lstm_add_slice(index, input_name, begin, end, sizeof(begin) / sizeof(begin[0])); + + // Reshape: 1x1xBxH -> 1xBxH + int layerShape[] = {1, batch_size, hidden_size}; + lstm_add_reshape(slice_output, output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); + } + else + { + // Slice: SxDxBxH -> last sequence, first direction + int begin0[] = {-1, 0}, end0[] = {INT_MAX, 1}; + std::string slice_0 = lstm_add_slice(0, input_name, begin0, end0, sizeof(begin0) / sizeof(begin0[0])); + + // Slice: SxDxBxH -> first sequence, last direction + int begin1[] = {0, -1}, end1[] = {1, INT_MAX}; + std::string slice_1 = lstm_add_slice(1, input_name, begin1, end1, sizeof(begin1) / sizeof(begin1[0])); + + LayerParams concatLP; + concatLP.name = cv::format("%s/concat", input_name.c_str()); + concatLP.type = "Concat"; + CV_Assert(layer_id.find(concatLP.name) == layer_id.end()); + + concatLP.set("axis", 1); // 1x1xBxH -> 1x2xBxH + + opencv_onnx::NodeProto concat_proto; + concat_proto.add_input(slice_0); + concat_proto.add_input(slice_1); + concat_proto.add_output(concatLP.name); + addLayer(concatLP, concat_proto); + + // Reshape: 1x2xBxH -> 2xBxH + int layerShape[] = {2, batch_size, hidden_size}; + lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); + } +}; + +void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto lstm_proto = node_proto_; + layerParams.name += "/lstm"; + + // 
https://github.com/onnx/onnx/blob/main/docs/Operators.md#LSTM + CV_Assert(lstm_proto.input_size() >= 3); + for (size_t i = 1; i < 3; ++i) + { + const std::string& name = lstm_proto.input(i); + CV_Assert(!name.empty() && constBlobs.count(name) == 1); + } + + IterShape_t shapeIt = outShapes.find(lstm_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + const MatShape x_shape = shapeIt->second; + + const int seq_length = x_shape[0]; + const int batch_size = x_shape[1]; + const int input_size = x_shape[2]; + const int hidden_size = layerParams.get("hidden_size"); + const int num_directions = constBlobs[lstm_proto.input(1)].size[0]; + + int w_size[] = {num_directions, 4*hidden_size, input_size}; + lstm_extractConsts(layerParams, lstm_proto, 1, w_size, sizeof(w_size) / sizeof(w_size[0])); // W + + int r_size[] = {num_directions, 4*hidden_size, hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 2, r_size, sizeof(r_size) / sizeof(r_size[0])); // R + + int b_size[] = {num_directions, 8*hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 3, b_size, sizeof(b_size) / sizeof(b_size[0])); // B + + if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty()) + { + Mat blob = getBlob(lstm_proto, 4); + CV_Assert(blob.total() == batch_size); + for (MatIterator_ it = blob.begin(); it != blob.end(); ++it) + { + CV_Assert(*it == seq_length); + } + } + + int h_size[] = {num_directions, batch_size, hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 5, h_size, sizeof(h_size) / sizeof(h_size[0])); // initial_h + + int c_size[] = {num_directions, batch_size, hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 6, c_size, sizeof(c_size) / sizeof(c_size[0])); // initial_c + + if (lstm_proto.input_size() > 7 && !lstm_proto.input(7).empty()) + { + layerParams.set("use_peephole", true); + int p_size[] = {num_directions, 3 * hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 7, p_size, sizeof(p_size) / sizeof(p_size[0])); // P + } + + 
transformBlobs(layerParams.blobs); + + layerParams.set("is_onnx", true); + layerParams.set("reverse", layerParams.get("direction", "") == "reverse"); + layerParams.set("bidirectional", layerParams.get("direction", "") == "bidirectional"); + + bool need_yc = lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty(); + bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty(); + bool need_y = lstm_proto.output_size() > 0 && !lstm_proto.output(0).empty(); + + const std::string y_name = need_y ? lstm_proto.output(0) : ""; + const std::string yh_name = need_yh ? lstm_proto.output(1) : ""; + const std::string yc_name = need_yc ? lstm_proto.output(2) : ""; + + layerParams.set("produce_cell_output", need_yc); + + lstm_proto.clear_output(); + if (need_y || need_yh) + { + // give random names to LSTMLayer's outputs because every output needs postprocessing + lstm_proto.add_output(cv::format("%s_y", layerParams.name.c_str())); + } + if (need_yc) + { + lstm_proto.add_output(yc_name); + } + + addLayer(layerParams, lstm_proto); + + std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y, + y_name, 0); + if (need_yh) + { + lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name); + } } void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index ac1b89d99c..dda479c4fa 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -765,12 +765,14 @@ TEST_P(Test_ONNX_layers, LSTM_Activations) testONNXModels("lstm_cntk_tanh", pb, 0, 0, false, false); } -TEST_P(Test_ONNX_layers, LSTM) +// disabled due to poor handling of 1-d mats +TEST_P(Test_ONNX_layers, DISABLED_LSTM) { testONNXModels("lstm", npy, 0, 0, false, false); } -TEST_P(Test_ONNX_layers, LSTM_bidirectional) +// disabled due to poor handling of 
1-d mats +TEST_P(Test_ONNX_layers, DISABLED_LSTM_bidirectional) { testONNXModels("lstm_bidirectional", npy, 0, 0, false, false); } @@ -785,6 +787,13 @@ TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false); } +TEST_P(Test_ONNX_layers, LSTM_cell) +{ + testONNXModels("lstm_cell_forward", npy, 0, 0, false, false); + testONNXModels("lstm_cell_bidirectional", npy, 0, 0, false, false); + testONNXModels("lstm_cell_with_peepholes", npy, 0, 0, false, false); +} + TEST_P(Test_ONNX_layers, Pad2d_Unfused) { testONNXModels("ReflectionPad2d"); From 54733eba6f37aa333a3841ec0b18591ca3f2ffa2 Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Tue, 15 Mar 2022 11:01:03 +0300 Subject: [PATCH 25/84] Merge pull request #21687 from sivanov-work:vpp_pp_integr G-API: Add VPP preproc CPU/GPU dispatcher * Add VPP preproc acceleration dispatcher & UTs * Fix compilation * Apply some comments --- modules/gapi/CMakeLists.txt | 1 + .../surface/base_frame_adapter.cpp | 11 +- .../surface/base_frame_adapter.hpp | 6 +- .../surface/cpu_frame_adapter.cpp | 2 +- .../surface/dx11_frame_adapter.cpp | 2 +- .../engine/preproc/preproc_dispatcher.cpp | 85 +++++++++ .../engine/preproc/preproc_dispatcher.hpp | 53 ++++++ .../onevpl/engine/preproc/preproc_engine.cpp | 15 +- .../engine/preproc/vpp_preproc_defines.hpp | 6 +- .../onevpl/engine/preproc_defines.hpp | 21 ++- .../gapi_streaming_vpp_preproc_test.cpp | 168 +++++++++++++++++- 11 files changed, 340 insertions(+), 30 deletions(-) create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index af50af3f8c..04e1906c75 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -203,6 +203,7 @@ set(gapi_srcs src/streaming/onevpl/engine/transcode/transcode_session.cpp 
src/streaming/onevpl/engine/preproc/preproc_engine.cpp src/streaming/onevpl/engine/preproc/preproc_session.cpp + src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp src/streaming/onevpl/data_provider_dispatcher.cpp diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp index 82859e474c..76da3dbe50 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp @@ -15,8 +15,11 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { -BaseFrameAdapter::BaseFrameAdapter(std::shared_ptr surface, SessionHandle assoc_handle): - parent_surface_ptr(surface), parent_handle(assoc_handle) { +BaseFrameAdapter::BaseFrameAdapter(std::shared_ptr surface, + SessionHandle assoc_handle, + AccelType accel): + parent_surface_ptr(surface), parent_handle(assoc_handle), + acceleration_type(accel) { GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); GAPI_Assert(parent_handle && "mfxSession is nullptr"); @@ -63,6 +66,10 @@ const BaseFrameAdapter::SessionHandle BaseFrameAdapter::get_session_handle() con cv::GFrameDesc BaseFrameAdapter::meta() const { return frame_desc; } +AccelType BaseFrameAdapter::accel_type() const { + return acceleration_type; +} + } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp index 3d8d951535..a3dfcf542f 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp @@ -9,6 +9,7 @@ #include #include +#include #include "streaming/onevpl/accelerators/surface/surface.hpp" 
#ifdef HAVE_ONEVPL @@ -25,14 +26,17 @@ public: const SessionHandle get_session_handle() const; cv::GFrameDesc meta() const override; + AccelType accel_type() const; protected: - BaseFrameAdapter(std::shared_ptr assoc_surface, SessionHandle assoc_handle); + BaseFrameAdapter(std::shared_ptr assoc_surface, SessionHandle assoc_handle, + AccelType accel); ~BaseFrameAdapter(); std::shared_ptr surface(); std::shared_ptr parent_surface_ptr; SessionHandle parent_handle; GFrameDesc frame_desc; + AccelType acceleration_type; }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp index 58be29f628..751ed7abbd 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp @@ -18,7 +18,7 @@ namespace onevpl { VPLMediaFrameCPUAdapter::VPLMediaFrameCPUAdapter(std::shared_ptr surface, SessionHandle assoc_handle): - BaseFrameAdapter(surface, assoc_handle) { + BaseFrameAdapter(surface, assoc_handle, AccelType::HOST) { } VPLMediaFrameCPUAdapter::~VPLMediaFrameCPUAdapter() = default; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp index db23a3c69f..885fa1589a 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp @@ -42,7 +42,7 @@ void unlock_mid(mfxMemId mid, mfxFrameData &data, MediaFrame::Access mode) { VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface, SessionHandle assoc_handle): - BaseFrameAdapter(assoc_surface, assoc_handle) { + BaseFrameAdapter(assoc_surface, assoc_handle, AccelType::DX11) { Surface::data_t& data = assoc_surface->get_data(); 
LockAdapter* alloc_data = reinterpret_cast(data.MemId); diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp new file mode 100644 index 0000000000..23ad385b51 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp @@ -0,0 +1,85 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include + +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" +#include "logger.hpp" + + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +cv::util::optional VPPPreprocDispatcher::is_applicable(const cv::MediaFrame& in_frame) { + cv::util::optional param; + GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + for (const auto &w : workers) { + param = w->is_applicable(in_frame); + if (param.has_value()) { + auto &vpp_param = param.value().get(); + BaseFrameAdapter* adapter = reinterpret_cast(vpp_param.reserved); + const IDeviceSelector::DeviceScoreTable &devs = + (std::static_pointer_cast(w))->get_accel()->get_device_selector()->select_devices(); + GAPI_DbgAssert(devs.size() >= 1 && "Invalid device selector"); + auto worker_accel_type = std::get<1>(*devs.begin()).get_type(); + GAPI_LOG_DEBUG(nullptr, "acceleration types for frame: " << to_cstring(adapter->accel_type()) << + ", for worker: " << to_cstring(worker_accel_type)); + if (worker_accel_type == 
adapter->accel_type()){ + vpp_param.reserved = reinterpret_cast(w.get()); + GAPI_LOG_DEBUG(nullptr, "selected worker: " << vpp_param.reserved); + break; + } + } + } + return param; +} + +pp_session VPPPreprocDispatcher::initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) { + const auto &vpp_param = initial_frame_param.get(); + GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + for (auto &w : workers) { + if (reinterpret_cast(w.get()) == vpp_param.reserved) { + pp_session sess = w->initialize_preproc(initial_frame_param, required_frame_descr); + vpp_pp_session &vpp_sess = sess.get(); + vpp_sess.reserved = reinterpret_cast(w.get()); + GAPI_LOG_DEBUG(nullptr, "initialized session preproc for worker: " << vpp_sess.reserved); + return sess; + } + } + GAPI_Assert(false && "Cannot initialize VPP preproc in dispatcher, no suitable worker"); +} + +cv::MediaFrame VPPPreprocDispatcher::run_sync(const pp_session &session_handle, + const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi) { + const auto &vpp_sess = session_handle.get(); + GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + for (auto &w : workers) { + if (reinterpret_cast(w.get()) == vpp_sess.reserved) { + GAPI_LOG_DEBUG(nullptr, "trigger execution on worker: " << vpp_sess.reserved); + return w->run_sync(session_handle, in_frame, opt_roi); + } + } + GAPI_Assert(false && "Cannot invoke VPP preproc in dispatcher, no suitable worker"); +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp new file mode 100644 index 0000000000..6e2ebc81f9 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp @@ -0,0 +1,53 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP +#define GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP + +#include +#include + +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" +#include "streaming/onevpl/engine/preproc_defines.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +// GAPI_EXPORTS for tests +class GAPI_EXPORTS VPPPreprocDispatcher final : public cv::gapi::wip::IPreprocEngine { +public: + + cv::util::optional is_applicable(const cv::MediaFrame& in_frame) override; + + pp_session initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) override; + + cv::MediaFrame run_sync(const pp_session &session_handle, + const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi) override; + + template + void insert_worker(Args&& ...args) { + workers.emplace_back(std::make_shared(std::forward(args)...)); + } + + size_t size() const { + return workers.size(); + } +private: + std::vector> workers; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp index 7de363fad5..d205211903 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp @@ -163,7 +163,8 @@ cv::util::optional VPPPreprocEngine::is_applicable(const cv::MediaFra if (vpl_adapter) { ret = cv::util::make_optional( pp_params::create(vpl_adapter->get_session_handle(), - 
vpl_adapter->get_surface()->get_info())); + vpl_adapter->get_surface()->get_info(), + vpl_adapter)); GAPI_LOG_DEBUG(nullptr, "VPP preprocessing applicable, session [" << vpl_adapter->get_session_handle() << "]"); } @@ -203,7 +204,7 @@ pp_session VPPPreprocEngine::initialize_preproc(const pp_params& initial_frame_p // check In & Out equally to bypass preproc if (mfxVPPParams.vpp.Out == mfxVPPParams.vpp.In) { GAPI_LOG_DEBUG(nullptr, "no preproc required"); - return pp_session::create(nullptr); + return pp_session::create(nullptr); } // recalculate size param according to VPP alignment @@ -221,7 +222,7 @@ pp_session VPPPreprocEngine::initialize_preproc(const pp_params& initial_frame_p auto it = preproc_session_map.find(mfxVPPParams.vpp.In); if (it != preproc_session_map.end()) { GAPI_LOG_DEBUG(nullptr, "[" << it->second->session << "] found"); - return pp_session::create(std::static_pointer_cast(it->second)); + return pp_session::create(std::static_pointer_cast(it->second)); } // NB: make some sanity checks @@ -311,7 +312,7 @@ pp_session VPPPreprocEngine::initialize_preproc(const pp_params& initial_frame_p bool inserted = preproc_session_map.emplace(mfxVPPParams.vpp.In, sess_ptr).second; GAPI_Assert(inserted && "preproc session is exist"); GAPI_LOG_INFO(nullptr, "VPPPreprocSession created, total sessions: " << preproc_session_map.size()); - return pp_session::create(std::static_pointer_cast(sess_ptr)); + return pp_session::create(std::static_pointer_cast(sess_ptr)); } void VPPPreprocEngine::on_frame_ready(session_type& sess, @@ -339,12 +340,12 @@ VPPPreprocEngine::initialize_session(mfxSession, cv::MediaFrame VPPPreprocEngine::run_sync(const pp_session& sess, const cv::MediaFrame& in_frame, const cv::util::optional &roi) { - std::shared_ptr pp_sess_impl = sess.get(); - if (!pp_sess_impl) { + vpp_pp_session pp_sess_impl = sess.get(); + if (!pp_sess_impl.handle) { // bypass case return in_frame; } - session_ptr_type s = std::static_pointer_cast(pp_sess_impl); + 
session_ptr_type s = std::static_pointer_cast(pp_sess_impl.handle); GAPI_DbgAssert(s && "Session is nullptr"); GAPI_DbgAssert(is_applicable(in_frame) && "VPP preproc is not applicable for the given frame"); diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp index 820510a55d..780c9cf5d7 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp @@ -18,9 +18,13 @@ namespace onevpl { struct vpp_pp_params { mfxSession handle; mfxFrameInfo info; + void *reserved = nullptr; }; -using vpp_pp_session_ptr = std::shared_ptr; +struct vpp_pp_session { + std::shared_ptr handle; + void *reserved = nullptr; +}; } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp index 2c72d7c547..5f68d9c4f7 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp @@ -19,12 +19,12 @@ namespace wip { #ifdef VPP_PREPROC_ENGINE #define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::onevpl::vpp_pp_params -#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::onevpl::vpp_pp_session_ptr +#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::onevpl::vpp_pp_session #else // VPP_PREPROC_ENGINE struct empty_pp_params {}; struct empty_pp_session {}; #define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::empty_pp_params; -#define GAPI_BACKEND_PP_SESSIONS std::shared_ptr; +#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::empty_pp_session; #endif // VPP_PREPROC_ENGINE struct pp_params { @@ -57,26 +57,25 @@ private: struct pp_session { using value_type = cv::util::variant; - template - static pp_session create(std::shared_ptr session) { - static_assert(cv::detail::contains, + template + static pp_session 
create(Args&& ...args) { + static_assert(cv::detail::contains::value, "Invalid BackendSpecificSesionType requested"); pp_session ret; - ret.value = session; + ret.value = BackendSpecificSesionType{std::forward(args)...};; return ret; } template - std::shared_ptr get() { - using ptr_type = std::shared_ptr; - static_assert(cv::detail::contains::value, + BackendSpecificSesionType &get() { + static_assert(cv::detail::contains::value, "Invalid BackendSpecificSesionType requested"); - return cv::util::get(value); + return cv::util::get(value); } template - std::shared_ptr get() const { + const BackendSpecificSesionType &get() const { return const_cast(this)->get(); } private: diff --git a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp index a0a66c7b93..9c0cc9ca4a 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp @@ -49,6 +49,7 @@ #include "streaming/onevpl/engine/preproc/preproc_engine.hpp" #include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" #include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" #include "streaming/onevpl/engine/transcode/transcode_session.hpp" @@ -279,8 +280,8 @@ TEST(OneVPL_Source_PreprocEngine, functional_single_thread) pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), required_frame_param); - ASSERT_EQ(pp_sess.get().get(), - first_pp_sess.get().get()); + ASSERT_EQ(pp_sess.get().handle.get(), + first_pp_sess.get().handle.get()); cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame, @@ -319,7 +320,7 @@ void decode_function(cv::gapi::wip::onevpl::VPLLegacyDecodeEngine &decode_engine queue.push_stop(); } -void preproc_function(cv::gapi::wip::onevpl::VPPPreprocEngine &preproc_engine, SafeQueue&queue, +void 
preproc_function(cv::gapi::wip::IPreprocEngine &preproc_engine, SafeQueue&queue, size_t &preproc_number, const out_frame_info_t &required_frame_param, const cv::util::optional &roi_rect = {}) { using namespace cv::gapi::wip; @@ -361,12 +362,15 @@ void preproc_function(cv::gapi::wip::onevpl::VPPPreprocEngine &preproc_engine, S cv::util::optional params = preproc_engine.is_applicable(decoded_frame); ASSERT_TRUE(params.has_value()); - ASSERT_TRUE(0 == memcmp(¶ms.value(), &first_pp_params.value(), sizeof(pp_params))); + const auto &vpp_params = params.value().get(); + const auto &first_vpp_params = first_pp_params.value().get(); + ASSERT_EQ(vpp_params.handle, first_vpp_params.handle); + ASSERT_TRUE(0 == memcmp(&vpp_params.info, &first_vpp_params.info, sizeof(mfxFrameInfo))); pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), required_frame_param); - ASSERT_EQ(pp_sess.get().get(), - first_pp_sess.get().get()); + ASSERT_EQ(pp_sess.get().handle.get(), + first_pp_sess.get().handle.get()); cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame, empty_roi); cv::GFrameDesc pp_desc = pp_frame.desc(); @@ -381,6 +385,71 @@ void preproc_function(cv::gapi::wip::onevpl::VPPPreprocEngine &preproc_engine, S ASSERT_NE(preproc_number, 1); } +void multi_source_preproc_function(size_t source_num, + cv::gapi::wip::IPreprocEngine &preproc_engine, SafeQueue&queue, + size_t &preproc_number, const out_frame_info_t &required_frame_param, + const cv::util::optional &roi_rect = {}) { + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + // create preproc session based on frame description & network info + cv::MediaFrame first_decoded_frame = queue.pop(); + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = + preproc_engine.initialize_preproc(first_pp_params.value(), + required_frame_param); + + // make preproc using incoming 
decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, + first_decoded_frame, + roi_rect); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = cv::MediaFrame(); + + // launch pipeline + bool in_progress = false; + preproc_number = 1; + size_t received_stop_count = 0; + try { + while(received_stop_count != source_num) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + ++received_stop_count; + continue; + } + in_progress = true; + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame, empty_roi); + cv::GFrameDesc pp_desc = pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + decoded_frame = cv::MediaFrame(); + preproc_number++; + } + } catch (const std::exception& ex) { + GAPI_LOG_WARNING(nullptr, "Caught exception in preproc worker: " << ex.what()); + } + + // test if interruption has happened + if (in_progress) { + while (true) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + break; + } + } + } + ASSERT_FALSE(in_progress); + ASSERT_NE(preproc_number, 1); +} using roi_t = cv::util::optional; using preproc_roi_args_t = decltype(std::tuple_cat(std::declval(), std::declval>())); @@ -548,6 +617,93 @@ TEST_P(VPPInnerPreprocParams, functional_inner_preproc_size) INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocInner, VPPInnerPreprocParams, testing::ValuesIn(files)); + +// Dispatcher test suite +class VPPPreprocDispatcherROIParams : public ::testing::TestWithParam {}; +TEST_P(VPPPreprocDispatcherROIParams, 
functional_roi_different_threads) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + source_t file_path; + decoder_t decoder_id; + acceleration_t accel = MFX_ACCEL_MODE_VIA_D3D11; + out_frame_info_t required_frame_param; + roi_t opt_roi; + std::tie(file_path, decoder_id, std::ignore, required_frame_param, opt_roi) = GetParam(); + + file_path = findDataFile(file_path); + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(accel)); + std::unique_ptr decode_accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + // create file data provider + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + std::shared_ptr cpu_data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(decoder_id, accel); + + // create decode session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + mfxSession mfx_cpu_decode_session{}; + sts = MFXCreateSession(mfx, 0, &mfx_cpu_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // create decode engines + auto device_selector = decode_accel_policy->get_device_selector(); + VPLLegacyDecodeEngine decode_engine(std::move(decode_accel_policy)); + auto sess_ptr = decode_engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11, + data_provider); + std::vector cfg_params_cpu; + auto cpu_device_selector = std::make_shared(cfg_params_cpu); + VPLLegacyDecodeEngine cpu_decode_engine(std::unique_ptr{ + new VPLCPUAccelerationPolicy(cpu_device_selector)}); + auto cpu_sess_ptr = cpu_decode_engine.initialize_session(mfx_cpu_decode_session, + cfg_params_cpu, + cpu_data_provider); + + // create VPP preproc engines + VPPPreprocDispatcher preproc_dispatcher; + 
preproc_dispatcher.insert_worker(std::unique_ptr{ + new VPLDX11AccelerationPolicy(device_selector)}); + preproc_dispatcher.insert_worker(std::unique_ptr{ + new VPLCPUAccelerationPolicy(cpu_device_selector)}); + + // launch threads + SafeQueue queue; + size_t decoded_number = 1; + size_t cpu_decoded_number = 1; + size_t preproc_number = 0; + + std::thread decode_thread(decode_function, std::ref(decode_engine), sess_ptr, + std::ref(queue), std::ref(decoded_number)); + std::thread cpu_decode_thread(decode_function, std::ref(cpu_decode_engine), cpu_sess_ptr, + std::ref(queue), std::ref(cpu_decoded_number)); + std::thread preproc_thread(multi_source_preproc_function, + preproc_dispatcher.size(), + std::ref(preproc_dispatcher), + std::ref(queue), std::ref(preproc_number), + std::cref(required_frame_param), + std::cref(opt_roi)); + + decode_thread.join(); + cpu_decode_thread.join(); + preproc_thread.join(); + ASSERT_EQ(preproc_number, decoded_number + cpu_decoded_number); +} + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocDispatcherROI, VPPPreprocDispatcherROIParams, + testing::ValuesIn(files_w_roi)); + #endif // HAVE_DIRECTX #endif // HAVE_D3D11 } // namespace opencv_test From e5f2a8ebf27c86556e7a23488315e272648d3407 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Tue, 15 Mar 2022 18:27:39 +0300 Subject: [PATCH 26/84] Merge pull request #21636 from TolyaTalamanov:at/gapi_modeling_tool_drop_frames [G-API] Pipeline modeling tool - support frame dropping for source * Implement drop frames functionality for dummy src * Reconsider frame dropping * Fix comments --- .../gapi/samples/pipeline_modeling_tool.cpp | 20 ++++- .../pipeline_modeling_tool/dummy_source.hpp | 75 ++++++++++++++----- .../pipeline_builder.hpp | 14 ++-- .../test_pipeline_modeling_tool.py | 55 ++++++++++---- 4 files changed, 120 insertions(+), 44 deletions(-) diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp index ca6187e1ca..4ff2cbd82c 
100644 --- a/modules/gapi/samples/pipeline_modeling_tool.cpp +++ b/modules/gapi/samples/pipeline_modeling_tool.cpp @@ -224,6 +224,7 @@ int main(int argc, char* argv[]) { " if set to 0. If it's specified will be" " applied for every pipeline. }" "{ app_mode | realtime | Application mode (realtime/benchmark). }" + "{ drop_frames | false | Drop frames if they come earlier than pipeline is completed. }" "{ exec_list | | A comma-separated list of pipelines that" " will be executed. Spaces around commas" " are prohibited. }"; @@ -238,10 +239,11 @@ int main(int argc, char* argv[]) { const auto load_config = cmd.get("load_config"); const auto cached_dir = cmd.get("cache_dir"); const auto log_file = cmd.get("log_file"); - const auto pl_mode = strToPLMode(cmd.get("pl_mode")); + const auto cmd_pl_mode = strToPLMode(cmd.get("pl_mode")); const auto qc = cmd.get("qc"); const auto app_mode = strToAppMode(cmd.get("app_mode")); const auto exec_str = cmd.get("exec_list"); + const auto drop_frames = cmd.get("drop_frames"); cv::FileStorage fs; if (cfg.empty()) { @@ -306,7 +308,8 @@ int main(int argc, char* argv[]) { if (app_mode == AppMode::BENCHMARK) { latency = 0.0; } - builder.setSource(src_name, latency, output); + auto src = std::make_shared(latency, output, drop_frames); + builder.setSource(src_name, src); } const auto& nodes_fn = check_and_get_fn(pl_fn, "nodes", name); @@ -352,9 +355,18 @@ int main(int argc, char* argv[]) { builder.addEdge(edge); } + auto cfg_pl_mode = readOpt(pl_fn["mode"]); // NB: Pipeline mode from config takes priority over cmd. - auto mode = readOpt(pl_fn["mode"]); - builder.setMode(mode.has_value() ? strToPLMode(mode.value()) : pl_mode); + auto pl_mode = cfg_pl_mode.has_value() + ? strToPLMode(cfg_pl_mode.value()) : cmd_pl_mode; + // NB: Using drop_frames with streaming pipelines will follow to + // incorrect performance results. 
+ if (drop_frames && pl_mode == PLMode::STREAMING) { + throw std::logic_error( + "--drop_frames option is supported only for pipelines in \"regular\" mode"); + } + + builder.setMode(pl_mode); // NB: Queue capacity from config takes priority over cmd. auto config_qc = readOpt(pl_fn["queue_capacity"]); diff --git a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp index 1514eb2671..3079b99204 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp @@ -14,21 +14,23 @@ class DummySource final: public cv::gapi::wip::IStreamSource { public: using Ptr = std::shared_ptr; DummySource(const double latency, - const OutputDescr& output); + const OutputDescr& output, + const bool drop_frames); bool pull(cv::gapi::wip::Data& data) override; cv::GMetaArg descr_of() const override; private: double m_latency; cv::Mat m_mat; - using TimePoint = - std::chrono::time_point; - cv::optional m_prev_pull_tp; + bool m_drop_frames; + double m_next_tick_ts = -1; + int64_t m_curr_seq_id = 0; }; DummySource::DummySource(const double latency, - const OutputDescr& output) - : m_latency(latency) { + const OutputDescr& output, + const bool drop_frames) + : m_latency(latency), m_drop_frames(drop_frames) { utils::createNDMat(m_mat, output.dims, output.precision); utils::generateRandom(m_mat); } @@ -36,23 +38,60 @@ DummySource::DummySource(const double latency, bool DummySource::pull(cv::gapi::wip::Data& data) { using namespace std::chrono; using namespace cv::gapi::streaming; - // NB: In case it's the first pull. - if (!m_prev_pull_tp) { - m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now()); + + // NB: Wait m_latency before return the first frame. 
+ if (m_next_tick_ts == -1) { + m_next_tick_ts = utils::timestamp() + m_latency; } + + int64_t curr_ts = utils::timestamp(); + if (curr_ts < m_next_tick_ts) { + /* + * curr_ts + * | + * ------|----*-----|-------> + * ^ + * m_next_tick_ts + * + * + * NB: New frame will be produced at the m_next_tick_ts point. + */ + utils::sleep(m_next_tick_ts - curr_ts); + } else { + /* + * curr_ts + * +1 +2 | + * |----------|----------|----------|----*-----|-------> + * ^ ^ + * m_next_tick_ts -------------> + * + * + * NB: Shift m_next_tick_ts to the nearest tick before curr_ts and + * update current seq_id correspondingly. + * + * if drop_frames is enabled, wait for the next tick, otherwise + * return last writen frame (+2 at the picture above) immediately. + */ + int64_t num_frames = + static_cast((curr_ts - m_next_tick_ts) / m_latency); + m_curr_seq_id += num_frames; + m_next_tick_ts += num_frames * m_latency; + if (m_drop_frames) { + m_next_tick_ts += m_latency; + ++m_curr_seq_id; + utils::sleep(m_next_tick_ts - curr_ts); + } + } + // NB: Just increase reference counter not to release mat memory // after assigning it to the data. 
cv::Mat mat = m_mat; - auto end = high_resolution_clock::now(); - auto elapsed = - duration_cast>(end - *m_prev_pull_tp).count(); - auto delta = m_latency - elapsed; - if (delta > 0) { - utils::sleep(delta); - } - data.meta[meta_tag::timestamp] = int64_t{utils::timestamp()}; + + data.meta[meta_tag::timestamp] = utils::timestamp(); + data.meta[meta_tag::seq_id] = m_curr_seq_id++; data = mat; - m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now()); + m_next_tick_ts += m_latency; + return true; } diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp index 63ada28603..a4f69b60ad 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp @@ -184,9 +184,8 @@ public: void addInfer(const std::string& name, const InferParams& params); - void setSource(const std::string& name, - double latency, - const OutputDescr& output); + void setSource(const std::string& name, + std::shared_ptr src); void addEdge(const Edge& edge); void setMode(PLMode mode); @@ -315,11 +314,10 @@ void PipelineBuilder::addEdge(const Edge& edge) { out_data->out_nodes.push_back(dst_node); } -void PipelineBuilder::setSource(const std::string& name, - double latency, - const OutputDescr& output) { - GAPI_Assert(!m_state->src); - m_state->src = std::make_shared(latency, output); +void PipelineBuilder::setSource(const std::string& name, + std::shared_ptr src) { + GAPI_Assert(!m_state->src && "Only single source pipelines are supported!"); + m_state->src = src; addCall(name, SourceCall{}); } diff --git a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py index ef4bce6476..f36d0efc3b 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py +++ 
b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py @@ -907,25 +907,52 @@ def test_error_invalid_pl_mode(): cfg_file = """\"%YAML:1.0 work_time: 1000 Pipelines: -PL1: - source: - name: 'Src' - latency: 20 - output: - dims: [1,2,3,4] - precision: 'U8' - nodes: - - name: 'Node0' - type: 'Dummy' - time: 0.2 + PL1: + source: + name: 'Src' + latency: 20 output: dims: [1,2,3,4] precision: 'U8' - edges: - - from: 'Src' - to: 'Node0'\" """ + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ exec_str = '{} --cfg={} --app_mode=unknown'.format(pipeline_modeling_tool, cfg_file) out = get_output(exec_str) assert out.startswith('Unsupported AppMode: unknown\n' 'Please chose between: realtime and benchmark') + + +def test_error_drop_frames_with_streaming(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={} --pl_mode=streaming --drop_frames'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('--drop_frames option is supported only for pipelines in "regular" mode') From 0d16b5fc389234d8c1bc61ac4afb7f5582ddd136 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 15 Mar 2022 19:00:52 +0300 Subject: [PATCH 27/84] Fix libva dynamic loading --- modules/core/src/va_wrapper.impl.hpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/modules/core/src/va_wrapper.impl.hpp b/modules/core/src/va_wrapper.impl.hpp index 260d3ba49b..77faa984d0 100644 --- a/modules/core/src/va_wrapper.impl.hpp +++ b/modules/core/src/va_wrapper.impl.hpp @@ -15,18 +15,33 @@ typedef VAStatus (*FN_vaDestroyImage)(VADisplay dpy, VAImageID image); typedef VAStatus 
(*FN_vaMapBuffer)(VADisplay dpy, VABufferID buf_id, void **pbuf); typedef VAStatus (*FN_vaSyncSurface)(VADisplay dpy, VASurfaceID render_target); typedef VAStatus (*FN_vaUnmapBuffer)(VADisplay dpy, VABufferID buf_id); +typedef int (*FN_vaMaxNumImageFormats)(VADisplay dpy); +typedef VAStatus (*FN_vaQueryImageFormats)(VADisplay dpy, VAImageFormat *format_list, int *num_formats); +typedef VAStatus (*FN_vaCreateImage)(VADisplay dpy, VAImageFormat *format, int width, int height, VAImage *image); +typedef VAStatus (*FN_vaPutImage)(VADisplay dpy, VASurfaceID surface, VAImageID image, int src_x, int src_y, unsigned int src_width, unsigned int src_height, int dest_x, int dest_y, unsigned int dest_width, unsigned int dest_height); +typedef VAStatus (*FN_vaGetImage)(VADisplay dpy, VASurfaceID surface, int x, int y, unsigned int width, unsigned int height, VAImageID image); static FN_vaDeriveImage fn_vaDeriveImage = NULL; static FN_vaDestroyImage fn_vaDestroyImage = NULL; static FN_vaMapBuffer fn_vaMapBuffer = NULL; static FN_vaSyncSurface fn_vaSyncSurface = NULL; static FN_vaUnmapBuffer fn_vaUnmapBuffer = NULL; +static FN_vaMaxNumImageFormats fn_vaMaxNumImageFormats = NULL; +static FN_vaQueryImageFormats fn_vaQueryImageFormats = NULL; +static FN_vaCreateImage fn_vaCreateImage = NULL; +static FN_vaPutImage fn_vaPutImage = NULL; +static FN_vaGetImage fn_vaGetImage = NULL; #define vaDeriveImage fn_vaDeriveImage #define vaDestroyImage fn_vaDestroyImage #define vaMapBuffer fn_vaMapBuffer #define vaSyncSurface fn_vaSyncSurface #define vaUnmapBuffer fn_vaUnmapBuffer +#define vaMaxNumImageFormats fn_vaMaxNumImageFormats +#define vaQueryImageFormats fn_vaQueryImageFormats +#define vaCreateImage fn_vaCreateImage +#define vaPutImage fn_vaPutImage +#define vaGetImage fn_vaGetImage static std::shared_ptr loadLibVA() @@ -76,6 +91,11 @@ static void init_libva() VA_LOAD_SYMBOL(vaMapBuffer); VA_LOAD_SYMBOL(vaSyncSurface); VA_LOAD_SYMBOL(vaUnmapBuffer); + VA_LOAD_SYMBOL(vaMaxNumImageFormats); 
+ VA_LOAD_SYMBOL(vaQueryImageFormats); + VA_LOAD_SYMBOL(vaCreateImage); + VA_LOAD_SYMBOL(vaPutImage); + VA_LOAD_SYMBOL(vaGetImage); initialized = true; } if (!library) From ef6f421f8953c0a9d4e5d49074b0dc99975a3fa0 Mon Sep 17 00:00:00 2001 From: Pierre Chatelier Date: Wed, 16 Mar 2022 15:46:11 +0100 Subject: [PATCH 28/84] Merge pull request #21677 from chacha21:rectangle_intersection * better accuracy of _rotatedRectangleIntersection instead of just migrating to double-precision (which would work), some computations are scaled by a factor that depends on the length of the smallest vectors. There is a better accuracy even with floats, so this is certainly better for very sensitive cases * Update intersection.cpp use L2SQR norm to tune the numeric scale * Update intersection.cpp adapt samePointEps with L2 norm * Update intersection.cpp move comment * Update intersection.cpp fix wrong numericalScalingFactor usage * added tests * fixed warnings returned by buildbot * modifications suggested by reviewer renaming numericalScaleFctor to normalizationScale refactor some computations more "const" * modifications as suggested by reviewer --- modules/imgproc/src/intersection.cpp | 78 ++++++++++++++-------- modules/imgproc/test/test_intersection.cpp | 78 ++++++++++++++++++++++ 2 files changed, 130 insertions(+), 26 deletions(-) diff --git a/modules/imgproc/src/intersection.cpp b/modules/imgproc/src/intersection.cpp index 47d3f3f457..b9659f666e 100644 --- a/modules/imgproc/src/intersection.cpp +++ b/modules/imgproc/src/intersection.cpp @@ -51,15 +51,15 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate { CV_INSTRUMENT_REGION(); - // L2 metric - const float samePointEps = std::max(1e-16f, 1e-6f * (float)std::max(rect1.size.area(), rect2.size.area())); - Point2f vec1[4], vec2[4]; Point2f pts1[4], pts2[4]; rect1.points(pts1); rect2.points(pts2); + // L2 metric + float samePointEps = 1e-6f * (float)std::max(rect1.size.area(), rect2.size.area()); + 
int ret = INTERSECT_FULL; // Specical case of rect1 == rect2 @@ -99,14 +99,22 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate vec2[i].y = pts2[(i+1)%4].y - pts2[i].y; } + //we adapt the epsilon to the smallest dimension of the rects + for( int i = 0; i < 4; i++ ) + { + samePointEps = std::min(samePointEps, std::sqrt(vec1[i].x*vec1[i].x+vec1[i].y*vec1[i].y)); + samePointEps = std::min(samePointEps, std::sqrt(vec2[i].x*vec2[i].x+vec2[i].y*vec2[i].y)); + } + samePointEps = std::max(1e-16f, samePointEps); + // Line test - test all line combos for intersection for( int i = 0; i < 4; i++ ) { for( int j = 0; j < 4; j++ ) { // Solve for 2x2 Ax=b - float x21 = pts2[j].x - pts1[i].x; - float y21 = pts2[j].y - pts1[i].y; + const float x21 = pts2[j].x - pts1[i].x; + const float y21 = pts2[j].y - pts1[i].y; float vx1 = vec1[i].x; float vy1 = vec1[i].y; @@ -114,10 +122,22 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate float vx2 = vec2[j].x; float vy2 = vec2[j].y; - float det = vx2*vy1 - vx1*vy2; + float normalizationScale = std::min(vx1*vx1+vy1*vy1, vx2*vx2+vy2*vy2);//sum of squares : this is >= 0 + //normalizationScale is a square, and we usually limit accuracy around 1e-6, so normalizationScale should be rather limited by ((1e-6)^2)=1e-12 + normalizationScale = (normalizationScale < 1e-12f) ? 1.f : 1.f/normalizationScale; - float t1 = (vx2*y21 - vy2*x21) / det; - float t2 = (vx1*y21 - vy1*x21) / det; + vx1 *= normalizationScale; + vy1 *= normalizationScale; + vx2 *= normalizationScale; + vy2 *= normalizationScale; + + const float det = vx2*vy1 - vx1*vy2; + if (std::abs(det) < 1e-12)//like normalizationScale, we consider accuracy around 1e-6, i.e. 
1e-12 when squared + continue; + const float detInvScaled = normalizationScale/det; + + const float t1 = (vx2*y21 - vy2*x21)*detInvScaled; + const float t2 = (vx1*y21 - vy1*x21)*detInvScaled; // This takes care of parallel lines if( cvIsInf(t1) || cvIsInf(t2) || cvIsNaN(t1) || cvIsNaN(t2) ) @@ -127,8 +147,8 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate if( t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f ) { - float xi = pts1[i].x + vec1[i].x*t1; - float yi = pts1[i].y + vec1[i].y*t1; + const float xi = pts1[i].x + vec1[i].x*t1; + const float yi = pts1[i].y + vec1[i].y*t1; intersection.push_back(Point2f(xi,yi)); } @@ -149,18 +169,20 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate int posSign = 0; int negSign = 0; - float x = pts1[i].x; - float y = pts1[i].y; + const float x = pts1[i].x; + const float y = pts1[i].y; for( int j = 0; j < 4; j++ ) { + float normalizationScale = vec2[j].x*vec2[j].x+vec2[j].y*vec2[j].y; + normalizationScale = (normalizationScale < 1e-12f) ? 
1.f : 1.f/normalizationScale; // line equation: Ax + By + C = 0 // see which side of the line this point is at - float A = -vec2[j].y; - float B = vec2[j].x; - float C = -(A*pts2[j].x + B*pts2[j].y); + const float A = -vec2[j].y*normalizationScale ; + const float B = vec2[j].x*normalizationScale ; + const float C = -(A*pts2[j].x + B*pts2[j].y); - float s = A*x+ B*y+ C; + const float s = A*x + B*y + C; if( s >= 0 ) { @@ -187,18 +209,22 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate int posSign = 0; int negSign = 0; - float x = pts2[i].x; - float y = pts2[i].y; + const float x = pts2[i].x; + const float y = pts2[i].y; for( int j = 0; j < 4; j++ ) { // line equation: Ax + By + C = 0 // see which side of the line this point is at - float A = -vec1[j].y; - float B = vec1[j].x; - float C = -(A*pts1[j].x + B*pts1[j].y); + float normalizationScale = vec2[j].x*vec2[j].x+vec2[j].y*vec2[j].y; + normalizationScale = (normalizationScale < 1e-12f) ? 1.f : 1.f/normalizationScale; + if (std::isinf(normalizationScale )) + normalizationScale = 1.f; + const float A = -vec1[j].y*normalizationScale ; + const float B = vec1[j].x*normalizationScale ; + const float C = -(A*pts1[j].x + B*pts1[j].y); - float s = A*x + B*y + C; + const float s = A*x + B*y + C; if( s >= 0 ) { @@ -223,7 +249,7 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate } // Get rid of duplicated points - int Nstride = N; + const int Nstride = N; cv::AutoBuffer distPt(N * N); cv::AutoBuffer ptDistRemap(N); for (int i = 0; i < N; ++i) @@ -233,7 +259,7 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate for (int j = i + 1; j < N; ) { const Point2f pt1 = intersection[j]; - float d2 = normL2Sqr(pt1 - pt0); + const float d2 = normL2Sqr(pt1 - pt0); if(d2 <= samePointEps) { if (j < N - 1) @@ -252,10 +278,10 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate float minD = distPt[1]; for (int i = 
0; i < N - 1; ++i) { - float* pDist = distPt.data() + Nstride * ptDistRemap[i]; + const float* pDist = distPt.data() + Nstride * ptDistRemap[i]; for (int j = i + 1; j < N; ++j) { - float d = pDist[ptDistRemap[j]]; + const float d = pDist[ptDistRemap[j]]; if (d < minD) { minD = d; diff --git a/modules/imgproc/test/test_intersection.cpp b/modules/imgproc/test/test_intersection.cpp index c455c439fc..9ba3bf8125 100644 --- a/modules/imgproc/test/test_intersection.cpp +++ b/modules/imgproc/test/test_intersection.cpp @@ -366,6 +366,84 @@ TEST(Imgproc_RotatedRectangleIntersection, regression_12221_2) EXPECT_LE(intersections.size(), (size_t)8); } +TEST(Imgproc_RotatedRectangleIntersection, accuracy_21659) +{ + float scaleFactor = 1000;//to challenge the normalizationScale in the algorithm + cv::RectanglesIntersectTypes intersectionResult = cv::RectanglesIntersectTypes::INTERSECT_NONE; + std::vector intersection; + double intersectionArea = 0; + cv::RotatedRect r1 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + cv::RotatedRect r2; + + r2 = cv::RotatedRect(cv::Point2f(-2.f, -2.f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_NONE, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-0), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(1.5f, .5f)*scaleFactor, cv::Size2f(1.f, 2.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. 
: cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-0), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(1.5f, 1.5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-0), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_FULL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-r2.size.area()), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(.5f, .5f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_FULL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-r2.size.area()), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(2.f, .5f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. 
: cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-500000), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 45); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-828427), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(1.f, 1.f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 45); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-250000), 1e-1); + + //see #21659 + r1 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545063f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + r2 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545235f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-r1.size.area()), 1e-3); + + r1 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545063f + 0.01f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + r2 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545235f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. 
: cv::contourArea(intersection); + ASSERT_LE(std::abs(intersectionArea-r1.size.area()), 1e-1); + + r1 = cv::RotatedRect(cv::Point2f(45.0715866f, 39.8825722f), cv::Size2f(3.0f, 3.0f), 0.10067749f); + r2 = cv::RotatedRect(cv::Point2f(45.0715866f, 39.8825874f), cv::Size2f(3.0f, 3.0f), 0.10067749f); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_LE(std::abs(intersectionArea-r1.size.area()), 1e-3); +} + TEST(Imgproc_RotatedRectangleIntersection, regression_18520) { RotatedRect rr_empty( From 48cd2d190f28c002bac8a9a190d1fc16169eaece Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Thu, 17 Mar 2022 19:45:14 +0300 Subject: [PATCH 29/84] Merge pull request #21678 from anna-khakimova:ak/resize_f32c1_scalar GAPI Fluid: Resize F32C1 scalar version. * GAPI Fluid: Resize F32C1 scalar. * Final version * Applied comments --- .../gapi/include/opencv2/gapi/own/assert.hpp | 1 - .../cpu/gapi_imgproc_perf_tests_fluid.cpp | 6 +- .../gapi/src/backends/fluid/gfluidimgproc.cpp | 168 +++++++++++++++--- 3 files changed, 143 insertions(+), 32 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/own/assert.hpp b/modules/gapi/include/opencv2/gapi/own/assert.hpp index d50543fdac..4bd3eaaf50 100644 --- a/modules/gapi/include/opencv2/gapi/own/assert.hpp +++ b/modules/gapi/include/opencv2/gapi/own/assert.hpp @@ -43,7 +43,6 @@ namespace detail #define GAPI_Assert(expr) \ { if (!(expr)) ::detail::assert_abort(#expr, __LINE__, __FILE__, __func__); } - #ifdef NDEBUG # define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr) #else diff --git a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp index b5e72ae4ce..a768875f32 100644 --- a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp @@ -201,7 +201,7 @@ 
INSTANTIATE_TEST_CASE_P(RGB2LabPerfTestFluid, RGB2LabPerfTest, INSTANTIATE_TEST_CASE_P(ResizePerfTestFluid, ResizePerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), + Values(CV_8UC3, CV_32FC1), Values(cv::INTER_LINEAR), Values(szSmall128, szVGA, sz720p, sz1080p), Values(cv::Size(64, 64), @@ -217,7 +217,7 @@ INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestFluid, BottleneckKernelsConstIn INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), + Values(CV_8UC3, CV_32FC1), Values(szSmall128, szVGA, sz720p, sz1080p), Values(0.5), Values(0.5), @@ -225,7 +225,7 @@ INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPer INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestFluid, ResizeFxFyPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), + Values(CV_8UC3, CV_32FC1), Values(cv::INTER_LINEAR), Values(szSmall128, szVGA, sz720p, sz1080p), Values(0.5, 0.25, 2), diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp index aca2dcca6f..a2805b35aa 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp @@ -1866,10 +1866,11 @@ static inline double ratio(int inSz, int outSz) { } template -static inline void initScratchLinear(const cv::GMatDesc& in, - const Size& outSz, - cv::gapi::fluid::Buffer& scratch, - int lpi) { +CV_ALWAYS_INLINE void initScratchLinear(const cv::GMatDesc& in, + const Size& outSz, + cv::gapi::fluid::Buffer& scratch, + int lpi) +{ using alpha_type = typename Mapper::alpha_type; static const auto unity = Mapper::unity; @@ -1895,7 +1896,8 @@ static inline void initScratchLinear(const cv::GMatDesc& in, auto *clone = scr.clone; auto *index = scr.mapsx; - for (int x = 0; x < outSz.width; x++) { + for (int x = 0; x < outSz.width; 
++x) + { auto map = Mapper::map(hRatio, 0, in.size.width, x); auto alpha0 = map.alpha0; auto index0 = map.index0; @@ -1930,7 +1932,7 @@ static inline void initScratchLinear(const cv::GMatDesc& in, alpha[x] = alpha0; index[x] = index0; - for (int l = 0; l < 4; l++) { + for (int l = 0; l < 4; ++l) { clone[4*x + l] = alpha0; } } @@ -1952,10 +1954,18 @@ struct MapperUnit { I index0, index1; }; -inline static uint8_t calc(short alpha0, uint8_t src0, short alpha1, uint8_t src1) { +CV_ALWAYS_INLINE uint8_t resize_calc_revert_fixedpoint(short alpha0, uint8_t src0, short alpha1, uint8_t src1) +{ constexpr static const int half = 1 << 14; return (src0 * alpha0 + src1 * alpha1 + half) >> 15; } + +CV_ALWAYS_INLINE float resize_main_calculation(float alpha0, float src0, float alpha1, float src1) +{ + return src0 * alpha0 + src1 * alpha1; +} + +namespace linear { struct Mapper { constexpr static const int ONE = 1 << 15; typedef short alpha_type; @@ -1980,11 +1990,38 @@ struct Mapper { return u; } }; +} // namespace linear + +namespace linear32f { +struct Mapper { + typedef float alpha_type; + typedef int index_type; + constexpr static const float unity = 1; + + typedef MapperUnit Unit; + + static inline Unit map(double ratio, int start, int max, int outCoord) { + float f = static_cast((outCoord + 0.5) * ratio - 0.5); + int s = cvFloor(f); + f -= s; + + Unit u; + + u.index0 = std::max(s - start, 0); + u.index1 = ((std::fabs(f) <= FLT_EPSILON) || s + 1 >= max) ? 
s - start : s - start + 1; + + u.alpha0 = 1.f - f; + u.alpha1 = f; + + return u; + } +}; +} // namespace linear32f template -static void calcRowLinearC(const cv::gapi::fluid::View & in, - cv::gapi::fluid::Buffer& out, - cv::gapi::fluid::Buffer& scratch) { +CV_ALWAYS_INLINE void calcRowLinearC(const cv::gapi::fluid::View & in, + cv::gapi::fluid::Buffer& out, + cv::gapi::fluid::Buffer& scratch) { using alpha_type = typename Mapper::alpha_type; auto inSz = in.meta().size; @@ -2052,14 +2089,74 @@ static void calcRowLinearC(const cv::gapi::fluid::View & in, for (int c = 0; c < numChan; c++) { auto idx0 = numChan*sx0 + c; auto idx1 = numChan*sx1 + c; - T tmp0 = calc(beta0, src0[l][idx0], beta1, src1[l][idx0]); - T tmp1 = calc(beta0, src0[l][idx1], beta1, src1[l][idx1]); - dst[l][numChan * x + c] = calc(alpha0, tmp0, alpha1, tmp1); + T tmp0 = resize_calc_revert_fixedpoint(beta0, src0[l][idx0], beta1, src1[l][idx0]); + T tmp1 = resize_calc_revert_fixedpoint(beta0, src0[l][idx1], beta1, src1[l][idx1]); + dst[l][numChan * x + c] = resize_calc_revert_fixedpoint(alpha0, tmp0, alpha1, tmp1); } } } } +template +CV_ALWAYS_INLINE void calcRowLinear(const cv::gapi::fluid::View& in, + cv::gapi::fluid::Buffer& out, + cv::gapi::fluid::Buffer& scratch) +{ + GAPI_DbgAssert((out.meta().depth == CV_32F) && (out.meta().chan == 1)); + + auto inSz = in.meta().size; + auto outSz = out.meta().size; + + auto inY = in.y(); + int length = out.length(); + int outY = out.y(); + int lpi = out.lpi(); + GAPI_DbgAssert(outY + lpi <= outSz.height); + + GAPI_DbgAssert(lpi <= 4); + + LinearScratchDesc scr(inSz.width, inSz.height, outSz.width, + outSz.height, scratch.OutLineB()); + + const auto* alpha = scr.alpha; + const auto* mapsx = scr.mapsx; + const auto* beta0 = scr.beta; + const auto* mapsy = scr.mapsy; + + const auto* beta = beta0 + outY; + const float* src0[4]; + const float* src1[4]; + float* dst[4]; + + for (int l = 0; l < lpi; ++l) + { + auto index0 = mapsy[outY + l] - inY; + auto index1 = 
mapsy[outSz.height + outY + l] - inY; + src0[l] = in.InLine(index0); + src1[l] = in.InLine(index1); + dst[l] = out.OutLine(l); + } + + using alpha_type = typename Mapper::alpha_type; + for (int l = 0; l < lpi; ++l) + { + constexpr static const auto unity = Mapper::unity; + + auto b0 = beta[l]; + auto b1 = saturate_cast(unity - beta[l]); + + for (int x = 0; x < length; ++x) { + auto alpha0 = alpha[x]; + auto alpha1 = saturate_cast(unity - alpha[x]); + auto sx0 = mapsx[x]; + auto sx1 = sx0 + 1; + float tmp0 = resize_main_calculation(b0, src0[l][sx0], b1, src1[l][sx0]); + float tmp1 = resize_main_calculation(b0, src0[l][sx1], b1, src1[l][sx1]); + dst[l][x] = resize_main_calculation(alpha0, tmp0, alpha1, tmp1); + } + } +} + GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) { static const int Window = 1; @@ -2071,9 +2168,12 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) constexpr static const short ONE = INTER_RESIZE_COEF_SCALE; static void initScratch(const cv::GMatDesc& in, - cv::Size outSz, double fx, double fy, int /*interp*/, + cv::Size outSz, double fx, double fy, int interp, cv::gapi::fluid::Buffer &scratch) - { + { + GAPI_Assert((in.depth == CV_8U && in.chan == 3) || + (in.depth == CV_32F && in.chan == 1)); + GAPI_Assert(interp == cv::INTER_LINEAR); int outSz_w; int outSz_h; if (outSz.width == 0 || outSz.height == 0) @@ -2088,32 +2188,44 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) } cv::Size outSize(outSz_w, outSz_h); - if (in.chan == 3) + if (in.depth == CV_8U && in.chan == 3) { - initScratchLinear(in, outSize, scratch, LPI); + initScratchLinear(in, outSize, scratch, LPI); } - else if (in.chan == 4) + else if (in.depth == CV_32F && in.chan == 1) { - initScratchLinear(in, outSize, scratch, LPI); + initScratchLinear(in, outSize, scratch, LPI); } - } + else + { + CV_Error(cv::Error::StsBadArg, "unsupported combination of type and number of channel"); + } + } static void 
resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {} - static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, double /*fy*/, int interp, - cv::gapi::fluid::Buffer& out, - cv::gapi::fluid::Buffer& scratch) { + static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, + double /*fy*/, int interp, cv::gapi::fluid::Buffer& out, + cv::gapi::fluid::Buffer& scratch) + { + GAPI_Assert((in.meta().depth == CV_8U && in.meta().chan == 3) || + (in.meta().depth == CV_32F && in.meta().chan == 1)); + GAPI_Assert(interp == cv::INTER_LINEAR); const int channels = in.meta().chan; - GAPI_Assert((channels == 3 || channels == 4) && (interp == cv::INTER_LINEAR)); + const int depth = in.meta().depth; - if (channels == 3) + if (depth == CV_8U && channels == 3) { - calcRowLinearC(in, out, scratch); + calcRowLinearC(in, out, scratch); } - else if (channels == 4) + else if (depth == CV_32F && channels == 1) { - calcRowLinearC(in, out, scratch); + calcRowLinear(in, out, scratch); + } + else + { + CV_Error(cv::Error::StsBadArg, "unsupported combination of type and number of channel"); } } }; From 632e07b74958415997facb07f01177940a082613 Mon Sep 17 00:00:00 2001 From: luz paz Date: Thu, 17 Mar 2022 16:06:54 -0400 Subject: [PATCH 30/84] Fix typos in tutorials within docs/ Fix various typos found within tutorial documentation --- doc/js_tutorials/js_assets/webnn-electron/package.json | 2 +- doc/js_tutorials/js_setup/js_setup/js_setup.markdown | 8 ++++---- doc/tutorials/calib3d/usac.markdown | 4 ++-- .../how_to_use_OpenCV_parallel_for_new.markdown | 2 +- .../dnn_halide_scheduling/dnn_halide_scheduling.markdown | 2 +- .../dnn/dnn_text_spotting/dnn_text_spotting.markdown | 4 ++-- .../porting_anisotropic_segmentation.markdown | 2 +- .../interactive_face_detection.markdown | 2 +- .../bounding_rects_circles.markdown | 2 +- .../dev_with_OCV_on_Android.markdown | 2 +- .../linux_gdb_pretty_printer.markdown | 6 +++--- 
.../ios/image_manipulation/image_manipulation.markdown | 2 +- 12 files changed, 19 insertions(+), 19 deletions(-) diff --git a/doc/js_tutorials/js_assets/webnn-electron/package.json b/doc/js_tutorials/js_assets/webnn-electron/package.json index e6a258ee40..9c3c817db7 100644 --- a/doc/js_tutorials/js_assets/webnn-electron/package.json +++ b/doc/js_tutorials/js_assets/webnn-electron/package.json @@ -1,7 +1,7 @@ { "name": "image_classification", "version": "0.0.1", - "description": "An Electon.js example of image_classification using webnn-native", + "description": "An Electron.js example of image_classification using webnn-native", "main": "main.js", "author": "WebNN-native Authors", "license": "Apache-2.0", diff --git a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown index 9927477443..2a7a111d8a 100644 --- a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown +++ b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown @@ -97,10 +97,10 @@ Building OpenCV.js from Source @endcode @note - The loader is implemented as a js file in the path `/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the broswer and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application. + The loader is implemented as a js file in the path `/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the browser and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application. 
Example Code: - @code{.javascipt} + @code{.javascript} // Set paths configuration let pathsConfig = { wasm: "../../build_wasm/opencv.js", @@ -173,7 +173,7 @@ This snippet and the following require [Node.js](https://nodejs.org) to be insta ### Headless with Puppeteer -Alternatively tests can run with [GoogleChrome/puppeteer](https://github.com/GoogleChrome/puppeteer#readme) which is a version of Google Chrome that runs in the terminal (useful for Continuos integration like travis CI, etc) +Alternatively tests can run with [GoogleChrome/puppeteer](https://github.com/GoogleChrome/puppeteer#readme) which is a version of Google Chrome that runs in the terminal (useful for Continuous integration like travis CI, etc) @code{.sh} cd build_js/bin @@ -229,7 +229,7 @@ node tests.js The simd optimization is experimental as wasm simd is still in development. @note - Now only emscripten LLVM upstream backend supports wasm simd, refering to https://emscripten.org/docs/porting/simd.html. So you need to setup upstream backend environment with the following command first: + Now only emscripten LLVM upstream backend supports wasm simd, referring to https://emscripten.org/docs/porting/simd.html. So you need to setup upstream backend environment with the following command first: @code{.bash} ./emsdk update ./emsdk install latest-upstream diff --git a/doc/tutorials/calib3d/usac.markdown b/doc/tutorials/calib3d/usac.markdown index 27d590be3a..df9e25f907 100644 --- a/doc/tutorials/calib3d/usac.markdown +++ b/doc/tutorials/calib3d/usac.markdown @@ -244,9 +244,9 @@ Samples: There are three new sample files in opencv/samples directory. 1. `epipolar_lines.cpp` – input arguments of `main` function are two - pathes to images. Then correspondences are found using + paths to images. Then correspondences are found using SIFT detector. Fundamental matrix is found using RANSAC from - tentaive correspondences and epipolar lines are plot. + tentative correspondences and epipolar lines are plot. 2. 
`essential_mat_reconstr.cpp` – input arguments are path to data file containing image names and single intrinsic matrix and directory diff --git a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown index 5ef63ed6f4..57cec4cba1 100644 --- a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown +++ b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown @@ -92,7 +92,7 @@ We then fill value to the corresponding pixel in the dst image. ### Parallel implementation -When looking at the sequential implementation, we can notice that each pixel depends on multiple neighbouring pixels but only one pixel is edited at a time. Thus, to optimize the computation, we can split the image into stripes and parallely perform convolution on each, by exploiting the multi-core architecture of modern processor. The OpenCV @ref cv::parallel_for_ framework automatically decides how to split the computation efficiently and does most of the work for us. +When looking at the sequential implementation, we can notice that each pixel depends on multiple neighbouring pixels but only one pixel is edited at a time. Thus, to optimize the computation, we can split the image into stripes and parallelly perform convolution on each, by exploiting the multi-core architecture of modern processor. The OpenCV @ref cv::parallel_for_ framework automatically decides how to split the computation efficiently and does most of the work for us. @note Although values of a pixel in a particular stripe may depend on pixel values outside the stripe, these are only read only operations and hence will not cause undefined behaviour. 
diff --git a/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown b/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown index 38324610be..6d2751a467 100644 --- a/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown +++ b/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown @@ -70,7 +70,7 @@ Sometimes networks built using blocked structure that means some layer are identical or quite similar. If you want to apply the same scheduling for different layers accurate to tiling or vectorization factors, define scheduling patterns in section `patterns` at the beginning of scheduling file. -Also, your patters may use some parametric variables. +Also, your patterns may use some parametric variables. @code # At the beginning of the file patterns: diff --git a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown index c2b3ec8d71..b6f4e120fb 100644 --- a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown +++ b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown @@ -29,8 +29,8 @@ Before recognition, you should `setVocabulary` and `setDecodeType`. - "CTC-prefix-beam-search", the output of the text recognition model should be a probability matrix same with "CTC-greedy". - The algorithm is proposed at Hannun's [paper](https://arxiv.org/abs/1408.2873). - `setDecodeOptsCTCPrefixBeamSearch` could be used to control the beam size in search step. - - To futher optimize for big vocabulary, a new option `vocPruneSize` is introduced to avoid iterate the whole vocbulary - but only the number of `vocPruneSize` tokens with top probabilty. + - To further optimize for big vocabulary, a new option `vocPruneSize` is introduced to avoid iterate the whole vocbulary + but only the number of `vocPruneSize` tokens with top probability. @ref cv::dnn::TextRecognitionModel::recognize() is the main function for text recognition. 
- The input image should be a cropped text image or an image with `roiRects` diff --git a/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown b/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown index 60829360fe..64b68e644c 100644 --- a/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown +++ b/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown @@ -142,7 +142,7 @@ being a Graph API, doesn't force its users to do that. However, a graph is still built implicitly when a cv::GComputation object is defined. It may be useful to inspect how the resulting graph looks like to check if it is generated correctly and if it really -represents our alrogithm. It is also useful to learn the structure of +represents our algorithm. It is also useful to learn the structure of the graph to see if it has any redundancies. G-API allows to dump generated graphs to `.dot` files which then diff --git a/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown b/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown index 6f8b03bb61..27916b4176 100644 --- a/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown +++ b/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown @@ -241,7 +241,7 @@ pipeline is compiled for streaming: cv::GComputation::compileStreaming() triggers a special video-oriented form of graph compilation where G-API is trying to optimize throughput. Result of this compilation is an object of special type -cv::GStreamingCompiled -- in constract to a traditional callable +cv::GStreamingCompiled -- in contrast to a traditional callable cv::GCompiled, these objects are closer to media players in their semantics. 
diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown index 520d8761eb..14b3105b68 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown @@ -79,7 +79,7 @@ The main function is rather simple, as follows from the comments we do the follo In general callback functions are used to react to some kind of signal, in our case it's trackbar's state change. Explicit one-time call of `thresh_callback` is necessary to display - the "Contours" window simultaniously with the "Source" window. + the "Contours" window simultaneously with the "Source" window. @add_toggle_cpp @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp trackbar diff --git a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown index 5acdbc41ed..d37721a188 100644 --- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown +++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown @@ -240,7 +240,7 @@ taken: Hello OpenCV Sample ------------------- -Here are basic steps to guide you trough the process of creating a simple OpenCV-centric +Here are basic steps to guide you through the process of creating a simple OpenCV-centric application. It will be capable of accessing camera output, processing it and displaying the result. 
-# Open Eclipse IDE, create a new clean workspace, create a new Android project diff --git a/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown b/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown index 9d64469920..b0b8d404a0 100644 --- a/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown +++ b/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown @@ -20,7 +20,7 @@ This pretty-printer can show element type, `is_continuous`, `is_submatrix` flags # Installation {#tutorial_linux_gdb_pretty_printer_installation} -Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convinient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path. +Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convenient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path. In order to check version of python bundled with your gdb, use the following commands from the gdb shell: @@ -34,5 +34,5 @@ If the version of python 3 installed in your system doesn't match the version in # Usage {#tutorial_linux_gdb_pretty_printer_usage} -The fields in a debugger prefixed with `view_` are pseudo-fields added for convinience, the rest are left as is. -If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controlls everything matrix display-related. +The fields in a debugger prefixed with `view_` are pseudo-fields added for convenience, the rest are left as is. +If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controls everything matrix display-related. 
diff --git a/doc/tutorials/ios/image_manipulation/image_manipulation.markdown b/doc/tutorials/ios/image_manipulation/image_manipulation.markdown index f01aa6e4f8..57f34e8e4c 100644 --- a/doc/tutorials/ios/image_manipulation/image_manipulation.markdown +++ b/doc/tutorials/ios/image_manipulation/image_manipulation.markdown @@ -22,7 +22,7 @@ Introduction In *OpenCV* all the image processing operations are usually carried out on the *Mat* structure. In iOS however, to render an image on screen it have to be an instance of the *UIImage* class. To convert an *OpenCV Mat* to an *UIImage* we use the *Core Graphics* framework available in iOS. Below -is the code needed to covert back and forth between Mat's and UIImage's. +is the code needed to convert back and forth between Mat's and UIImage's. @code{.m} - (cv::Mat)cvMatFromUIImage:(UIImage *)image { From b6b5c27cecad2a93f65d8ba81ba2c88c6c103784 Mon Sep 17 00:00:00 2001 From: Zihao Mu Date: Fri, 18 Mar 2022 10:19:13 +0800 Subject: [PATCH 31/84] Support for some reduce layers for onnx --- .../dnn/include/opencv2/dnn/all_layers.hpp | 14 + modules/dnn/src/init.cpp | 2 + modules/dnn/src/int8layers/reduce_layer.cpp | 213 ++++++++++ modules/dnn/src/layers/reduce_layer.cpp | 388 ++++++++++++++++++ modules/dnn/src/net_quantization.cpp | 4 +- modules/dnn/src/onnx/onnx_importer.cpp | 308 +++++++------- ...er_filter_opencv_ocl_fp16_denylist.inl.hpp | 11 + ..._conformance_layer_parser_denylist.inl.hpp | 53 --- 8 files changed, 796 insertions(+), 197 deletions(-) create mode 100644 modules/dnn/src/int8layers/reduce_layer.cpp create mode 100644 modules/dnn/src/layers/reduce_layer.cpp diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 44b16f7800..c8c14759d3 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -325,6 +325,20 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + class 
CV_EXPORTS ReduceLayer : public Layer + { + public: + int reduceType; + std::vector reduceDims; + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS ReduceLayerInt8 : public ReduceLayer + { + public: + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS SoftmaxLayer : public Layer { public: diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 55ed1e5d17..86ceba382e 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -92,6 +92,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer); CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer); CV_DNN_REGISTER_LAYER_CLASS(PSROIPooling, PoolingLayer); + CV_DNN_REGISTER_LAYER_CLASS(Reduce, ReduceLayer); CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer); CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer); CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer); @@ -175,6 +176,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(ConvolutionInt8, ConvolutionLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(InnerProductInt8, InnerProductLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(PoolingInt8, PoolingLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(ReduceInt8, ReduceLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(EltwiseInt8, EltwiseLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(BatchNormInt8, BatchNormLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(ScaleInt8, ScaleLayerInt8); diff --git a/modules/dnn/src/int8layers/reduce_layer.cpp b/modules/dnn/src/int8layers/reduce_layer.cpp new file mode 100644 index 0000000000..935bdc0659 --- /dev/null +++ b/modules/dnn/src/int8layers/reduce_layer.cpp @@ -0,0 +1,213 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include +#include +#include + +namespace cv +{ +namespace dnn +{ + +class ReduceLayerInt8Impl CV_FINAL : public ReduceLayerInt8 +{ +public: + ReduceLayerInt8Impl(const LayerParams& params) + { + // Set reduce type + CV_Assert(params.has("reduce")); + String typeString = toLowerCase(params.get("reduce")); + if (typeString == "max") + reduceType = MAX; + else if (typeString == "min") + reduceType = MIN; + else + CV_Error(Error::StsBadArg, "Unknown reduce type \"" + typeString + "\""); + + // Set deleted dims + CV_Assert(params.has("deleted_dims")); + DictValue tempDims = params.get("deleted_dims"); + int i, n = tempDims.size(); + reduceDims.resize(n); + for (i = 0; i < n; i++) + { + reduceDims[i] = tempDims.get(i); + } + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + if (backendId == DNN_BACKEND_OPENCV) + { + return true; + } + return false; + } + + // reduceType == MIN + struct ReduceOpMIN + { + int8_t apply(const int8_t* first, const int8_t* last) + { + return std::accumulate(first, last, *first, + [](int8_t a, int8_t b) + { + return std::min(a, b); + }); + } + }; + + // reduceType == MAX + struct ReduceOpMAX + { + int8_t apply(const int8_t* first, const int8_t* last) + { + return std::accumulate(first, last, *first, + [](int8_t a, int8_t b) + { + return std::max(a, b); + }); + } + }; + + template + class ReduceInvoker : public ParallelLoopBody + { + public: + const Mat* src; + Mat *dst; + std::vector reduceDims; + int nstripes; + int reduceType; + Ptr func; + + ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr()) {} + + static void run(const Mat& src, Mat& dst, std::vector reduceDims, int reduceType, int nstripes) + { + CV_Assert_N(src.isContinuous(), dst.isContinuous(), src.type() == CV_8S, src.type() == dst.type()); + + ReduceInvoker p; + + p.src = &src; + p.dst = &dst; + + p.reduceDims = reduceDims; + p.nstripes = nstripes; + p.reduceType = reduceType; 
+ + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + size_t total = dst->total(); + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, total); + size_t totalDeleted = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + + int8_t *dstData = (int8_t *)dst->data; + int8_t *srcData = (int8_t *)src->data; + + for (size_t ofs = stripeStart; ofs < stripeEnd;) + { + const int8_t* first = srcData + ofs * totalDeleted; + const int8_t* last = srcData + (ofs + 1) * totalDeleted; + + dstData[ofs] = func->apply(first, last); + ofs += 1; + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + CV_Assert(inputs.size() == 1); + const int nstripes = getNumThreads(); + + switch (reduceType) + { + case MIN: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case MAX: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + default: + CV_Error(Error::StsNotImplemented, "Not implemented"); + break; + } + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() > 0); + CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size()); + + std::vector outShape; + if (inputs[0].size() == reduceDims.size()) + outShape.push_back(1); + else + { + for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++) + { + outShape.push_back(inputs[0][i]); + } + } + outputs.assign(1, outShape); + + return false; + } + + virtual bool 
tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return false; + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(inputs); // suppress unused variable warning + long flops = 0; + size_t totalDeleted = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + for (int i = 0; i < outputs.size(); i++) + { + flops += total(outputs[i])*(totalDeleted); + } + return flops; + } +private: + enum Type + { + MAX, + MIN + }; +}; + +Ptr ReduceLayerInt8::create(const LayerParams& params) +{ + return Ptr(new ReduceLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/layers/reduce_layer.cpp b/modules/dnn/src/layers/reduce_layer.cpp new file mode 100644 index 0000000000..62bb65f897 --- /dev/null +++ b/modules/dnn/src/layers/reduce_layer.cpp @@ -0,0 +1,388 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include "../op_cuda.hpp" +#include "../op_webnn.hpp" + +#include +#include +#include +using std::max; +using std::min; + +#include + +namespace cv +{ +namespace dnn +{ + +class ReduceLayerImpl CV_FINAL : public ReduceLayer +{ +public: + ReduceLayerImpl(const LayerParams& params) + { + // set reduce type + CV_Assert(params.has("reduce")); + String typeString = toLowerCase(params.get("reduce")); + if (typeString == "max") + reduceType= MAX; + else if (typeString == "min") + reduceType= MIN; + else if (typeString == "ave") + reduceType= AVE; + else if (typeString == "sum") + reduceType= SUM; + else if (typeString == "sum_square") + reduceType= SUM_SQUARE; + else if (typeString == "l1") + reduceType= L1; + else if (typeString == "l2") + reduceType= L2; + else if (typeString == "log_sum") + reduceType= LOG_SUM; + else if (typeString == "log_sum_exp") + reduceType= LOG_SUM_EXP; + else if (typeString == "prod") + reduceType= PROD; + else + CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\""); + + // set deleted dims + CV_Assert(params.has("deleted_dims")); + DictValue tempDims = params.get("deleted_dims"); + int i, n = tempDims.size(); + reduceDims.resize(n); + for (i = 0; i < n; i++) + { + reduceDims[i] = tempDims.get(i); + } + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + if (backendId == DNN_BACKEND_OPENCV) + { + return true; + } + return false; + } + + // reduceType == MIN + struct ReduceOpMIN + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, FLT_MAX, + [](float a, float b) + { + return std::min(a, b); + }); + } + }; + + // reduceType == MAX + struct ReduceOpMAX + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, -FLT_MAX, + [](float a, float b) + { + return std::max(a, b); + }); + } + }; + + // reduceType == 
SUM + struct ReduceOpSUM + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 0.f); + } + }; + + // reduceType == AVE + struct ReduceOpAVE + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.f); + return output * ikarea; + } + }; + + // reduceType == SUM_SQUARE + struct ReduceOpSUM_SQUARE + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 0.f, + [](float a, float b) + { + return a + b * b; + }); + } + }; + + // reduceType == L1 + struct ReduceOpL1 + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 0.f, + [](float a, float b) + { + return a + std::abs(b); + }); + } + }; + + // reduceType == L2 + struct ReduceOpL2 + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.f, + [](float a, float b) + { + return a + b * b; + }); + return std::sqrt(output); + } + }; + + // reduceType == PROD + struct ReduceOpPROD + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 1.0f, std::multiplies()); + } + }; + + // reduceType == LOG_SUM + struct ReduceOpLOG_SUM + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.0f); + return std::log(output); + } + }; + + // reduceType == LOG_SUM_EXP + struct ReduceOpLOG_SUM_EXP + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.0f, + [](float a, float b) + { + return a + std::exp(b); + }); + return std::log(output); + } + }; + + template + class ReduceInvoker : public ParallelLoopBody + { + public: + const Mat* src; + Mat *dst; + 
std::vector reduceDims; + int nstripes; + int reduceType; + Ptr func; + + ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr()) {} + + static void run(const Mat& src, Mat& dst, std::vector reduceDims, int reduceType, int nstripes) + { + CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type()); + + ReduceInvoker p; + + p.src = &src; + p.dst = &dst; + + p.reduceDims = reduceDims; + p.nstripes = nstripes; + p.reduceType = reduceType; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + size_t total = dst->total(); + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, total); + size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + + float *dstData = (float *)dst->data; + float *srcData = (float *)src->data; + + for (size_t ofs = stripeStart; ofs < stripeEnd;) + { + const float* first = srcData + ofs * stride_w; + const float* last = srcData + (ofs + 1) * stride_w; + + if (ofs < stripeEnd) + { + dstData[ofs] = func->apply(first, last, 1.0 / stride_w); + ofs += 1; + } + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (inputs_arr.depth() == CV_16S) + { + forward_fallback(inputs_arr, outputs_arr, internals_arr); + return; + } + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM)); + const int nstripes = getNumThreads(); + + switch (reduceType) + { + case MIN: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case MAX: + { + ReduceInvoker::run(inputs[0], outputs[0], 
reduceDims, reduceType, nstripes); + break; + } + case AVE: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case SUM: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case L1: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case L2: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case SUM_SQUARE: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case PROD: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case LOG_SUM: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case LOG_SUM_EXP: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + default: + CV_Error(Error::StsNotImplemented, "Not implemented"); + break; + } + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() > 0); + CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size()); + + std::vector outShape; + if (inputs[0].size() == reduceDims.size()) + outShape.push_back(1); + else + { + for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++) + { + outShape.push_back(inputs[0][i]); + } + } + outputs.assign(1, outShape); + + return false; + } + + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + if (reduceType== MAX || reduceType== MIN) + { + return true; + } + return false; + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(inputs); // suppress unused variable warning + long flops = 0; + size_t stride_w = 
std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + for (int i = 0; i < outputs.size(); i++) + { + flops += total(outputs[i])*(stride_w); + } + return flops; + } +private: + enum ReduceType + { + MAX, + MIN, + AVE, + SUM, + L1, + L2, + PROD, + SUM_SQUARE, + LOG_SUM, + LOG_SUM_EXP + }; +}; + +Ptr ReduceLayer::create(const LayerParams& params) +{ + return Ptr(new ReduceLayerImpl(params)); +} + +} +} diff --git a/modules/dnn/src/net_quantization.cpp b/modules/dnn/src/net_quantization.cpp index b8ee2d3219..ef1857a8e2 100644 --- a/modules/dnn/src/net_quantization.cpp +++ b/modules/dnn/src/net_quantization.cpp @@ -133,7 +133,9 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" || ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" || ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" || - (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) /* ReLU with negative slope 0 */) + (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) || /* ReLU with negative slope 0 */ + (ld.type == "Reduce" && (toLowerCase(ld.params.get("reduce")) == "max" || + toLowerCase(ld.params.get("reduce")) == "min"))) { for (int i = 0; i < ld.outputBlobs.size(); i++) { diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 62569d8b50..5713c025be 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -122,6 +122,7 @@ private: void parseMaxUnpool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGlobalPool (LayerParams& layerParams, const 
opencv_onnx::NodeProto& node_proto); void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); @@ -1087,7 +1088,7 @@ void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx: addLayer(layerParams, node_proto); } -void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +void ONNXImporter::parseGlobalPool(LayerParams &layerParams, const opencv_onnx::NodeProto &node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; const std::string& layer_type = node_proto.op_type(); @@ -1096,157 +1097,176 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node CV_Assert(node_proto.input_size() == 1); layerParams.type = "Pooling"; String pool; - if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax") + if (layer_type == "GlobalMaxPool") pool = "MAX"; - else if (layer_type == "ReduceSum") - pool = "SUM"; - else + else if (layer_type == "GlobalAveragePool") pool = "AVE"; + else + CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation."); + + CV_Assert(!layerParams.has("axes")); + layerParams.set("global_pooling", true); layerParams.set("pool", pool); - layerParams.set("global_pooling", !layerParams.has("axes")); - bool keepdims = layerParams.get("keepdims", 1) == 1; - if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + const std::string output_name = node_proto.output(0); + int depth = layerParams.get("depth", CV_32F); + + 
CV_Assert(node_proto.input_size() <= 2); + String reduceType; + + if (layer_type == "ReduceMax") + reduceType = "MAX"; + else if (layer_type == "ReduceMin") + reduceType = "MIN"; + else if (layer_type == "ReduceSum") + reduceType = "SUM"; + else if (layer_type == "ReduceSumSquare") + reduceType = "SUM_SQUARE"; + else if (layer_type == "ReduceProd") + reduceType = "PROD"; + else if (layer_type == "ReduceL1") + reduceType = "L1"; + else if (layer_type == "ReduceL2") + reduceType = "L2"; + else if (layer_type == "ReduceLogSum") + reduceType = "LOG_SUM"; + else if (layer_type == "ReduceLogSumExp") + reduceType = "LOG_SUM_EXP"; + else if (layer_type == "ReduceMean") + reduceType = "AVE"; + else + CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation."); + + // The ReduceInt8 can only support "MAX" and "MIN". + if (depth == CV_8S) + { + CV_Assert(reduceType == "MAX" || reduceType == "MIN"); + } + + layerParams.type = (depth == CV_8S) ? "ReduceInt8" : "Reduce"; + layerParams.set("reduce", reduceType); + bool keepdims = layerParams.get("keepdims", 1) == 1; + + if (layer_type == "ReduceSum" && node_proto.input_size() == 2) + { + // TODO support the opset 13 of ReduceSum. 
+ // in opset 13, the ReduceSum has two input, it takes axes as input instead of attribute + // details:https://github.com/onnx/onnx/issues/3420#issuecomment-844295687 + CV_Error(Error::StsNotImplemented, "Unsupported " + layer_type + " operation of opset 13, please try to " + "re-export the onnx model with opset 11."); + } + + MatShape inpShape = outShapes[node_proto.input(0)]; + std::vector shouldDelete(inpShape.size(), false); + + if (layerParams.has("axes")) { - MatShape inpShape = outShapes[node_proto.input(0)]; DictValue axes = layerParams.get("axes"); - MatShape targetShape; - std::vector shouldDelete(inpShape.size(), false); - for (int i = 0; i < axes.size(); i++) { + for (int i = 0; i < axes.size(); i++) + { int axis = normalize_axis(axes.get(i), inpShape.size()); shouldDelete[axis] = true; } - for (int axis = 0; axis < inpShape.size(); ++axis){ - if (!shouldDelete[axis]) - targetShape.push_back(inpShape[axis]); - else if (keepdims) - targetShape.push_back(1); - } - - if (inpShape.size() == 3 && axes.size() <= 2) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - CV_CheckNE(axis, 0, ""); - - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("axis", 0); - reshapeLp.set("num_axes", 1); - int newShape[] = {1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - - LayerParams avgLp; - avgLp.name = layerParams.name + "/avg"; - avgLp.type = "Pooling"; - CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); - avgLp.set("pool", pool); - if (axes.size() == 2) - { - CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); - CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); - avgLp.set("global_pooling", true); - } - 
else - { - avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); - avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1); - } - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, avgLp.name); - addLayer(avgLp, node_proto); - } - else - { - if (inpShape.size() != 4 && inpShape.size() != 5) - CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); - - CV_Assert(axes.size() <= inpShape.size() - 2); - std::vector kernel_size(inpShape.size() - 2, 1); - if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - MatShape newShape = inpShape; - newShape[axis + 1] = total(newShape, axis + 1); - newShape.resize(axis + 2); - newShape.insert(newShape.begin(), 2 - axis, 1); - - LayerParams reshapeLp; - reshapeLp.type = "Reshape"; - reshapeLp.name = layerParams.name + "/reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); - - node_proto.set_output(0, reshapeLp.name); - addLayer(reshapeLp, node_proto); - - kernel_size.resize(2); - kernel_size[0] = inpShape[axis]; - node_proto.set_input(0, node_proto.output(0)); - } - else - { - for (int i = 0; i < axes.size(); i++) { - int axis = normalize_axis(axes.get(i), inpShape.size()); - CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); - kernel_size[axis - 2] = inpShape[axis]; - } - } - - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/avg"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); - - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); - } - - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); - - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, output_name); } 
- else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + else { - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - const size_t dims = keepdims ? shapeIt->second.size() : 1; - - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - int newShape[] = {1, 1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/pool"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); - - layerParams.type = "Reshape"; - std::vector targetShape(dims, 1); - layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size())); - - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, output_name); + for (int i = 0; i < inpShape.size(); i++) + { + shouldDelete[i] = true; + } } + + MatShape targetShape; + for (int i = 0; i < inpShape.size(); ++i) + { + if (!shouldDelete[i]) + { + targetShape.push_back(inpShape[i]); + } + else if (keepdims) + { + targetShape.push_back(1); + } + } + + if (targetShape.empty()) + targetShape.push_back(1); + + // Using PermuteLayer to move the deleted axis to the last. + std::vector perm(inpShape.size(), 0); + for (int i = 0; i < inpShape.size(); i++) + perm[i] = i; + + bool needPermuet = false; + for (int i = 0; i < inpShape.size(); i++) + { + if (shouldDelete[i]) + { + // find the first not deleted element. 
+ std::vector::iterator iter = std::find(shouldDelete.begin() + i, shouldDelete.end(), false); + + if (iter != shouldDelete.end()) + { + int index = iter - shouldDelete.begin(); + + bool temp = shouldDelete[index]; + shouldDelete[index] = shouldDelete[i]; + shouldDelete[i] = temp; + + std::swap(perm[index], perm[i]); + std::swap(inpShape[index], inpShape[i]); + needPermuet = true; + } + else + break; + } + } + + auto inputString= node_proto.input(0); + if (needPermuet) + { + LayerParams permuteLp; + permuteLp.name = layerParams.name + "/permute"; + permuteLp.type = (depth == CV_8S) ? "PermuteInt8" : "Permute"; + permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size())); + + opencv_onnx::NodeProto protoPermute; + protoPermute.add_input(inputString); + protoPermute.add_output(permuteLp.name); + addLayer(permuteLp, protoPermute); + inputString = permuteLp.name; + } + + std::vector deletedDims; + for (int axis_i = 0; axis_i < inpShape.size(); ++axis_i) + { + if (shouldDelete[axis_i]) + { + deletedDims.push_back(inpShape[axis_i]); + } + } + + LayerParams reduceLp = layerParams; + reduceLp.name = layerParams.name + "/reduce"; + CV_Assert(layer_id.find(reduceLp.name) == layer_id.end()); + reduceLp.set("deleted_dims", DictValue::arrayInt(&deletedDims[0], deletedDims.size())); + + node_proto.set_input(0, inputString); + node_proto.set_output(0, reduceLp.name); + addLayer(reduceLp, node_proto); + + layerParams.type = (depth == CV_8S) ? 
"ReshapeInt8" : "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); + + node_proto.set_input(0, node_proto.output(0)); + node_proto.set_output(0, output_name); + addLayer(layerParams, node_proto); } @@ -3406,8 +3426,10 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) dispatch["MaxUnpool"] = &ONNXImporter::parseMaxUnpool; dispatch["MaxPool"] = &ONNXImporter::parseMaxPool; dispatch["AveragePool"] = &ONNXImporter::parseAveragePool; - dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = - dispatch["ReduceMax"] = &ONNXImporter::parseReduce; + dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = &ONNXImporter::parseGlobalPool; + dispatch["ReduceMax"] = dispatch["ReduceMin"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = dispatch["ReduceMax"] = + dispatch["ReduceMin"] = dispatch["ReduceSumSquare"] = dispatch["ReduceProd"] = dispatch["ReduceL1"] = + dispatch["ReduceL2"] = dispatch["ReduceLogSum"] = dispatch["ReduceLogSumExp"] = &ONNXImporter::parseReduce; dispatch["Slice"] = &ONNXImporter::parseSlice; dispatch["Split"] = &ONNXImporter::parseSplit; dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias; diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp index ccd1568845..c2425d469f 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp @@ -20,3 +20,14 @@ "test_split_equal_parts_2d", "test_split_equal_parts_default_axis", "test_tan", +"test_reduce_l2_default_axes_keepdims_example", // Expected: (normL1) <= (l1), actual: 0.00490189 vs 0.004 +"test_reduce_log_sum_exp_default_axes_keepdims_example", // Expected: (normL1) <= (l1), actual: 0.00671387 vs 0.004 
+"test_reduce_prod_default_axes_keepdims_example", // Expected: (normL1) <= (l1), actual: inf vs 0.004 +"test_reduce_prod_default_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 18.6621 vs 0.004, Expected: (normInf) <= (lInf), actual: 18.6621 vs 0.02 +"test_reduce_prod_do_not_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 +"test_reduce_prod_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 +"test_reduce_prod_negative_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 +"test_reduce_sum_square_default_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.0183411 vs 0.004 +"test_reduce_sum_square_do_not_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 +"test_reduce_sum_square_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 +"test_reduce_sum_square_negative_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 \ No newline at end of file diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp index e5d0ead9da..eef421491c 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp @@ -339,51 +339,6 @@ "test_range_int32_type_negative_delta_expanded", "test_reciprocal", "test_reciprocal_example", -"test_reduce_l1_default_axes_keepdims_example", -"test_reduce_l1_default_axes_keepdims_random", -"test_reduce_l1_do_not_keepdims_example", 
-"test_reduce_l1_do_not_keepdims_random", -"test_reduce_l1_keep_dims_example", -"test_reduce_l1_keep_dims_random", -"test_reduce_l1_negative_axes_keep_dims_example", -"test_reduce_l1_negative_axes_keep_dims_random", -"test_reduce_l2_default_axes_keepdims_example", -"test_reduce_l2_default_axes_keepdims_random", -"test_reduce_l2_do_not_keepdims_example", -"test_reduce_l2_do_not_keepdims_random", -"test_reduce_l2_keep_dims_example", -"test_reduce_l2_keep_dims_random", -"test_reduce_l2_negative_axes_keep_dims_example", -"test_reduce_l2_negative_axes_keep_dims_random", -"test_reduce_log_sum", -"test_reduce_log_sum_asc_axes", -"test_reduce_log_sum_default", -"test_reduce_log_sum_desc_axes", -"test_reduce_log_sum_exp_default_axes_keepdims_example", -"test_reduce_log_sum_exp_default_axes_keepdims_random", -"test_reduce_log_sum_exp_do_not_keepdims_example", -"test_reduce_log_sum_exp_do_not_keepdims_random", -"test_reduce_log_sum_exp_keepdims_example", -"test_reduce_log_sum_exp_keepdims_random", -"test_reduce_log_sum_exp_negative_axes_keepdims_example", -"test_reduce_log_sum_exp_negative_axes_keepdims_random", -"test_reduce_log_sum_negative_axes", -"test_reduce_min_default_axes_keepdims_example", -"test_reduce_min_default_axes_keepdims_random", -"test_reduce_min_do_not_keepdims_example", -"test_reduce_min_do_not_keepdims_random", -"test_reduce_min_keepdims_example", -"test_reduce_min_keepdims_random", -"test_reduce_min_negative_axes_keepdims_example", -"test_reduce_min_negative_axes_keepdims_random", -"test_reduce_prod_default_axes_keepdims_example", -"test_reduce_prod_default_axes_keepdims_random", -"test_reduce_prod_do_not_keepdims_example", -"test_reduce_prod_do_not_keepdims_random", -"test_reduce_prod_keepdims_example", -"test_reduce_prod_keepdims_random", -"test_reduce_prod_negative_axes_keepdims_example", -"test_reduce_prod_negative_axes_keepdims_random", "test_reduce_sum_default_axes_keepdims_example", "test_reduce_sum_default_axes_keepdims_random", 
"test_reduce_sum_do_not_keepdims_example", @@ -394,14 +349,6 @@ "test_reduce_sum_keepdims_random", "test_reduce_sum_negative_axes_keepdims_example", "test_reduce_sum_negative_axes_keepdims_random", -"test_reduce_sum_square_default_axes_keepdims_example", -"test_reduce_sum_square_default_axes_keepdims_random", -"test_reduce_sum_square_do_not_keepdims_example", -"test_reduce_sum_square_do_not_keepdims_random", -"test_reduce_sum_square_keepdims_example", -"test_reduce_sum_square_keepdims_random", -"test_reduce_sum_square_negative_axes_keepdims_example", -"test_reduce_sum_square_negative_axes_keepdims_random", "test_reflect_pad", "test_reshape_allowzero_reordered", "test_reshape_extended_dims", From ee9fe1239a6c362071e9da9f1f0768abe082e809 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Thu, 17 Mar 2022 21:33:50 +0300 Subject: [PATCH 32/84] GAPI Fluid: Enable dynamic dispatching for the Sub kernel. --- .../perf/common/gapi_core_perf_tests_inl.hpp | 6 +- .../perf/cpu/gapi_core_perf_tests_fluid.cpp | 6 +- .../gapi/src/backends/fluid/gfluidcore.cpp | 149 ++------------ .../fluid/gfluidcore_func.dispatch.cpp | 27 +++ .../src/backends/fluid/gfluidcore_func.hpp | 22 ++ .../backends/fluid/gfluidcore_func.simd.hpp | 189 +++++++++++++----- 6 files changed, 204 insertions(+), 195 deletions(-) diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index c644fd1587..83ef13008c 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -436,7 +436,7 @@ PERF_TEST_P_(DivPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); - //This condition need to workaround issue in the OpenCV. + //This condition need to workaround the #21044 issue in the OpenCV. //It reinitializes divider matrix without zero values for CV_16S DST type. 
if (dtype == CV_16S && dtype != type) cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(255)); @@ -482,7 +482,7 @@ PERF_TEST_P_(DivCPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); - //This condition need as workaround the issue in the OpenCV. + //This condition need to workaround the #21044 issue in the OpenCV. //It reinitializes divider scalar without zero values for CV_16S DST type. if (dtype == CV_16S || (type == CV_16S && dtype == -1)) cv::randu(sc, cv::Scalar::all(1), cv::Scalar::all(SHRT_MAX)); @@ -528,7 +528,7 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); - //This condition need as workaround the bug in the OpenCV. + //This condition need to workaround the #21044 issue in the OpenCV. //It reinitializes divider matrix without zero values for CV_16S DST type. if (dtype == CV_16S || (type == CV_16S && dtype == -1)) cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255)); diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index d91ce65fff..e4b8c0b490 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -40,10 +40,10 @@ INSTANTIATE_TEST_CASE_P(AddCPerfTestFluid, AddCPerfTest, Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(SubPerfTestFluid, SubPerfTest, - Combine(Values(AbsExact().to_compare_f()), + Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 0).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(SubCPerfTestFluid, SubCPerfTest, diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp 
b/modules/gapi/src/backends/fluid/gfluidcore.cpp index 866381f39b..c5cfc19d48 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -378,141 +378,11 @@ CV_ALWAYS_INLINE int absdiff_simd(const T in1[], const T in2[], T out[], int len return 0; } - -template -CV_ALWAYS_INLINE int sub_simd_sametype(const T in1[], const T in2[], T out[], int length) -{ - constexpr int nlanes = static_cast(VT::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - VT a = vx_load(&in1[x]); - VT b = vx_load(&in2[x]); - vx_store(&out[x], a - b); - } - - if (x < length && (in1 != out) && (in2 != out)) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; -} - -template -CV_ALWAYS_INLINE int sub_simd(const SRC in1[], const SRC in2[], DST out[], int length) -{ - if (std::is_same::value && !std::is_same::value) - return 0; - - if (std::is_same::value) - { - if (std::is_same::value) - { - return sub_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return sub_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return sub_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_int16 a1 = vx_load(reinterpret_cast(&in1[x])); - v_int16 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 2])); - v_int16 b1 = vx_load(reinterpret_cast(&in2[x])); - v_int16 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 2])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(a1 - b1, a2 - 
b2)); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vx_load(reinterpret_cast(&in1[x])); - v_float32 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 4])); - v_float32 a3 = vx_load(reinterpret_cast(&in1[x + 2 * nlanes / 4])); - v_float32 a4 = vx_load(reinterpret_cast(&in1[x + 3 * nlanes / 4])); - - v_float32 b1 = vx_load(reinterpret_cast(&in2[x])); - v_float32 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 4])); - v_float32 b3 = vx_load(reinterpret_cast(&in2[x + 2 * nlanes / 4])); - v_float32 b4 = vx_load(reinterpret_cast(&in2[x + 3 * nlanes / 4])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(v_pack(v_round(a1 - b1), v_round(a2 - b2)), - v_pack(v_round(a3 - b3), v_round(a4 - b4)))); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - - return 0; -} #endif // CV_SIMD template CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const View &src2, - Arithm arithm, double scale=1) + Arithm arithm, double scale=1) { static_assert(std::is_same::value, "wrong types"); @@ -607,10 +477,19 @@ GAPI_FLUID_KERNEL(GFluidSub, cv::gapi::core::GSub, false) { // DST SRC1 SRC2 OP __VA_ARGS__ BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_(uchar , float, float, run_arithm, dst, src1, src2, 
ARITHM_SUBTRACT); - BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(uchar, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(uchar, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(uchar, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(float, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(float, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index d80a6b29c0..c235991fba 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -317,6 +317,33 @@ ADD_SIMD(float, float) #undef ADD_SIMD +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + \ + CV_CPU_DISPATCH(sub_simd, (in1, in2, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) 
+SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 052adbe2fd..3a5d70a045 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -244,6 +244,28 @@ ADD_SIMD(float, float) #undef ADD_SIMD +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index 4c324daa25..c148f81e77 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -253,6 +253,28 @@ ADD_SIMD(float, float) #undef ADD_SIMD +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) 
+SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); @@ -2530,32 +2552,43 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], // Fluid kernels: Add // //------------------------- +template +CV_ALWAYS_INLINE VT oper(add_tag, const VT& a, const VT& b) +{ + return a + b; +} -CV_ALWAYS_INLINE void add_uchar_store(uchar* outx, const v_uint16& c1, const v_uint16& c2) +template +CV_ALWAYS_INLINE VT oper(sub_tag, const VT& a, const VT& b) +{ + return a - b; +} + +CV_ALWAYS_INLINE void pack_store_uchar(uchar* outx, const v_uint16& c1, const v_uint16& c2) { vx_store(outx, v_pack(c1, c2)); } -CV_ALWAYS_INLINE void add_uchar_store(uchar* outx, const v_int16& c1, const v_int16& c2) +CV_ALWAYS_INLINE void pack_store_uchar(uchar* outx, const v_int16& c1, const v_int16& c2) { vx_store(outx, v_pack_u(c1, c2)); } -template +template CV_ALWAYS_INLINE typename std::enable_if::value, void>::type -add_simd_impl(const SRC* in1x, const SRC* in2x, DST* outx) +arithmOp_simd_impl(oper_tag op, const SRC* in1x, const SRC* in2x, DST* outx) { vector_type_of_t a = vx_load(in1x); vector_type_of_t b = vx_load(in2x); - vx_store(outx, a + b); + vx_store(outx, oper(op, a, b)); } -template +template CV_ALWAYS_INLINE typename std::enable_if::value || std::is_same::value, void>::type -add_simd_impl(const SRC* in1x, const SRC* in2x, uchar* outx) +arithmOp_simd_impl(oper_tag op, const SRC* in1x, const SRC* in2x, uchar* outx) { constexpr int nlanes = v_uint8::nlanes; @@ -2564,10 +2597,12 @@ add_simd_impl(const SRC* in1x, const SRC* in2x, uchar* outx) vector_type_of_t b1 = vx_load(in2x); vector_type_of_t b2 = vx_load(&in2x[nlanes / 2]); - add_uchar_store(outx, a1 + b1, a2 + b2); + pack_store_uchar(outx, oper(op, a1, b1), oper(op, 
a2, b2)); } -CV_ALWAYS_INLINE void add_simd_impl(const float* in1x, const float* in2x, uchar* outx) +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const float* in1x, + const float* in2x, uchar* outx) { constexpr int nlanes = v_uint8::nlanes; @@ -2581,31 +2616,35 @@ CV_ALWAYS_INLINE void add_simd_impl(const float* in1x, const float* in2x, uchar* v_float32 b3 = vx_load(&in2x[2 * nlanes / 4]); v_float32 b4 = vx_load(&in2x[3 * nlanes / 4]); - vx_store(outx, v_pack_u(v_pack(v_round(a1 + b1), v_round(a2 + b2)), - v_pack(v_round(a3 + b3), v_round(a4 + b4)))); + vx_store(outx, v_pack_u(v_pack(v_round(oper(op, a1, b1)), v_round(oper(op, a2, b2))), + v_pack(v_round(oper(op, a3, b3)), v_round(oper(op, a4, b4))))); } -CV_ALWAYS_INLINE void add_simd_impl(const uchar* in1x, const uchar* in2x, short* outx) +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const uchar* in1x, + const uchar* in2x, short* outx) { v_int16 a = v_reinterpret_as_s16(vx_load_expand(in1x)); v_int16 b = v_reinterpret_as_s16(vx_load_expand(in2x)); - vx_store(outx, a + b); + vx_store(outx, oper(op, a, b)); } -CV_ALWAYS_INLINE void add_simd_impl(const uchar* in1x, const uchar* in2x, ushort* outx) +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const uchar* in1x, + const uchar* in2x, ushort* outx) { v_uint16 a = vx_load_expand(in1x); v_uint16 b = vx_load_expand(in2x); - vx_store(outx, a + b); + vx_store(outx, oper(op, a, b)); } -template +template CV_ALWAYS_INLINE typename std::enable_if::value || std::is_same::value, void>::type -add_simd_impl(const float* in1x, const float* in2x, DST* outx) +arithmOp_simd_impl(oper_tag op, const float* in1x, const float* in2x, DST* outx) { constexpr int nlanes = vector_type_of_t::nlanes; v_float32 a1 = vx_load(in1x); @@ -2613,10 +2652,12 @@ add_simd_impl(const float* in1x, const float* in2x, DST* outx) v_float32 b1 = vx_load(in2x); v_float32 b2 = vx_load(&in2x[nlanes/2]); - v_store_i16(outx, v_round(a1 + b1), v_round(a2 + 
b2)); + v_store_i16(outx, v_round(oper(op, a1, b1)), v_round(oper(op, a2, b2))); } -CV_ALWAYS_INLINE void add_simd_impl(const short* in1x, const short* in2x, ushort* outx) +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const short* in1x, + const short* in2x, ushort* outx) { v_int16 a = vx_load(in1x); v_int32 a1 = v_expand_low(a); @@ -2626,57 +2667,66 @@ CV_ALWAYS_INLINE void add_simd_impl(const short* in1x, const short* in2x, ushort v_int32 b1 = v_expand_low(b); v_int32 b2 = v_expand_high(b); - vx_store(outx, v_pack_u(a1 + b1, a2 + b2)); + vx_store(outx, v_pack_u(oper(op, a1, b1), oper(op, a2, b2))); } -CV_ALWAYS_INLINE void add_simd_impl(const ushort* in1x, const ushort* in2x, short* outx) +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const ushort* in1x, + const ushort* in2x, short* outx) { - v_uint16 a = vx_load(in1x); - v_uint32 a1 = v_expand_low(a); - v_uint32 a2 = v_expand_high(a); + v_int16 a = v_reinterpret_as_s16(vx_load(in1x)); + v_int32 a1 = v_expand_low(a); + v_int32 a2 = v_expand_high(a); - v_uint16 b = vx_load(in2x); - v_uint32 b1 = v_expand_low(b); - v_uint32 b2 = v_expand_high(b); + v_int16 b = v_reinterpret_as_s16(vx_load(in2x)); + v_int32 b1 = v_expand_low(b); + v_int32 b2 = v_expand_high(b); - vx_store(outx, v_reinterpret_as_s16(v_pack(a1 + b1, a2 + b2))); + vx_store(outx, v_pack(oper(op, a1, b1), oper(op, a2, b2))); } -template -CV_ALWAYS_INLINE void add_simd_impl(const SRC* in1x, const SRC* in2x, float* outx) +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const SRC* in1x, const SRC* in2x, float* outx) { v_float32 a = vg_load_f32(in1x); v_float32 b = vg_load_f32(in2x); - vx_store(outx, a + b); + vx_store(outx, oper(op, a, b)); +} + +template +CV_ALWAYS_INLINE int arithmOp_simd(oper_tag op, const SRC in1[], const SRC in2[], + DST out[], const int length) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length 
- nlanes; x += nlanes) + { + arithmOp_simd_impl(op, &in1[x], &in2[x], &out[x]); + } + + if (x < length) + { + x = length - nlanes; + continue; + } + break; + } + + return x; } #define ADD_SIMD(SRC, DST) \ int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ { \ - constexpr int nlanes = vector_type_of_t::nlanes; \ - \ - if (length < nlanes) \ - return 0; \ - \ - int x = 0; \ - for (;;) \ - { \ - for (; x <= length - nlanes; x += nlanes) \ - { \ - add_simd_impl(&in1[x], &in2[x], &out[x]); \ - } \ - \ - if (x < length) \ - { \ - x = length - nlanes; \ - continue; \ - } \ - break; \ - } \ - \ - return x; \ -} + return arithmOp_simd(add_tag{}, in1, in2, out, length); \ +} \ ADD_SIMD(uchar, uchar) ADD_SIMD(ushort, uchar) @@ -2697,6 +2747,37 @@ ADD_SIMD(float, float) #undef ADD_SIMD +//------------------------- +// +// Fluid kernels: Sub +// +//------------------------- + +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + return arithmOp_simd(sub_tag{}, in1, in2, out, length); \ +} \ + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END From f7ac724c5d2b7f7580e840c4c623609098b17bdc Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Fri, 18 Mar 2022 17:50:21 +0800 Subject: [PATCH 33/84] Merge pull request #21531 from fengyuentau:add_3rdparty_mirrors Support downloading 3rdparty resources from Gitcode & Gitlab-style mirrors * replace github.com with gitcode.net for ocv_download * replace raw.githubusercontent.com with gitcode.net for ocv_download * 
rename funtions and remove some comments * add options for custom mirrors, which simply replace domain github.com & githubusercontent.com * run ocv_init_download once; replace DL_URL with mirrored one when calling ocv_download * fix for empty download links when not using mirror * fix bugs: set(.. .. PARENT_SCOPE) for ocv_init_download; correct macro names for replace github archives and raw githubusercontent * adjusted mirror swapping impl: replace with mirrored link before each ocv_download; update md5sum for archives * fix a bug: macro invoked with incorrect arguments by non-set vars * enclose if statement * workable impl * shorten the var names of two key options * scalable implementation of downloading from mirror and using custom mirror * improve ocv_init_download help message * fix the different extracted directory name in case of ADE & TBB which are downloaded from release page * improve help message printing * Download ADE & TBB using commit ids instead of from release pages * support custom mirrors on downloading archives * improve hints * add missing parentheses * reset ocv_download calls * mirror support implementation using ocv_cmake_hook & ocv_cmake_hook_append * move ocv_init_download into cmake/OpenCVDownload.cmake * move ocv_cmake_hook before checking CMake cache * improve hints when not fetching as git repo * add WORKING_DIRECTORY in execute_process in ocv_init_download * use OPENCV_DOWNLOAD_MIRROR_ID * add custom.cmake for custom mirror * detect github origin * fix broken var name * download from github by default if custom tbb is set * add checksum checks for gitcode.cmake before replacing urls and checksums * add checksum checks for custom.cmake before replacing urls and checkusms * use description specify instead of set for messages in custom.cmake; use warning message for warnings * updates and fixes --- cmake/OpenCVDownload.cmake | 53 ++++++++++++++++++++++++- cmake/mirrors/custom.cmake | 77 +++++++++++++++++++++++++++++++++++++ 
cmake/mirrors/gitcode.cmake | 70 +++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 cmake/mirrors/custom.cmake create mode 100644 cmake/mirrors/gitcode.cmake diff --git a/cmake/OpenCVDownload.cmake b/cmake/OpenCVDownload.cmake index a427a41227..3e46515537 100644 --- a/cmake/OpenCVDownload.cmake +++ b/cmake/OpenCVDownload.cmake @@ -37,6 +37,50 @@ file(WRITE "${OPENCV_DOWNLOAD_LOG}" "#use_cache \"${OPENCV_DOWNLOAD_PATH}\"\n") file(REMOVE "${OPENCV_DOWNLOAD_WITH_CURL}") file(REMOVE "${OPENCV_DOWNLOAD_WITH_WGET}") +ocv_check_environment_variables(OPENCV_DOWNLOAD_MIRROR_ID) + +function(ocv_init_download_mirror) + if(NOT DEFINED OPENCV_DOWNLOAD_MIRROR_ID) + # Run `git remote get-url origin` to get remote source + execute_process( + COMMAND + git remote get-url origin + WORKING_DIRECTORY + ${CMAKE_SOURCE_DIR} + RESULT_VARIABLE + RESULT_STATUS + OUTPUT_VARIABLE + OCV_GIT_ORIGIN_URL_OUTPUT + ERROR_QUIET + ) + # if non-git, OCV_GIT_ORIGIN_URL_OUTPUT is empty + if(NOT OCV_GIT_ORIGIN_URL_OUTPUT) + message(STATUS "ocv_init_download: OpenCV source tree is not fetched as git repository. 
3rdparty resources will be downloaded from github.com by default.") + return() + else() + # Check if git origin is github.com + string(FIND "${OCV_GIT_ORIGIN_URL_OUTPUT}" "github.com" _found_github) + if(NOT ${_found_github} EQUAL -1) + set(OPENCV_DOWNLOAD_MIRROR_ID "github" CACHE STRING "") + endif() + # Check if git origin is gitcode.net + string(FIND "${OCV_GIT_ORIGIN_URL_OUTPUT}" "gitcode.net" _found_gitcode) + if(NOT ${_found_gitcode} EQUAL -1) + set(OPENCV_DOWNLOAD_MIRROR_ID "gitcode" CACHE STRING "") + endif() + endif() + endif() + + if(OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "gitcode" OR OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "custom") + message(STATUS "ocv_init_download: Using ${OPENCV_DOWNLOAD_MIRROR_ID}-hosted mirror to download 3rdparty components.") + ocv_cmake_hook_append(OPENCV_DOWNLOAD_PRE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/mirrors/${OPENCV_DOWNLOAD_MIRROR_ID}.cmake") + elseif(OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "github") + return() + else() + message(STATUS "ocv_init_download: Unable to recognize git server of OpenCV source code. 
Using github.com to download 3rdparty components.") + endif() +endfunction() + function(ocv_download) cmake_parse_arguments(DL "UNPACK;RELATIVE_URL" "FILENAME;HASH;DESTINATION_DIR;ID;STATUS" "URL" ${ARGN}) @@ -67,6 +111,8 @@ function(ocv_download) set(${DL_STATUS} TRUE PARENT_SCOPE) endif() + ocv_cmake_hook(OPENCV_DOWNLOAD_PRE) + # Check CMake cache for already processed tasks string(FIND "${DL_DESTINATION_DIR}" "${CMAKE_BINARY_DIR}" DL_BINARY_PATH_POS) if(DL_BINARY_PATH_POS EQUAL 0) @@ -115,7 +161,7 @@ function(ocv_download) if(DL_ID) set(__msg_prefix "${DL_ID}: ") endif() - message(STATUS "${__msg_prefix}Download: ${DL_FILENAME}") + message(STATUS "${__msg_prefix}Downloading ${DL_FILENAME} from ${DL_URL}") # Copy mode: check if copy destination exists and is correct if(NOT DL_UNPACK) @@ -252,3 +298,8 @@ ${OPENCV_DOWNLOAD_LOG} set(${OCV_DOWNLOAD_HASH_NAME} "${DL_HASH}" CACHE INTERNAL "") endif() endfunction() + +# ---------------------------------------------------------------------------- +# Initialize download in case mirror is used +# ---------------------------------------------------------------------------- +ocv_init_download_mirror() diff --git a/cmake/mirrors/custom.cmake b/cmake/mirrors/custom.cmake new file mode 100644 index 0000000000..2be93d8165 --- /dev/null +++ b/cmake/mirrors/custom.cmake @@ -0,0 +1,77 @@ +# Gitlab-style mirror +# CMake scripts look for opencv/opencv_3rdparty, +# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade +# from OPENCV_DOWNLOAD_MIRROR +ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "") +# Tengine +ocv_update(TENGINE_PKG_MD5_CUSTOM "") +ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e + +# TBB +ocv_update(TBB_RELEASE_CUSTOM "") +ocv_update(TBB_PKG_NAME_CUSTOM "") +ocv_update(TBB_PKG_MD5_CUSTOM "") +ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2 + +# 
ADE +ocv_update(ADE_RELEASE_CUSTOM "") +ocv_update(ADE_PKG_NAME_CUSTOM "") +ocv_update(ADE_PKG_MD5_CUSTOM "") +ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f + +macro(ocv_download_url_custom_usercontent) + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 5 __COMMIT_ID) + list(GET DL_URL_split 6 __PKG_NAME) + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/opencv/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/") +endmacro() +macro(ocv_download_url_custom_archive_commit_id) + if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_PKG_MD5_CUSTOM to download ${DL_ID} from custom source.") + elseif(${DL_ID}_PKG_MD5_ORIGINAL STREQUAL "${DL_HASH}") + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/") + set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}") + else() + message(WARNING "No information about mirrors for downloading ${DL_FILENAME} from URL='${DL_URL}' and MD5=${DL_HASH}.") + endif() +endmacro() +macro(ocv_download_url_custom_archive_release) + if("m${${DL_ID}_RELEASE_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_RELEASE_CUSTOM to download ${DL_ID} from custom source.") + return() + endif() + if("m${${DL_ID}_PKG_NAME_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_PKG_NAME_CUSTOM to download ${DL_ID} from custom source.") + return() + endif() + if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_PKG_MD5_CUSTOM to download ${DL_ID} from custom source.") + return() + endif() + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL 
"https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_PKG_RELEASE_CUSTOM}/${__REPO_NAME}-") + set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}") +endmacro() + +if("m${OPENCV_DOWNLOAD_MIRROR_URL}" STREQUAL "m") + message(WARNING "ocv_download: specify OPENCV_DOWNLOAD_MIRROR_URL to use custom mirror.") +else() + if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV")) + ocv_download_url_custom_usercontent() + elseif(DL_ID STREQUAL "TENGINE") + ocv_download_url_custom_archive_commit_id() + elseif(DL_ID STREQUAL "TBB") + ocv_download_url_custom_archive_release() + set(OPENCV_TBB_SUBDIR "${TBB_PKG_NAME_CUSTOM}" PARENT_SCOPE) + elseif(DL_ID STREQUAL "ADE") + ocv_download_url_custom_archive_release() + set(ade_subdir "${ADE_PKG_NAME_CUSTOM}" PARENT_SCOPE) + else() + message(STATUS "ocv_download: Unknown download ID ${DL_ID} for using mirror ${OPENCV_DOWNLOAD_MIRROR_URL}. Use original source instead.") + endif() +endif() \ No newline at end of file diff --git a/cmake/mirrors/gitcode.cmake b/cmake/mirrors/gitcode.cmake new file mode 100644 index 0000000000..aafe9635f3 --- /dev/null +++ b/cmake/mirrors/gitcode.cmake @@ -0,0 +1,70 @@ +# Tengine (Download via commit id) +ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690) +ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e + +# TBB (Download from release page) +ocv_update(TBB_RELEASE_GITCODE "v2020.2") +ocv_update(TBB_PKG_NAME_GITCODE "tbb-${TBB_RELEASE_GITCODE}") +ocv_update(TBB_PKG_MD5_GITCODE 4eeafdf16a90cb66e39a31c8d6c6804e) +ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2 + +# ADE (Download from release page) +ocv_update(ADE_RELEASE_GITCODE "v0.1.1f") +ocv_update(ADE_PKG_NAME_GITCODE "ade-${ADE_RELEASE_GITCODE}") +ocv_update(ADE_PKG_MD5_GITCODE c12909e0ccfa93138c820ba91ff37b3c) 
+ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f + +# +# Replace download links for packages in opencv/opencv_3rdparty: +# 1. Extract repo owner and repo name from DL_URL. +# 2. Put repo owner and repo name into the placeholders of new DL_URL. +# +macro(ocv_download_url_gitcode_usercontent) + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 5 __COMMIT_ID) + list(GET DL_URL_split 6 __PKG_NAME) + set(DL_URL "https://gitcode.net/opencv/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/") +endmacro() +# +# Replace download links and checksums for archives/releases in other repositories: +# 1. Check if versions matched. If not matched, download from github instead. +# 2. Extract repo owner and repo name from DL_URL. +# 3. Put repo owner and repo name into the placeholders of new DL_URL. +# 4. Replace DL_HASH with the one downloaded from gitcode.net. +# +macro(ocv_download_url_gitcode_archive_commit_id) + if(DL_HASH STREQUAL "${${DL_ID}_PKG_MD5_ORIGINAL}") + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://gitcode.net/${__OWNER}/${__REPO_NAME}/-/archive/") + set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}") + else() + message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.") + endif() +endmacro() +macro(ocv_download_url_gitcode_archive_release) + if(DL_HASH STREQUAL "${${DL_ID}_PKG_MD5_ORIGINAL}") + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://gitcode.net/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_RELEASE_GITCODE}/${__REPO_NAME}-") + set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}") + else() + message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.") + endif() +endmacro() + +if((DL_ID 
STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV")) + ocv_download_url_gitcode_usercontent() +elseif(DL_ID STREQUAL "TENGINE") + ocv_download_url_gitcode_archive_commit_id() +elseif(DL_ID STREQUAL "TBB") + ocv_download_url_gitcode_archive_release() + set(OPENCV_TBB_SUBDIR "${TBB_PKG_NAME_GITCODE}" PARENT_SCOPE) +elseif(DL_ID STREQUAL "ADE") + ocv_download_url_gitcode_archive_release() + set(ade_subdir "${ADE_PKG_NAME_GITCODE}" PARENT_SCOPE) +else() + message(STATUS "ocv_download: Unknown download ID ${DL_ID} for using mirror gitcode.net. Use original source instead.") +endif() From 4c7931869490c3c662b1e548c4c22bd9ee7b52fb Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 19 Mar 2022 06:52:47 +0000 Subject: [PATCH 34/84] dnn: fix index access --- modules/dnn/src/dnn.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index cbebad7e7c..b4fa8933c3 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2759,8 +2759,12 @@ struct Net::Impl : public detail::NetImplBase it->second.out.empty()) { getLayerShapesRecursively(layerId, inOutShapes); + it = inOutShapes.find(layerId); + CV_Assert(it != inOutShapes.end()); } - const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; + const int out_port = inputLayerIds[i].oid; + CV_CheckLT(out_port, (int)it->second.out.size(), ""); + const MatShape& shape = it->second.out[out_port]; layerShapes.in.push_back(shape); } } From 9ce0e5130503ae65108f3921c6711fb7ff45c5e9 Mon Sep 17 00:00:00 2001 From: Giles Payne Date: Sun, 20 Mar 2022 18:47:26 +0900 Subject: [PATCH 35/84] Objective-C wrapper header fix-ups to avoid clashes with system macros --- modules/core/misc/objc/gen_dict.json | 20 ++++++++++++++++++++ modules/objc/generator/gen_objc.py | 13 +++++++++++++ 2 files changed, 33 insertions(+) diff --git a/modules/core/misc/objc/gen_dict.json b/modules/core/misc/objc/gen_dict.json index 9ade8ccb9f..58300255dc 100644 --- 
a/modules/core/misc/objc/gen_dict.json +++ b/modules/core/misc/objc/gen_dict.json @@ -78,6 +78,26 @@ "(void)divide:(double)scale src2:(Mat*)src2 dst:(Mat*)dst dtype:(int)dtype" : { "src2" : {"name" : "src"} } } }, + "header_fix" : { + "Core": { + "pow" : { + "prolog" : "#pragma push_macro(\"pow\")\n#undef pow", + "epilog" : "#pragma pop_macro(\"pow\")" + }, + "sqrt" : { + "prolog" : "#pragma push_macro(\"sqrt\")\n#undef sqrt", + "epilog" : "#pragma pop_macro(\"sqrt\")" + }, + "exp" : { + "prolog" : "#pragma push_macro(\"exp\")\n#undef exp", + "epilog" : "#pragma pop_macro(\"exp\")" + }, + "log" : { + "prolog" : "#pragma push_macro(\"log\")\n#undef log", + "epilog" : "#pragma pop_macro(\"log\")" + } + } + }, "type_dict" : { "Algorithm": { "objc_type": "Algorithm*" diff --git a/modules/objc/generator/gen_objc.py b/modules/objc/generator/gen_objc.py index 3e3ff8a2b0..83029312b9 100755 --- a/modules/objc/generator/gen_objc.py +++ b/modules/objc/generator/gen_objc.py @@ -82,6 +82,9 @@ ManualFuncs = {} # { class : { func : { arg_name : {"ctype" : ctype, "attrib" : [attrib]} } } } func_arg_fix = {} +# { class : { func : { prolog : "", epilog : "" } } } +header_fix = {} + # { class : { enum: fixed_enum } } enum_fix = {} @@ -479,6 +482,9 @@ class FuncInfo(GeneralInfo): self.ctype = re.sub(r"^CvTermCriteria", "TermCriteria", decl[1] or "") self.args = [] func_fix_map = func_arg_fix.get(self.classname or module, {}).get(self.objc_name, {}) + header_fixes = header_fix.get(self.classname or module, {}).get(self.objc_name, {}) + self.prolog = header_fixes.get('prolog', None) + self.epilog = header_fixes.get('epilog', None) for a in decl[3]: arg = a[:] arg_fix_map = func_fix_map.get(arg[1], {}) @@ -1170,6 +1176,9 @@ class ObjectiveCWrapperGenerator(object): objc_name = fi.objc_name if not constructor else ("init" + ("With" + (args[0].name[0].upper() + args[0].name[1:]) if len(args) > 0 else "")) ) + if fi.prolog is not None: + method_declarations.write("\n%s\n\n" % fi.prolog) + 
method_declarations.write( Template( """$prototype$swift_name$deprecation_decl; @@ -1181,6 +1190,9 @@ class ObjectiveCWrapperGenerator(object): ) ) + if fi.epilog is not None: + method_declarations.write("%s\n\n" % fi.epilog) + method_implementations.write( Template( """$prototype {$prologue $ret_val$obj_deref$cv_name($cv_args)$tail;$epilogue$ret @@ -1646,6 +1658,7 @@ if __name__ == "__main__": AdditionalImports[module] = gen_type_dict.get("AdditionalImports", {}) ManualFuncs.update(gen_type_dict.get("ManualFuncs", {})) func_arg_fix.update(gen_type_dict.get("func_arg_fix", {})) + header_fix.update(gen_type_dict.get("header_fix", {})) enum_fix.update(gen_type_dict.get("enum_fix", {})) const_fix.update(gen_type_dict.get("const_fix", {})) namespaces_dict.update(gen_type_dict.get("namespaces_dict", {})) From f3699b5ac8c5c467f281811a3c60a95b18be9cb2 Mon Sep 17 00:00:00 2001 From: pkubaj Date: Mon, 21 Mar 2022 02:05:05 +0000 Subject: [PATCH 36/84] Fix build with LLVM 13 on ppc64le /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/vsx_utils.hpp:352:12: warning: 'vec_permi' macro redefined [-Wmacro-redefined] # define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) ^ /usr/lib/clang/13.0.0/include/altivec.h:13077:9: note: previous definition is here #define vec_permi(__a, __b, __c) \ ^ /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/vsx_utils.hpp:370:25: error: redefinition of 'vec_promote' VSX_FINLINE(vec_dword2) vec_promote(long long a, int b) ^ /usr/lib/clang/13.0.0/include/altivec.h:14604:1: note: previous definition is here vec_promote(signed long long __a, int __b) { ^ /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/vsx_utils.hpp:377:26: error: redefinition of 'vec_promote' VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b) ^ /usr/lib/clang/13.0.0/include/altivec.h:14611:1: note: previous definition is here 
vec_promote(unsigned long long __a, int __b) { ^ /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/hal/intrin_vsx.hpp:1045:22: error: call to 'vec_rsqrt' is ambiguous { return v_float32x4(vec_rsqrt(x.val)); } ^~~~~~~~~ /usr/lib/clang/13.0.0/include/altivec.h:8472:34: note: candidate function static vector float __ATTRS_o_ai vec_rsqrt(vector float __a) { ^ /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/vsx_utils.hpp:362:29: note: candidate function VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a) ^ /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/hal/intrin_vsx.hpp:1047:22: error: call to 'vec_rsqrt' is ambiguous { return v_float64x2(vec_rsqrt(x.val)); } ^~~~~~~~~ /usr/lib/clang/13.0.0/include/altivec.h:8477:35: note: candidate function static vector double __ATTRS_o_ai vec_rsqrt(vector double __a) { ^ /wrkdirs/usr/ports/graphics/opencv/work/opencv-4.5.5/modules/core/include/opencv2/core/vsx_utils.hpp:365:30: note: candidate function VSX_FINLINE(vec_double2) vec_rsqrt(const vec_double2& a) ^ 1 warning and 4 errors generated. The specific functions were added to altivec.h in LLVM's 1ff93618e58df210def48d26878c20a1b414d900, c3da07d216dd20fbdb7302fd085c0a59e189ae3d and 10cc5bcd868c433f9a781aef82178b04e98bd098. 
--- modules/core/include/opencv2/core/vsx_utils.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index 5cbc066784..79a1074d59 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -324,6 +324,7 @@ VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo) #define VSX_IMPL_CONVERT(rt, rg, fnm) \ VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } +#ifndef vec_permi #if __clang_major__ < 5 // implement vec_permi in a dirty way # define VSX_IMPL_CLANG_4_PERMI(Tvec) \ @@ -351,12 +352,14 @@ VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } // vec_xxpermdi is missing little-endian supports in clang 4 just like gcc4 # define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) #endif // __clang_major__ < 5 +#endif // shift left double by word immediate #ifndef vec_sldw # define vec_sldw vec_xxsldwi #endif +#if __clang_major__ < 13 // Implement vec_rsqrt since clang only supports vec_rsqrte #ifndef vec_rsqrt VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a) @@ -380,6 +383,7 @@ VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b) ret[b & 1] = a; return ret; } +#endif // vec_popcnt should return unsigned but clang has different thought just like gcc in vec_vpopcnt #define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \ From 4eae23a2ccca801a9fea0afb450e35430cbbee3a Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 21 Mar 2022 16:04:33 +0300 Subject: [PATCH 37/84] ts: fix some EXPECT_MAT macros --- modules/ts/include/opencv2/ts/ocl_test.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 6126883091..1ef37a962a 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ 
b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -89,7 +89,7 @@ extern int test_loop_times; #define EXPECT_MAT_NORM(mat, eps) \ do \ { \ - EXPECT_LE(TestUtils::checkNorm1(mat), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm1(mat), eps) \ } while ((void)0, 0) #undef EXPECT_MAT_NEAR @@ -98,7 +98,7 @@ do \ { \ ASSERT_EQ(mat1.type(), mat2.type()); \ ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(TestUtils::checkNorm2(mat1, mat2), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(mat1, mat2), eps) \ << "Size: " << mat1.size() << std::endl; \ } while ((void)0, 0) @@ -107,7 +107,7 @@ do \ { \ ASSERT_EQ((mat1).type(), (mat2).type()); \ ASSERT_EQ((mat1).size(), (mat2).size()); \ - EXPECT_LE(TestUtils::checkNormRelative((mat1), (mat2)), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelative((mat1), (mat2)), eps) \ << "Size: " << (mat1).size() << std::endl; \ } while ((void)0, 0) @@ -146,7 +146,7 @@ do \ { \ ASSERT_EQ(name ## _roi.type(), u ## name ## _roi.type()); \ ASSERT_EQ(name ## _roi.size(), u ## name ## _roi.size()); \ - EXPECT_LE(TestUtils::checkNorm2(name ## _roi, u ## name ## _roi), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(name ## _roi, u ## name ## _roi), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ Point _offset; \ Size _wholeSize; \ @@ -155,7 +155,7 @@ do \ _mask(Rect(_offset, name ## _roi.size())).setTo(Scalar::all(0)); \ ASSERT_EQ(name.type(), u ## name.type()); \ ASSERT_EQ(name.size(), u ## name.size()); \ - EXPECT_LE(TestUtils::checkNorm2(name, u ## name, _mask), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(name, u ## name, _mask), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ } while ((void)0, 0) @@ -183,7 +183,7 @@ do \ { \ ASSERT_EQ(name ## _roi.type(), u ## name ## _roi.type()); \ ASSERT_EQ(name ## _roi.size(), u ## name ## _roi.size()); \ - EXPECT_LE(TestUtils::checkNormRelativeSparse(name ## _roi, u ## name ## _roi), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelativeSparse(name 
## _roi, u ## name ## _roi), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ Point _offset; \ Size _wholeSize; \ @@ -192,7 +192,7 @@ do \ _mask(Rect(_offset, name ## _roi.size())).setTo(Scalar::all(0)); \ ASSERT_EQ(name.type(), u ## name.type()); \ ASSERT_EQ(name.size(), u ## name.size()); \ - EXPECT_LE(TestUtils::checkNormRelativeSparse(name, u ## name, _mask), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelativeSparse(name, u ## name, _mask), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ } while ((void)0, 0) From 593996216f54d472aa6eae23386de65ef9f7ba99 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 17 Mar 2022 16:14:38 +0300 Subject: [PATCH 38/84] cartToPolar/polarToCart: disable inplace mode --- modules/core/src/mathfuncs.cpp | 6 ++++++ modules/core/test/test_arithm.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 9fdf7d7702..f463539679 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -269,6 +269,9 @@ void cartToPolar( InputArray src1, InputArray src2, { CV_INSTRUMENT_REGION(); + CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() && + src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj()); + CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(), ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees)) @@ -563,6 +566,9 @@ void polarToCart( InputArray src1, InputArray src2, { CV_INSTRUMENT_REGION(); + CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() && + src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj()); + int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type)); diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 74bf39fbc7..fa48877e3d 100644 --- 
a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2396,5 +2396,36 @@ TEST(Core_Magnitude, regression_19506) } } +TEST(Core_CartPolar, inplace) +{ + RNG& rng = TS::ptr()->get_rng(); + cv::Mat1d A[2] = {cv::Mat1d(10, 10), cv::Mat1d(10, 10)}; + cv::Mat1d B[2], C[2]; + cv::UMat uA[2]; + + for(int i = 0; i < 2; ++i) + { + cvtest::randUni(rng, A[i], Scalar::all(-1000), Scalar::all(1000)); + A[i].copyTo(uA[i]); + } + + // Reverse + cv::cartToPolar(A[0], A[1], B[0], B[1], false); + cv::polarToCart(B[0], B[1], C[0], C[1], false); + EXPECT_MAT_NEAR(A[0], C[0], 2); + EXPECT_MAT_NEAR(A[1], C[1], 2); + + // Inplace + EXPECT_THROW(cv::polarToCart(B[0], B[1], B[0], B[1], false), cv::Exception); + EXPECT_THROW(cv::polarToCart(B[0], B[1], B[1], B[0], false), cv::Exception); + EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[0], A[1], false), cv::Exception); + EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[1], A[0], false), cv::Exception); + // Inplace OCL + EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[0], uA[1]), cv::Exception); + EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[1], uA[0]), cv::Exception); + EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception); + EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception); + +} }} // namespace From e76cda6923aaa1b86772b2a1f1595e67d69f742c Mon Sep 17 00:00:00 2001 From: Alexander Conner Date: Tue, 22 Mar 2022 15:40:23 -0400 Subject: [PATCH 39/84] Update CameraBridgeViewBase error message Update CameraBridgeViewBase error message - Change "you device does" to "your device does" --- .../android/java/org/opencv/android/CameraBridgeViewBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java b/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java index 07c059b7d9..1993cf1407 100644 --- 
a/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java +++ b/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java @@ -376,7 +376,7 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac if (!connectCamera(getWidth(), getHeight())) { AlertDialog ad = new AlertDialog.Builder(getContext()).create(); ad.setCancelable(false); // This blocks the 'BACK' button - ad.setMessage("It seems that you device does not support camera (or it is locked). Application will be closed."); + ad.setMessage("It seems that your device does not support camera (or it is locked). Application will be closed."); ad.setButton(DialogInterface.BUTTON_NEUTRAL, "OK", new DialogInterface.OnClickListener() { public void onClick(DialogInterface dialog, int which) { dialog.dismiss(); From 2a218b96c4efe49d0ee87d104295fb56e04929a5 Mon Sep 17 00:00:00 2001 From: Stefano Allegretti Date: Wed, 23 Mar 2022 15:55:30 +0100 Subject: [PATCH 40/84] Fix VideoCapture::open() brief description --- modules/videoio/include/opencv2/videoio.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index 93ea8cdddc..35491a0b5e 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -751,7 +751,7 @@ public: */ CV_WRAP virtual bool open(const String& filename, int apiPreference = CAP_ANY); - /** @brief Opens a camera for video capturing + /** @brief Opens a video file or a capturing device or an IP video stream for video capturing with API Preference and parameters @overload @@ -775,7 +775,7 @@ public: */ CV_WRAP virtual bool open(int index, int apiPreference = CAP_ANY); - /** @brief Returns true if video capturing has been initialized already. 
+ /** @brief Opens a camera for video capturing with API Preference and parameters @overload From 8e8e4bbabc74f37d94574103f6ee7f81090389c2 Mon Sep 17 00:00:00 2001 From: luz paz Date: Fri, 18 Mar 2022 17:15:14 -0400 Subject: [PATCH 41/84] dnn: fix various dnn related typos Fixes source comments and documentation related to dnn code. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 4 ++-- modules/dnn/src/cuda/concat.cu | 2 +- modules/dnn/src/cuda/kernel_dispatcher.hpp | 2 +- modules/dnn/src/cuda/permute.cu | 4 ++-- modules/dnn/src/cuda/slice.cu | 2 +- modules/dnn/src/cuda4dnn/csl/memory.hpp | 2 +- modules/dnn/src/cuda4dnn/csl/pointer.hpp | 4 ++-- modules/dnn/src/darknet/darknet_io.cpp | 2 +- modules/dnn/src/model.cpp | 2 +- .../dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp | 2 +- modules/dnn/test/test_ie_models.cpp | 4 ++-- samples/dnn/speech_recognition.cpp | 4 ++-- samples/dnn/speech_recognition.py | 6 +++--- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 67042a14b7..ab443cd67e 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -389,7 +389,7 @@ CV__DNN_INLINE_NS_BEGIN /** - * @brief "Deattaches" all the layers, attached to particular layer. + * @brief "Detaches" all the layers, attached to particular layer. */ virtual void unsetAttached(); @@ -1579,7 +1579,7 @@ public: * - top-right * - bottom-right * - * Use cv::getPerspectiveTransform function to retrive image region without perspective transformations. + * Use cv::getPerspectiveTransform function to retrieve image region without perspective transformations. * * @note If DL model doesn't support that kind of output then result may be derived from detectTextRectangles() output. 
* diff --git a/modules/dnn/src/cuda/concat.cu b/modules/dnn/src/cuda/concat.cu index ac1be75682..5250b59518 100644 --- a/modules/dnn/src/cuda/concat.cu +++ b/modules/dnn/src/cuda/concat.cu @@ -100,7 +100,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { CV_Assert(output.rank() == input.rank()); CV_Assert(output_axis_offset < output.get_axis_size(axis)); - /* if axes preceeding the concat axis are all singleton, the concat blocks are contiguous + /* if axes preceding the concat axis are all singleton, the concat blocks are contiguous * in the output and we can copy each block directly */ if (output.size_range(0, axis) == 1) diff --git a/modules/dnn/src/cuda/kernel_dispatcher.hpp b/modules/dnn/src/cuda/kernel_dispatcher.hpp index b0fc658850..0f3e7c4fc4 100644 --- a/modules/dnn/src/cuda/kernel_dispatcher.hpp +++ b/modules/dnn/src/cuda/kernel_dispatcher.hpp @@ -33,7 +33,7 @@ * template * void launch_some_kernel(...); * - * // creates the dispatcher named "some_dispatcher" which invokves the correct instantiation of "launch_some_kernel" + * // creates the dispatcher named "some_dispatcher" which invokes the correct instantiation of "launch_some_kernel" * GENERATE_KERNEL_DISPATCHER(some_dispatcher, launch_some_kernel); * * // internal API function diff --git a/modules/dnn/src/cuda/permute.cu b/modules/dnn/src/cuda/permute.cu index 082c1bf75e..35c95a6737 100644 --- a/modules/dnn/src/cuda/permute.cu +++ b/modules/dnn/src/cuda/permute.cu @@ -72,7 +72,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { __syncthreads(); /* We interchange `threadIdx.x` and `threadIdx.y` so that consecutive output indices map to - * consecutive threads. This would allow writes across threds in a warp to be coalesced. + * consecutive threads. This would allow writes across threads in a warp to be coalesced. 
*/ const index_type out_x = blockIdx.y * TILE_SIZE + threadIdx.x; const index_type out_y_begin = blockIdx.x * TILE_SIZE + threadIdx.y; @@ -156,7 +156,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { * tensor indices be [o1, o2, ...]. The permutation operation essentially copies items * from the input tensor to new locations in the output tensor as dictated by the indices. * - * If the size of the nth axis (say i2) of the input is one the input and output indicies for + * If the size of the nth axis (say i2) of the input is one the input and output indices for * all the elements will be of the form be [i1, 0, ...] and [..., 0, ...] respectively. * The index does not contribute to the element's address calculation and hence would give * identical result if it weren't there. diff --git a/modules/dnn/src/cuda/slice.cu b/modules/dnn/src/cuda/slice.cu index 37b718cd63..461e87e549 100644 --- a/modules/dnn/src/cuda/slice.cu +++ b/modules/dnn/src/cuda/slice.cu @@ -159,7 +159,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { /* We can do a copy if the reduced rank is two and only the first axis is sliced. * The general requirement is that only one axis is sliced and all the axes that - * preceed the sliced axis are singleton. However, the reductions above will remove + * precede the sliced axis are singleton. However, the reductions above will remove * all the leading singleton axes and merge the trailing unsliced axes into one, or * zero if there are no trailing unsliced axes. The latter is handled separately. 
*/ diff --git a/modules/dnn/src/cuda4dnn/csl/memory.hpp b/modules/dnn/src/cuda4dnn/csl/memory.hpp index 40918cd4b3..683ed62059 100644 --- a/modules/dnn/src/cuda4dnn/csl/memory.hpp +++ b/modules/dnn/src/cuda4dnn/csl/memory.hpp @@ -68,7 +68,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { } } }); - /* std::shared_ptr::reset invokves the deleter if an exception occurs; hence, we don't + /* std::shared_ptr::reset invokes the deleter if an exception occurs; hence, we don't * need to have a try-catch block to free the allocated device memory */ diff --git a/modules/dnn/src/cuda4dnn/csl/pointer.hpp b/modules/dnn/src/cuda4dnn/csl/pointer.hpp index 45bf94bf0a..4d7a232093 100644 --- a/modules/dnn/src/cuda4dnn/csl/pointer.hpp +++ b/modules/dnn/src/cuda4dnn/csl/pointer.hpp @@ -147,7 +147,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { /* host const void pointer to const void device pointer */ CUDA4DNN_HOST_DEVICE explicit DevicePtr(pointer ptr_) noexcept : ptr{ ptr_ } { } - /* allow any device pointer to be implicitly convereted to void device pointer */ + /* allow any device pointer to be implicitly converted to void device pointer */ template CUDA4DNN_HOST_DEVICE DevicePtr(DevicePtr ptr_) noexcept : ptr{ ptr_.get() } { } @@ -199,7 +199,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { /* host pointer to device pointer */ CUDA4DNN_HOST_DEVICE explicit DevicePtr(pointer ptr_) noexcept : ptr{ ptr_ } { } - /* allow any device pointer to mutable memory to be implicitly convereted to void device pointer */ + /* allow any device pointer to mutable memory to be implicitly converted to void device pointer */ template ::value, bool>::type = false> CUDA4DNN_HOST_DEVICE DevicePtr(DevicePtr ptr_) noexcept : ptr { ptr_.get() } { } diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 11aad453e3..520f3c94be 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ 
b/modules/dnn/src/darknet/darknet_io.cpp @@ -791,7 +791,7 @@ namespace cv { if (layers_vec.size() > 1) { // layer ids in layers_vec - inputs of Slice layers - // after adding offset to layers_vec: layer ids - ouputs of Slice layers + // after adding offset to layers_vec: layer ids - outputs of Slice layers for (size_t k = 0; k < layers_vec.size(); ++k) layers_vec[k] += layers_vec.size(); diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index 4cce0a7dc4..7444011a64 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -799,7 +799,7 @@ struct TextRecognitionModel_Impl : public Model::Impl virtual std::string ctcPrefixBeamSearchDecode(const Mat& prediction) { - // CTC prefix beam seach decode. + // CTC prefix beam search decode. // For more detail, refer to: // https://distill.pub/2017/ctc/#inference // https://gist.github.com/awni/56369a90d03953e370f3964c826ed4b0i diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp index ecb5c62f56..daadc32ad2 100644 --- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp +++ b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp @@ -331,7 +331,7 @@ teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int g teg_weight = kernel_; } - /* initial the resoruce of tengine */ + /* initial the resource of tengine */ if(false == tengine_init_flag) { init_tengine(); diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 0fe19db5e9..3622f69bdb 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -290,7 +290,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath if (cvtest::debugLevel > 0) { const std::vector& dims = desc.getDims(); - std::cout << "Input: '" << it.first << "' precison=" << desc.getPrecision() << " dims=" << dims.size() << " ["; + std::cout << "Input: '" 
<< it.first << "' precision=" << desc.getPrecision() << " dims=" << dims.size() << " ["; for (auto d : dims) std::cout << " " << d; std::cout << "] ocv_mat=" << inputsMap[it.first].size << " of " << typeToString(inputsMap[it.first].type()) << std::endl; @@ -308,7 +308,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath if (cvtest::debugLevel > 0) { const std::vector& dims = desc.getDims(); - std::cout << "Output: '" << it.first << "' precison=" << desc.getPrecision() << " dims=" << dims.size() << " ["; + std::cout << "Output: '" << it.first << "' precision=" << desc.getPrecision() << " dims=" << dims.size() << " ["; for (auto d : dims) std::cout << " " << d; std::cout << "] ocv_mat=" << outputsMap[it.first].size << " of " << typeToString(outputsMap[it.first].type()) << std::endl; diff --git a/samples/dnn/speech_recognition.cpp b/samples/dnn/speech_recognition.cpp index 7e9ee1f54d..ff461c50f5 100644 --- a/samples/dnn/speech_recognition.cpp +++ b/samples/dnn/speech_recognition.cpp @@ -33,7 +33,7 @@ private: double highfreq = sample_rate / 2; public: - // Mel filterbanks preperation + // Mel filterbanks preparation double hz_to_mel(double frequencies) { //Converts frequencies from hz to mel scale @@ -149,7 +149,7 @@ public: return weights; } - // STFT preperation + // STFT preparation vector pad_window_center(vector&data, int size) { // Pad the window out to n_fft size diff --git a/samples/dnn/speech_recognition.py b/samples/dnn/speech_recognition.py index 7bc424b37c..da2ce11521 100644 --- a/samples/dnn/speech_recognition.py +++ b/samples/dnn/speech_recognition.py @@ -44,7 +44,7 @@ import os model.graph.initializer.insert(i,init) ``` - 6. Add an additional reshape node to handle the inconsistant input from python and c++ of openCV. + 6. Add an additional reshape node to handle the inconsistent input from python and c++ of openCV. 
see https://github.com/opencv/opencv/issues/19091 Make & insert a new node with 'Reshape' operation & required initializer ``` @@ -256,7 +256,7 @@ class FilterbankFeatures: weights *= enorm[:, np.newaxis] return weights - # STFT preperation + # STFT preparation def pad_window_center(self, data, size, axis=-1, **kwargs): ''' Centers the data and pads. @@ -329,7 +329,7 @@ class FilterbankFeatures: then padded with zeros to match n_fft fft_window : a vector or array of length `n_fft` having values computed by a window function - pad_mode : mode while padding the singnal + pad_mode : mode while padding the signal return_complex : returns array with complex data type if `True` return : Matrix of short-term Fourier transform coefficients. ''' From d98e07c3d37a30b077784f5b5806c302e18c7534 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 25 Mar 2022 11:19:53 +0300 Subject: [PATCH 42/84] Merge pull request #21660 from TolyaTalamanov:at/handle-exception-in-streamingexecutor [G-API] Handle exceptions in streaming executor * Handle exceptions in streaming executor * Rethrow exception in non-streaming executor * Clean up * Put more tests * Handle exceptions in IE backend * Handle exception in IE callbacks * Handle exception in GExecutor * Handle all exceptions in IE backend * Not only (std::exception& e) * Fix comments to review * Handle input exception in generic way * Fix comment * Clean up * Apply review comments * Put more comments * Fix alignment * Move test outside of HAVE_NGRAPH * Fix compilation --- .../include/opencv2/gapi/cpu/gcpukernel.hpp | 7 +- modules/gapi/src/backends/ie/giebackend.cpp | 130 +++--- .../backends/streaming/gstreamingbackend.cpp | 2 + modules/gapi/src/compiler/gislandmodel.cpp | 12 +- modules/gapi/src/compiler/gislandmodel.hpp | 15 +- modules/gapi/src/executor/gexecutor.cpp | 23 +- .../gapi/src/executor/gstreamingexecutor.cpp | 425 +++++++++++------- .../gapi/src/executor/gstreamingexecutor.hpp | 11 +- 
.../gapi/test/infer/gapi_infer_ie_test.cpp | 41 ++ .../test/streaming/gapi_streaming_tests.cpp | 164 +++++++ 10 files changed, 617 insertions(+), 213 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp index 48909a84fc..ff3ee45ed3 100644 --- a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp +++ b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp @@ -2,12 +2,17 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #ifndef OPENCV_GAPI_GCPUKERNEL_HPP #define OPENCV_GAPI_GCPUKERNEL_HPP +#ifdef _MSC_VER +#pragma warning(disable: 4702) // "Unreachable code" +// on postprocess(...) call inside OCVCallHelper +#endif + #include #include #include diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 711827d574..52c60c1f0b 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -389,10 +389,13 @@ public: const IEUnit &uu; cv::gimpl::GIslandExecutable::IOutput &out; - // NB: Need to gurantee that MediaFrame::View don't die until request is over. + // NB: Need to gurantee that MediaFrame::View doesn't die until request is over. using Views = std::vector>; Views views; + // To store exception appeared in callback. + std::exception_ptr eptr; + private: cv::detail::VectorRef& outVecRef(std::size_t idx); @@ -656,7 +659,7 @@ std::vector cv::gimpl::ie::IECompiled::createInfe class cv::gimpl::ie::RequestPool { public: using RunF = std::function; - using CallbackF = std::function; + using CallbackF = std::function; // NB: The task is represented by: // RunF - function which is set blobs and run async inference. 
@@ -675,7 +678,7 @@ private: void callback(Task task, size_t id, IE::InferRequest request, - IE::StatusCode code); + IE::StatusCode code) noexcept; void setup(); QueueClass m_idle_ids; @@ -706,32 +709,28 @@ void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) { static_cast( std::bind(&cv::gimpl::ie::RequestPool::callback, this, t, id, _1, _2))); - t.run(request); + // NB: InferRequest is already marked as busy + // in case of exception need to return it back to the idle. + try { + t.run(request); + } catch (...) { + request.SetCompletionCallback([](){}); + m_idle_ids.push(id); + throw; + } } void cv::gimpl::ie::RequestPool::callback(cv::gimpl::ie::RequestPool::Task task, size_t id, IE::InferRequest request, - IE::StatusCode code) { - // FIXME: Any exception which is arrised here must not leave this callback, - // because it won't be handled. - try { - if (code != IE::StatusCode::OK) { - throw std::logic_error("IE::InferRequest finished with not OK status"); - } - task.callback(request); - // NB: IE::InferRequest keeps the callback until the new one is set. - // Since user's callback might keep resources that should be released, - // need to destroy its after execution. - // Let's set the empty one to cause the destruction of a callback. - request.SetCompletionCallback([](){}); - m_idle_ids.push(id); - } catch (const std::exception& e) { - GAPI_LOG_FATAL(NULL, "Callback failed with error: " << e.what()); - //FIXME: Exception CAN't be rethrown here, since this callback works - // in separate IE thread and such scenarios aren't handled properly in - // G-API so far. - } + IE::StatusCode code) noexcept { + // NB: Inference is over. + // 1. Run callback + // 2. Destroy callback to free resources. + // 3. Mark InferRequest as idle. + task.callback(request, code); + request.SetCompletionCallback([](){}); + m_idle_ids.push(id); } // NB: Not thread-safe. 
@@ -786,18 +785,19 @@ void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in // 1. Collect island inputs/outputs. // 2. Create kernel context. (Every kernel has his own context). // 3. If the EndOfStream message is recieved, wait until all passed task are done. - // 4. + // 4. If the Exception message is revieved, propagate it further. + // 5. // 5.1 Run the kernel. // 5.2 Kernel wait for all nececcary infer requests and start asynchronous execution. // 5.3 After the kernel is finished continue processing next frame. // - // 5. If graph is compiled in non-streaming mode, wait until all tasks are done. + // 6. If graph is compiled in non-streaming mode, wait until all tasks are done. std::vector input_objs; std::vector output_objs; - const auto &in_desc = in.desc(); - const auto in_msg = in.get(); + const auto &in_desc = in.desc(); + auto in_msg = in.get(); if (cv::util::holds_alternative(in_msg)) { @@ -835,10 +835,20 @@ void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in const auto &kk = giem.metadata(this_nh).get(); - // (4) Run the kernel. - kk.run(ctx, *m_reqPool); + // (5) Run the kernel. + try { + kk.run(ctx, *m_reqPool); + } catch (...) { + auto eptr = std::current_exception(); + for (auto i : ade::util::iota(ctx->uu.params.num_out)) + { + auto output = ctx->output(i); + ctx->out.post(std::move(output), eptr); + } + return; + } - // (5) In non-streaming mode need to wait until the all tasks are done + // (6) In non-streaming mode need to wait until the all tasks are done // FIXME: Is there more graceful way to handle this case ? if (!m_gm.metadata().contains()) { m_reqPool->waitAll(); @@ -944,19 +954,26 @@ static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii, // NB: This is a callback used by async infer // to post outputs blobs (cv::GMat's). 
-static void PostOutputs(InferenceEngine::InferRequest &request, - std::shared_ptr ctx) { +static void PostOutputs(InferenceEngine::InferRequest &request, + InferenceEngine::StatusCode code, + std::shared_ptr ctx) { GAPI_ITT_STATIC_LOCAL_HANDLE(ie_cb_post_outputs_hndl, "IE_async_callback_PostOutputs"); GAPI_ITT_AUTO_TRACE_GUARD(ie_cb_post_outputs_hndl); - for (auto i : ade::util::iota(ctx->uu.params.num_out)) - { + if (code != IE::StatusCode::OK) { + std::stringstream ss; + ss << "InferRequest for model: " << ctx->uu.params.model_path + << " finished with InferenceEngine::StatusCode: " << static_cast(code); + ctx->eptr = std::make_exception_ptr(std::logic_error(ss.str())); + } + + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { auto& out_mat = ctx->outMatR(i); IE::Blob::Ptr this_blob = request.GetBlob(ctx->uu.params.output_names[i]); copyFromIE(this_blob, out_mat); auto output = ctx->output(i); ctx->out.meta(output, ctx->input(0).meta); - ctx->out.post(std::move(output)); + ctx->out.post(std::move(output), ctx->eptr); } } @@ -966,7 +983,9 @@ public: std::shared_ptr ctx, std::vector>&& cached_dims); - void operator()(InferenceEngine::InferRequest &request, size_t pos) const; + void operator()(InferenceEngine::InferRequest &request, + InferenceEngine::StatusCode code, + size_t pos) const; private: struct Priv { @@ -987,20 +1006,30 @@ PostOutputsList::PostOutputsList(size_t size, m_priv->cached_dims = std::move(cached_dims); } -void PostOutputsList::operator()(InferenceEngine::InferRequest &req, size_t pos) const { +void PostOutputsList::operator()(InferenceEngine::InferRequest &req, + InferenceEngine::StatusCode code, + size_t pos) const { auto&& ctx = m_priv->ctx; auto&& cached_dims = m_priv->cached_dims; auto&& finished = m_priv->finished; auto&& size = m_priv->size; - for (auto i : ade::util::iota(ctx->uu.params.num_out)) { - std::vector &out_vec = ctx->outVecR(i); - IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]); - 
GAPI_Assert(out_blob); + if (code != IE::StatusCode::OK) { + ctx->eptr = std::make_exception_ptr( + std::logic_error("IE::InferRequest finished with not OK status")); + } - // FIXME: Avoid data copy. Not sure if it is possible though - out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); - copyFromIE(out_blob, out_vec[pos]); + if (!ctx->eptr) { + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + std::vector &out_vec = ctx->outVecR(i); + + IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]); + GAPI_Assert(out_blob); + + // FIXME: Avoid data copy. Not sure if it is possible though + out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); + copyFromIE(out_blob, out_vec[pos]); + } } ++finished; @@ -1008,7 +1037,7 @@ void PostOutputsList::operator()(InferenceEngine::InferRequest &req, size_t pos) for (auto i : ade::util::iota(ctx->uu.params.num_out)) { auto output = ctx->output(i); ctx->out.meta(output, ctx->input(0).meta); - ctx->out.post(std::move(output)); + ctx->out.post(std::move(output), ctx->eptr); } } } @@ -1123,7 +1152,7 @@ struct Infer: public cv::detail::KernelTag { // What about to do that in RequestPool ? req.StartAsync(); }, - std::bind(PostOutputs, _1, ctx) + std::bind(PostOutputs, _1, _2, ctx) } ); } @@ -1218,7 +1247,7 @@ struct InferROI: public cv::detail::KernelTag { // What about to do that in RequestPool ? 
req.StartAsync(); }, - std::bind(PostOutputs, _1, ctx) + std::bind(PostOutputs, _1, _2, ctx) } ); } @@ -1294,7 +1323,6 @@ struct InferList: public cv::detail::KernelTag { static void run(std::shared_ptr ctx, cv::gimpl::ie::RequestPool &reqPool) { - const auto& in_roi_vec = ctx->inArg(0u).rref(); // NB: In case there is no input data need to post output anyway if (in_roi_vec.empty()) { @@ -1335,7 +1363,7 @@ struct InferList: public cv::detail::KernelTag { setROIBlob(req, ctx->uu.params.input_names[0u], this_blob, rc, *ctx); req.StartAsync(); }, - std::bind(callback, std::placeholders::_1, pos) + std::bind(callback, std::placeholders::_1, std::placeholders::_2, pos) } ); } @@ -1506,7 +1534,7 @@ struct InferList2: public cv::detail::KernelTag { } req.StartAsync(); }, - std::bind(callback, std::placeholders::_1, list_idx) + std::bind(callback, std::placeholders::_1, std::placeholders::_2, list_idx) } // task ); } // for diff --git a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp index 4bd2a10ea5..69b5f6c72b 100644 --- a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp +++ b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp @@ -172,6 +172,7 @@ void Copy::Actor::run(cv::gimpl::GIslandExecutable::IInput &in, return; } + GAPI_DbgAssert(cv::util::holds_alternative(in_msg)); const cv::GRunArgs &in_args = cv::util::get(in_msg); GAPI_Assert(in_args.size() == 1u); @@ -212,6 +213,7 @@ public: return; } + GAPI_Assert(cv::util::holds_alternative(in_msg)); const cv::GRunArgs &in_args = cv::util::get(in_msg); GAPI_Assert(in_args.size() == 1u); auto frame = cv::util::get(in_args[0]); diff --git a/modules/gapi/src/compiler/gislandmodel.cpp b/modules/gapi/src/compiler/gislandmodel.cpp index 1a8e0939e2..920fd700fc 100644 --- a/modules/gapi/src/compiler/gislandmodel.cpp +++ b/modules/gapi/src/compiler/gislandmodel.cpp @@ -412,7 +412,17 @@ void GIslandExecutable::run(GIslandExecutable::IInput &in, 
GIslandExecutable::IO out_objs.emplace_back(ade::util::value(it), out.get(ade::util::checked_cast(ade::util::index(it)))); } - run(std::move(in_objs), std::move(out_objs)); + + try { + run(std::move(in_objs), std::move(out_objs)); + } catch (...) { + auto eptr = std::current_exception(); + for (auto &&it: out_objs) + { + out.post(std::move(it.second), eptr); + } + return; + } // Propagate in-graph meta down to the graph // Note: this is not a complete implementation! Mainly this is a stub diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index 063504a922..565b3c4f21 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -161,7 +161,12 @@ public: const std::vector &desc() const { return d; } }; struct EndOfStream {}; -using StreamMsg = cv::util::variant; + +struct Exception { + std::exception_ptr eptr; +}; + +using StreamMsg = cv::util::variant; struct GIslandExecutable::IInput: public GIslandExecutable::IODesc { virtual ~IInput() = default; virtual StreamMsg get() = 0; // Get a new input vector (blocking) @@ -169,9 +174,11 @@ struct GIslandExecutable::IInput: public GIslandExecutable::IODesc { }; struct GIslandExecutable::IOutput: public GIslandExecutable::IODesc { virtual ~IOutput() = default; - virtual GRunArgP get(int idx) = 0; // Allocate (wrap) a new data object for output idx - virtual void post(GRunArgP&&) = 0; // Release the object back to the framework (mark available) - virtual void post(EndOfStream&&) = 0; // Post end-of-stream marker back to the framework + virtual GRunArgP get(int idx) = 0; // Allocate (wrap) a new data object for output idx + virtual void post(GRunArgP&&, const std::exception_ptr& = {}) = 0; // Release the object back to the framework (mark available) + virtual void post(EndOfStream&&) = 0; // Post end-of-stream marker back to the framework + virtual void post(Exception&&) = 0; + // Assign accumulated metadata to the given output 
object. // This method can only be called after get() and before post(). diff --git a/modules/gapi/src/executor/gexecutor.cpp b/modules/gapi/src/executor/gexecutor.cpp index 6c15d1dfc9..b7b0b5c2d0 100644 --- a/modules/gapi/src/executor/gexecutor.cpp +++ b/modules/gapi/src/executor/gexecutor.cpp @@ -270,6 +270,7 @@ class cv::gimpl::GExecutor::Output final: public cv::gimpl::GIslandExecutable::I { cv::gimpl::Mag &mag; std::unordered_map out_idx; + std::exception_ptr eptr; GRunArgP get(int idx) override { @@ -278,8 +279,18 @@ class cv::gimpl::GExecutor::Output final: public cv::gimpl::GIslandExecutable::I out_idx[cv::gimpl::proto::ptr(r)] = idx; return r; } - void post(GRunArgP&&) override { } // Do nothing here + void post(GRunArgP&&, const std::exception_ptr& e) override + { + if (e) + { + eptr = e; + } + } void post(EndOfStream&&) override {} // Do nothing here too + void post(Exception&& ex) override + { + eptr = std::move(ex.eptr); + } void meta(const GRunArgP &out, const GRunArg::Meta &m) override { const auto idx = out_idx.at(cv::gimpl::proto::ptr(out)); @@ -291,6 +302,14 @@ public: { set(rcs); } + + void verify() + { + if (eptr) + { + std::rethrow_exception(eptr); + } + } }; void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) @@ -389,6 +408,8 @@ void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) Input i{m_res, op.in_objects}; Output o{m_res, op.out_objects}; op.isl_exec->run(i, o); + // NB: Check if execution finished without exception. 
+ o.verify(); } // (7) diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index a3a2746acc..34424cb94b 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -31,6 +31,8 @@ #include #include +#include + namespace { using namespace cv::gimpl::stream; @@ -310,14 +312,13 @@ class QueueReader const std::size_t this_id); public: - bool getInputVector (std::vector &in_queues, - cv::GRunArgs &in_constants, - cv::GRunArgs &isl_inputs); + cv::gimpl::StreamMsg getInputVector (std::vector &in_queues, + cv::GRunArgs &in_constants); - bool getResultsVector(std::vector &in_queues, - const std::vector &in_mapping, - const std::size_t out_size, - cv::GRunArgs &out_results); + using V = cv::util::variant; + V getResultsVector(std::vector &in_queues, + const std::vector &in_mapping, + const std::size_t out_size); }; void rewindToStop(std::vector &in_queues, @@ -369,9 +370,8 @@ void QueueReader::rewindToStop(std::vector &in_queues, ::rewindToStop(in_queues, this_id); } -bool QueueReader::getInputVector(std::vector &in_queues, - cv::GRunArgs &in_constants, - cv::GRunArgs &isl_inputs) +cv::gimpl::StreamMsg QueueReader::getInputVector(std::vector &in_queues, + cv::GRunArgs &in_constants) { // NB: Need to release resources from the previous step, to fetch new ones. // On some systems it might be impossible to allocate new memory @@ -381,72 +381,98 @@ bool QueueReader::getInputVector(std::vector &in_queues, // lifetime, keep the whole cmd vector (of size == # of inputs) // in memory. 
m_cmd.resize(in_queues.size()); - isl_inputs.resize(in_queues.size()); + cv::GRunArgs isl_inputs(in_queues.size()); + cv::optional exception; for (auto &&it : ade::util::indexed(in_queues)) { - auto id = ade::util::index(it); - auto &q = ade::util::value(it); + auto id = ade::util::index(it); + auto &q = ade::util::value(it); - if (q == nullptr) - { - GAPI_Assert(!in_constants.empty()); - // NULL queue means a graph-constant value (like a - // value-initialized scalar) - // It can also hold a constant value received with - // Stop::Kind::CNST message (see above). - isl_inputs[id] = in_constants[id]; - continue; - } + if (q == nullptr) + { + GAPI_Assert(!in_constants.empty()); + // NULL queue means a graph-constant value (like a + // value-initialized scalar) + // It can also hold a constant value received with + // Stop::Kind::CNST message (see above). + isl_inputs[id] = in_constants[id]; + continue; + } - q->pop(m_cmd[id]); - if (!cv::util::holds_alternative(m_cmd[id])) - { - isl_inputs[id] = cv::util::get(m_cmd[id]); - } - else // A Stop sign - { - const auto &stop = cv::util::get(m_cmd[id]); - if (stop.kind == Stop::Kind::CNST) - { - // We've got a Stop signal from a const source, - // propagated as a result of real stream reaching its - // end. Sometimes these signals come earlier than - // real EOS Stops so are deprioritized -- just - // remember the Const value here and continue - // processing other queues. 
Set queue pointer to - // nullptr and update the const_val vector - // appropriately - m_finishing = true; - in_queues[id] = nullptr; - in_constants.resize(in_queues.size()); - in_constants[id] = std::move(stop.cdata); + q->pop(m_cmd[id]); + switch (m_cmd[id].index()) + { + case Cmd::index_of(): + isl_inputs[id] = cv::util::get(m_cmd[id]); + break; + case Cmd::index_of(): + { + const auto &stop = cv::util::get(m_cmd[id]); + if (stop.kind == Stop::Kind::CNST) + { + // We've got a Stop signal from a const source, + // propagated as a result of real stream reaching its + // end. Sometimes these signals come earlier than + // real EOS Stops so are deprioritized -- just + // remember the Const value here and continue + // processing other queues. Set queue pointer to + // nullptr and update the const_val vector + // appropriately + m_finishing = true; + in_queues[id] = nullptr; + in_constants.resize(in_queues.size()); + in_constants[id] = std::move(stop.cdata); - // NEXT time (on a next call to getInputVector()), the - // "q==nullptr" check above will be triggered, but now - // we need to make it manually: - isl_inputs[id] = in_constants[id]; - } - else - { - GAPI_Assert(stop.kind == Stop::Kind::HARD); - rewindToStop(in_queues, id); - // After queues are read to the proper indicator, - // indicate end-of-stream - return false; - } // if(Cnst) - } // if(Stop) + // NEXT time (on a next call to getInputVector()), the + // "q==nullptr" check above will be triggered, but now + // we need to make it manually: + isl_inputs[id] = in_constants[id]; + } + else + { + GAPI_Assert(stop.kind == Stop::Kind::HARD); + rewindToStop(in_queues, id); + // After queues are read to the proper indicator, + // indicate end-of-stream + return cv::gimpl::StreamMsg{cv::gimpl::EndOfStream{}}; + } // if(Cnst) + break; + } + case Cmd::index_of(): + { + exception = + cv::util::make_optional(cv::util::get(m_cmd[id])); + break; + } + default: + GAPI_Assert(false && "Unsupported cmd type in 
getInputVector()"); + } } // for(in_queues) + if (exception.has_value()) { + return cv::gimpl::StreamMsg{exception.value()}; + } + if (m_finishing) { // If the process is about to end (a soft Stop was received // already) and an island has no other inputs than constant // inputs, its queues may all become nullptrs. Indicate it as // "no data". - return !ade::util::all_of(in_queues, [](Q *ptr){return ptr == nullptr;}); + if (ade::util::all_of(in_queues, [](Q *ptr){return ptr == nullptr;})) { + return cv::gimpl::StreamMsg{cv::gimpl::EndOfStream{}}; + } } - return true; // A regular case - there is data to process. + // A regular case - there is data to process + for (auto& arg : isl_inputs) { + if (arg.index() == cv::GRunArg::index_of()) { + arg = cv::GRunArg{ cv::make_rmat(cv::util::get(arg)) + , arg.meta + }; + } + } + return cv::gimpl::StreamMsg{std::move(isl_inputs)}; } // This is a special method to obtain a result vector @@ -474,33 +500,47 @@ bool QueueReader::getInputVector(std::vector &in_queues, // (_may be_ partially filled) to the same final output queue. // The receiver part at the GStreamingExecutor level won't change // because of that. -bool QueueReader::getResultsVector(std::vector &in_queues, - const std::vector &in_mapping, - const std::size_t out_size, - cv::GRunArgs &out_results) + +QueueReader::V QueueReader::getResultsVector(std::vector &in_queues, + const std::vector &in_mapping, + const std::size_t out_size) { + cv::GRunArgs out_results(out_size); m_cmd.resize(out_size); + cv::optional exception; for (auto &&it : ade::util::indexed(in_queues)) { auto ii = ade::util::index(it); auto oi = in_mapping[ii]; auto &q = ade::util::value(it); q->pop(m_cmd[oi]); - if (!cv::util::holds_alternative(m_cmd[oi])) - { - out_results[oi] = std::move(cv::util::get(m_cmd[oi])); - } - else // A Stop sign - { - // In theory, the CNST should never reach here. 
- // Collector thread never handles the inputs directly - // (collector's input queues are always produced by - // islands in the graph). - rewindToStop(in_queues, ii); - return false; - } // if(Stop) + + switch (m_cmd[oi].index()) { + case Cmd::index_of(): + out_results[oi] = std::move(cv::util::get(m_cmd[oi])); + break; + case Cmd::index_of(): + // In theory, the CNST should never reach here. + // Collector thread never handles the inputs directly + // (collector's input queues are always produced by + // islands in the graph). + rewindToStop(in_queues, ii); + return QueueReader::V(Stop{}); + case Cmd::index_of(): + exception = + cv::util::make_optional(cv::util::get(m_cmd[oi])); + break; + default: + cv::util::throw_error( + std::logic_error("Unexpected cmd kind in getResultsVector")); + } // switch } // for(in_queues) - return true; + + if (exception.has_value()) { + return QueueReader::V(exception.value()); + } + + return QueueReader::V(out_results); } @@ -521,7 +561,9 @@ void emitterActorThread(std::shared_ptr emitter, || cv::util::holds_alternative(cmd)); if (cv::util::holds_alternative(cmd)) { - for (auto &&oq : out_queues) oq->push(cmd); + for (auto &&oq : out_queues) { + oq->push(cmd); + } return; } @@ -547,10 +589,21 @@ void emitterActorThread(std::shared_ptr emitter, // Try to obtain next data chunk from the source cv::GRunArg data; - const bool result = [&](){ - GAPI_ITT_AUTO_TRACE_GUARD(emitter_pull_hndl); - return emitter->pull(data); - }(); + bool result = false; + try { + result = [&](){ + GAPI_ITT_AUTO_TRACE_GUARD(emitter_pull_hndl); + return emitter->pull(data); + }(); + } catch (...) { + auto eptr = std::current_exception(); + for (auto &&oq : out_queues) + { + oq->push(Cmd{cv::gimpl::Exception{eptr}}); + } + // NB: Go to the next iteration. 
+ continue; + } if (result) { @@ -673,28 +726,8 @@ class StreamingInput final: public cv::gimpl::GIslandExecutable::IInput std::vector &in_queues; // FIXME: This can be part of QueueReader cv::GRunArgs &in_constants; // FIXME: This can be part of QueueReader - virtual cv::gimpl::StreamMsg get() override - { - GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::get"); - GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + cv::optional last_read_msg; - cv::GRunArgs isl_input_args; - - if (!qr.getInputVector(in_queues, in_constants, isl_input_args)) - { - // Stop case - return cv::gimpl::StreamMsg{cv::gimpl::EndOfStream{}}; - } - // Wrap all input cv::Mats with RMats - for (auto& arg : isl_input_args) { - if (arg.index() == cv::GRunArg::index_of()) { - arg = cv::GRunArg{ cv::make_rmat(cv::util::get(arg)) - , arg.meta - }; - } - } - return cv::gimpl::StreamMsg{std::move(isl_input_args)}; - } virtual cv::gimpl::StreamMsg try_get() override { // FIXME: This is not very usable at the moment! 
@@ -709,17 +742,43 @@ class StreamingInput final: public cv::gimpl::GIslandExecutable::IInput { set(in_descs); } + + const cv::gimpl::StreamMsg& read() + { + GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::read"); + GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + + last_read_msg = + cv::optional( + qr.getInputVector(in_queues, in_constants)); + return last_read_msg.value(); + } + + virtual cv::gimpl::StreamMsg get() override + { + GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::get"); + GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + + if (!last_read_msg.has_value()) { + (void)read(); + } + auto msg = std::move(last_read_msg.value()); + last_read_msg = cv::optional(); + return msg; + } }; class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput { // These objects form an internal state of the StreamingOutput struct Posting - { - using V = cv::util::variant; - V data; - bool ready = false; - }; + { + using V = cv::util::variant; + V data; + bool ready = false; + }; using PostingList = std::list; std::vector m_postings; std::unordered_map< const void* @@ -820,7 +879,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput return ret_val; } - virtual void post(cv::GRunArgP&& argp) override + virtual void post(cv::GRunArgP&& argp, const std::exception_ptr& exptr) override { GAPI_ITT_STATIC_LOCAL_HANDLE(outputs_post_hndl, "StreamingOutput::post"); GAPI_ITT_AUTO_TRACE_GUARD(outputs_post_hndl); @@ -834,6 +893,9 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput const int out_idx = it->second.first; const auto out_iter = it->second.second; out_iter->ready = true; + if (exptr) { + out_iter->data = cv::gimpl::Exception{exptr}; + } m_postIdx.erase(it); // Drop the link from the cache anyway if (out_iter != m_postings[out_idx].begin()) { @@ -845,16 +907,22 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput while (post_iter != m_postings[out_idx].end() && 
post_iter->ready == true) { Cmd cmd; - if (cv::util::holds_alternative(post_iter->data)) + switch (post_iter->data.index()) { - cmd = Cmd{cv::util::get(post_iter->data)}; - } - else - { - GAPI_Assert(cv::util::holds_alternative(post_iter->data)); - cmd = Cmd{Stop{}}; - m_stops_sent++; + case Posting::V::index_of(): + cmd = Cmd{cv::util::get(post_iter->data)}; + break; + case Posting::V::index_of(): + cmd = Cmd{cv::util::get(post_iter->data)}; + break; + case Posting::V::index_of(): + cmd = Cmd{Stop{}}; + m_stops_sent++; + break; + default: + GAPI_Assert(false && "Unreachable code"); } + for (auto &&q : m_out_queues[out_idx]) { q->push(cmd); @@ -889,6 +957,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput } } } + void meta(const cv::GRunArgP &out, const cv::GRunArg::Meta &m) override { std::lock_guard lock{m_mutex}; @@ -919,6 +988,32 @@ public: // when it posted/resent all STOP messages to all its outputs. return m_stops_sent == desc().size(); } + + virtual void post(cv::gimpl::Exception&& error) override + { + std::lock_guard lock{m_mutex}; + // If the posting list is empty, just broadcast the stop message. + // If it is not, enqueue the Stop message in the postings list. + for (auto &&it : ade::util::indexed(m_postings)) + { + const auto idx = ade::util::index(it); + auto &lst = ade::util::value(it); + if (lst.empty()) + { + for (auto &&q : m_out_queues[idx]) + { + q->push(Cmd(std::move(error))); + } + } + else + { + Posting p; + p.data = Posting::V{std::move(error)}; + p.ready = true; + lst.push_back(std::move(p)); // FIXME: For some reason {}-ctor didn't work here + } + } + } }; // This thread is a plain dumb processing actor. What it do is just: @@ -947,7 +1042,17 @@ void islandActorThread(std::vector in_rcs, while (!output.done()) { GAPI_ITT_AUTO_TRACE_GUARD(island_hndl); - island_exec->run(input, output); + // NB: In case the input message is an cv::gimpl::Exception + // handle it in a general way. 
+ if (cv::util::holds_alternative(input.read())) + { + auto in_msg = input.get(); + output.post(std::move(cv::util::get(in_msg))); + } + else + { + island_exec->run(input, output); + } } } @@ -984,26 +1089,33 @@ void collectorThread(std::vector in_queues, while (true) { GAPI_ITT_AUTO_TRACE_GUARD(collector_hndl); - cv::GRunArgs this_result(out_size); - const bool ok = [&](){ + const auto result = [&](){ GAPI_ITT_AUTO_TRACE_GUARD(collector_get_results_hndl); - return qr.getResultsVector(in_queues, in_mapping, out_size, this_result); + return qr.getResultsVector(in_queues, in_mapping, out_size); }(); - if (!ok) + switch (result.index()) { - if (handle_stop) + case QueueReader::V::index_of(): { - out_queue.push(Cmd{Stop{}}); + GAPI_ITT_AUTO_TRACE_GUARD(collector_push_hndl); + auto this_result = cv::util::get(result); + out_queue.push(Cmd{Result{std::move(this_result), flags}}); + break; } - // Terminate the thread anyway - return; - } - - { - GAPI_ITT_AUTO_TRACE_GUARD(collector_push_hndl); - out_queue.push(Cmd{Result{std::move(this_result), flags}}); + case QueueReader::V::index_of(): + if (handle_stop) + { + out_queue.push(Cmd{Stop{}}); + } + // Terminate the thread anyway + return; + case QueueReader::V::index_of(): + out_queue.push(Cmd{cv::util::get(result)}); + break; + default: + GAPI_Assert(false && "Unreachable code"); } } } @@ -1707,16 +1819,24 @@ bool cv::gimpl::GStreamingExecutor::pull(cv::GRunArgsP &&outs) Cmd cmd; m_out_queue.pop(cmd); - if (cv::util::holds_alternative(cmd)) - { - wait_shutdown(); - return false; + switch (cmd.index()) { + case Cmd::index_of(): + wait_shutdown(); + return false; + case Cmd::index_of(): { + GAPI_Assert(cv::util::holds_alternative(cmd)); + cv::GRunArgs &this_result = cv::util::get(cmd).args; + sync_data(this_result, outs); + return true; + } + case Cmd::index_of(): { + std::rethrow_exception(cv::util::get(cmd).eptr); + return true; + } + default: + GAPI_Assert(false && "Unsupported cmd type in pull"); } - - 
GAPI_Assert(cv::util::holds_alternative(cmd)); - cv::GRunArgs &this_result = cv::util::get(cmd).args; - sync_data(this_result, outs); - return true; + GAPI_Assert(false && "Unreachable code"); } bool cv::gimpl::GStreamingExecutor::pull(cv::GOptRunArgsP &&outs) @@ -1734,15 +1854,20 @@ bool cv::gimpl::GStreamingExecutor::pull(cv::GOptRunArgsP &&outs) Cmd cmd; m_out_queue.pop(cmd); - if (cv::util::holds_alternative(cmd)) - { - wait_shutdown(); - return false; + switch (cmd.index()) { + case Cmd::index_of(): + wait_shutdown(); + return false; + case Cmd::index_of(): { + sync_data(cv::util::get(cmd), outs); + return true; + } + case Cmd::index_of(): { + std::rethrow_exception(cv::util::get(cmd).eptr); + return true; + } } - - GAPI_Assert(cv::util::holds_alternative(cmd)); - sync_data(cv::util::get(cmd), outs); - return true; + GAPI_Assert(false && "Unreachable code"); } std::tuple> cv::gimpl::GStreamingExecutor::pull() diff --git a/modules/gapi/src/executor/gstreamingexecutor.hpp b/modules/gapi/src/executor/gstreamingexecutor.hpp index b4aadcbbaf..da27f6a646 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.hpp +++ b/modules/gapi/src/executor/gstreamingexecutor.hpp @@ -50,11 +50,12 @@ struct Result { using Cmd = cv::util::variant < cv::util::monostate - , Start // Tells emitters to start working. Not broadcasted to workers. - , Stop // Tells emitters to stop working. Broadcasted to workers. - , cv::GRunArg // Workers data payload to process. - , Result // Pipeline's data for gout() - >; + , Start // Tells emitters to start working. Not broadcasted to workers. + , Stop // Tells emitters to stop working. Broadcasted to workers. + , cv::GRunArg // Workers data payload to process. + , Result // Pipeline's data for gout() + , cv::gimpl::Exception // Exception which is thrown while execution. + >; // Interface over a queue. The underlying queue implementation may be // different. 
This class is mainly introduced to bring some diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 8dc23a3880..3741438373 100644 --- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -2915,6 +2915,47 @@ TEST(Infer, ModelWith2DInputs) #endif // HAVE_NGRAPH +TEST(TestAgeGender, ThrowBlobAndInputPrecisionMismatchStreaming) +{ + const std::string device = "MYRIAD"; + skipIfDeviceNotAvailable(device); + + initDLDTDataPath(); + + cv::gapi::ie::detail::ParamDesc params; + // NB: Precision for inputs is U8. + params.model_path = compileAgeGenderBlob(device); + params.device_id = device; + + // Configure & run G-API + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "test-age-gender"); + + auto pp = cv::gapi::ie::Params { + params.model_path, params.device_id + }.cfgOutputLayers({ "age_conv3", "prob" }); + + cv::GMat in, age, gender; + std::tie(age, gender) = cv::gapi::infer(in); + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(age, gender)) + .compileStreaming(cv::compile_args(cv::gapi::networks(pp))); + + cv::Mat in_mat(320, 240, CV_32FC3); + cv::randu(in_mat, 0, 1); + cv::Mat gapi_age, gapi_gender; + + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + // NB: Blob precision is U8, but user pass FP32 data, so exception will be thrown. + // Now exception comes directly from IE, but since G-API has information + // about data precision at the compile stage, consider the possibility of + // throwing exception from there. 
+ for (int i = 0; i < 10; ++i) { + EXPECT_ANY_THROW(pipeline.pull(cv::gout(gapi_age, gapi_gender))); + } +} + } // namespace opencv_test #endif // HAVE_INF_ENGINE diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp index 4d33d4b0c5..ffa1d452c1 100644 --- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp @@ -304,6 +304,66 @@ void checkPullOverload(const cv::Mat& ref, EXPECT_EQ(0., cv::norm(ref, out_mat, cv::NORM_INF)); } +class InvalidSource : public cv::gapi::wip::IStreamSource { +public: + InvalidSource(const size_t throw_every_nth_frame, + const size_t num_frames) + : m_throw_every_nth_frame(throw_every_nth_frame), + m_curr_frame_id(0u), + m_num_frames(num_frames), + m_mat(1, 1, CV_8U) { + } + + static std::string exception_msg() + { + return "InvalidSource sucessfuly failed!"; + } + + bool pull(cv::gapi::wip::Data& d) { + ++m_curr_frame_id; + if (m_curr_frame_id > m_num_frames) { + return false; + } + + if (m_curr_frame_id % m_throw_every_nth_frame == 0) { + throw std::logic_error(InvalidSource::exception_msg()); + return true; + } else { + d = cv::Mat(m_mat); + } + + return true; + } + + cv::GMetaArg descr_of() const override { + return cv::GMetaArg{cv::descr_of(m_mat)}; + } + +private: + size_t m_throw_every_nth_frame; + size_t m_curr_frame_id; + size_t m_num_frames; + cv::Mat m_mat; +}; + +G_TYPED_KERNEL(GThrowExceptionOp, , "org.opencv.test.throw_error_op") +{ + static GMatDesc outMeta(GMatDesc in) { return in; } +}; + +GAPI_OCV_KERNEL(GThrowExceptionKernel, GThrowExceptionOp) +{ + static std::string exception_msg() + { + return "GThrowExceptionKernel sucessfuly failed"; + } + + static void run(const cv::Mat&, cv::Mat&) + { + throw std::logic_error(GThrowExceptionKernel::exception_msg()); + } +}; + } // anonymous namespace TEST_P(GAPI_Streaming, SmokeTest_ConstInput_GMat) @@ -2512,5 +2572,109 @@ TEST(GAPI_Streaming, 
TestDesyncMediaFrameGray) { } } +TEST(GAPI_Streaming_Exception, SingleKernelThrow) { + cv::GMat in; + auto pipeline = cv::GComputation(in, GThrowExceptionOp::on(in)) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + cv::Mat in_mat(cv::Size(300, 300), CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(GThrowExceptionKernel::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, StreamingBackendExceptionAsInput) { + cv::GMat in; + auto pipeline = cv::GComputation(in, + cv::gapi::copy(GThrowExceptionOp::on(in))) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + cv::Mat in_mat(cv::Size(300, 300), CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(GThrowExceptionKernel::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, RegularBacckendsExceptionAsInput) { + cv::GMat in; + auto pipeline = cv::GComputation(in, + cv::gapi::add(GThrowExceptionOp::on(in), GThrowExceptionOp::on(in))) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + cv::Mat in_mat(cv::Size(300, 300), CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(GThrowExceptionKernel::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, SourceThrow) { + cv::GMat in; + auto pipeline = cv::GComputation(in, 
cv::gapi::copy(in)).compileStreaming(); + + pipeline.setSource(std::make_shared(1u, 1u)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(InvalidSource::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, SourceThrowEverySecondFrame) { + constexpr size_t throw_every_nth_frame = 2u; + constexpr size_t num_frames = 10u; + size_t curr_frame = 0; + bool has_frame = true; + cv::Mat out_mat; + + cv::GMat in; + auto pipeline = cv::GComputation(in, cv::gapi::copy(in)).compileStreaming(); + + pipeline.setSource(std::make_shared(throw_every_nth_frame, num_frames)); + pipeline.start(); + while (has_frame) { + ++curr_frame; + try { + has_frame = pipeline.pull(cv::gout(out_mat)); + } catch (const std::exception& e) { + EXPECT_TRUE(curr_frame % throw_every_nth_frame == 0); + EXPECT_EQ(InvalidSource::exception_msg(), e.what()); + } + } + + // NB: Pull was called num_frames + 1(stop). 
+ EXPECT_EQ(num_frames, curr_frame - 1); +} } // namespace opencv_test From 9dd8e4df7fc9c0cf79c50c6f46e8aa389362ff02 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 25 Mar 2022 11:22:45 +0300 Subject: [PATCH 43/84] Merge pull request #21719 from TolyaTalamanov:at/pipeline_modeling_tool-skip-frames-for-nodes [G-API] Pipeline modeling tool: Skip frames * Add skip feature * Refactoring * Fix warning * Put more comments * Fix comments to review * Agregate common params into structure * Fix warning * Clean up & add test * Add assert * Fix warning on Mac * Update modules/gapi/samples/pipeline_modeling_tool.cpp Co-authored-by: Dmitry Matveev --- .../gapi/samples/pipeline_modeling_tool.cpp | 70 +++++-- .../pipeline_builder.hpp | 192 ++++++++++++++---- .../test_pipeline_modeling_tool.py | 26 +++ 3 files changed, 232 insertions(+), 56 deletions(-) diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp index 4ff2cbd82c..2ed9642256 100644 --- a/modules/gapi/samples/pipeline_modeling_tool.cpp +++ b/modules/gapi/samples/pipeline_modeling_tool.cpp @@ -173,6 +173,50 @@ static PLMode strToPLMode(const std::string& mode_str) { } } +template <> +CallParams read(const cv::FileNode& fn) { + auto name = + check_and_read(fn, "name", "node"); + // FIXME: Impossible to read size_t due OpenCV limitations. 
+ auto call_every_nth_opt = readOpt(fn["call_every_nth"]); + auto call_every_nth = call_every_nth_opt.value_or(1); + if (call_every_nth <= 0) { + throw std::logic_error( + name + " call_every_nth must be greater than zero\n" + "Current call_every_nth: " + std::to_string(call_every_nth)); + } + return CallParams{std::move(name), static_cast(call_every_nth)}; +} + +template <> +InferParams read(const cv::FileNode& fn) { + auto name = + check_and_read(fn, "name", "node"); + + InferParams params; + params.path = read(fn); + params.device = check_and_read(fn, "device", name); + params.input_layers = readList(fn, "input_layers", name); + params.output_layers = readList(fn, "output_layers", name); + + return params; +} + +template <> +DummyParams read(const cv::FileNode& fn) { + auto name = + check_and_read(fn, "name", "node"); + + DummyParams params; + params.time = check_and_read(fn, "time", name); + if (params.time < 0) { + throw std::logic_error(name + " time must be positive"); + } + params.output = check_and_read(fn, "output", name); + + return params; +} + static std::vector parseExecList(const std::string& exec_list) { std::vector pl_types; std::stringstream ss(exec_list); @@ -316,31 +360,17 @@ int main(int argc, char* argv[]) { if (!nodes_fn.isSeq()) { throw std::logic_error("nodes in " + name + " must be a sequence"); } + for (auto node_fn : nodes_fn) { - auto node_name = - check_and_read(node_fn, "name", "node"); + auto call_params = read(node_fn); auto node_type = check_and_read(node_fn, "type", "node"); if (node_type == "Dummy") { - auto time = - check_and_read(node_fn, "time", node_name); - if (time < 0) { - throw std::logic_error(node_name + " time must be positive"); - } - auto output = - check_and_read(node_fn, "output", node_name); - builder.addDummy(node_name, time, output); + builder.addDummy(call_params, read(node_fn)); } else if (node_type == "Infer") { - InferParams params; - params.path = read(node_fn); - params.device = - check_and_read(node_fn, 
"device", node_name); - params.input_layers = - readList(node_fn, "input_layers", node_name); - params.output_layers = - readList(node_fn, "output_layers", node_name); - params.config = config; - builder.addInfer(node_name, params); + auto infer_params = read(node_fn); + infer_params.config = config; + builder.addInfer(call_params, infer_params); } else { throw std::logic_error("Unsupported node type: " + node_type); } diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp index a4f69b60ad..3906ae4f4c 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp @@ -23,11 +23,16 @@ struct Edge { P dst; }; +struct CallParams { + std::string name; + size_t call_every_nth; +}; + struct CallNode { using F = std::function; - std::string name; - F run; + CallParams params; + F run; }; struct DataNode { @@ -44,6 +49,80 @@ struct Node { Kind kind; }; +struct SubGraphCall { + G_API_OP(GSubGraph, + , + "custom.subgraph") { + static cv::GMatDesc outMeta(const cv::GMatDesc& in, + cv::GComputation comp, + cv::GCompileArgs compile_args, + const size_t call_every_nth) { + GAPI_Assert(call_every_nth > 0); + auto out_metas = + comp.compile(in, std::move(compile_args)).outMetas(); + GAPI_Assert(out_metas.size() == 1u); + GAPI_Assert(cv::util::holds_alternative(out_metas[0])); + return cv::util::get(out_metas[0]); + } + + }; + + struct SubGraphState { + cv::Mat last_result; + cv::GCompiled cc; + int call_counter = 0; + }; + + GAPI_OCV_KERNEL_ST(SubGraphImpl, GSubGraph, SubGraphState) { + static void setup(const cv::GMatDesc& in, + cv::GComputation comp, + cv::GCompileArgs compile_args, + const size_t /*call_every_nth*/, + std::shared_ptr& state, + const cv::GCompileArgs& /*args*/) { + state.reset(new SubGraphState{}); + state->cc = comp.compile(in, std::move(compile_args)); + auto out_desc = + 
cv::util::get(state->cc.outMetas()[0]); + utils::createNDMat(state->last_result, + out_desc.dims, + out_desc.depth); + } + + static void run(const cv::Mat& in, + cv::GComputation /*comp*/, + cv::GCompileArgs /*compile_args*/, + const size_t call_every_nth, + cv::Mat& out, + SubGraphState& state) { + // NB: Make a call on the first iteration and skip the furthers. + if (state.call_counter == 0) { + state.cc(in, state.last_result); + } + state.last_result.copyTo(out); + state.call_counter = (state.call_counter + 1) % call_every_nth; + } + }; + + void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs); + + size_t numInputs() const { return 1; } + size_t numOutputs() const { return 1; } + + cv::GComputation comp; + cv::GCompileArgs compile_args; + size_t call_every_nth; +}; + +void SubGraphCall::operator()(const cv::GProtoArgs& inputs, + cv::GProtoArgs& outputs) { + GAPI_Assert(inputs.size() == 1u); + GAPI_Assert(cv::util::holds_alternative(inputs[0])); + GAPI_Assert(outputs.empty()); + auto in = cv::util::get(inputs[0]); + outputs.emplace_back(GSubGraph::on(in, comp, compile_args, call_every_nth)); +} + struct DummyCall { G_API_OP(GDummy, , @@ -166,6 +245,11 @@ struct ImportPath { using ModelPath = cv::util::variant; +struct DummyParams { + double time; + OutputDescr output; +}; + struct InferParams { std::string name; ModelPath path; @@ -178,11 +262,11 @@ struct InferParams { class PipelineBuilder { public: PipelineBuilder(); - void addDummy(const std::string& name, - const double time, - const OutputDescr& output); + void addDummy(const CallParams& call_params, + const DummyParams& dummy_params); - void addInfer(const std::string& name, const InferParams& params); + void addInfer(const CallParams& call_params, + const InferParams& infer_params); void setSource(const std::string& name, std::shared_ptr src); @@ -197,8 +281,8 @@ public: private: template - void addCall(const std::string& name, - CallT&& call); + void addCall(const CallParams& 
call_params, + CallT&& call); Pipeline::Ptr construct(); @@ -226,20 +310,21 @@ private: PipelineBuilder::PipelineBuilder() : m_state(new State{}) { }; -void PipelineBuilder::addDummy(const std::string& name, - const double time, - const OutputDescr& output) { +void PipelineBuilder::addDummy(const CallParams& call_params, + const DummyParams& dummy_params) { m_state->kernels.include(); - addCall(name, DummyCall{time, output}); + addCall(call_params, + DummyCall{dummy_params.time, dummy_params.output}); } template -void PipelineBuilder::addCall(const std::string& name, - CallT&& call) { +void PipelineBuilder::addCall(const CallParams& call_params, + CallT&& call) { size_t num_inputs = call.numInputs(); size_t num_outputs = call.numOutputs(); - Node::Ptr call_node(new Node{{},{},Node::Kind{CallNode{name, std::move(call)}}}); + Node::Ptr call_node(new Node{{},{},Node::Kind{CallNode{call_params, + std::move(call)}}}); // NB: Create placeholders for inputs. call_node->in_nodes.resize(num_inputs); // NB: Create outputs with empty data. @@ -249,36 +334,39 @@ void PipelineBuilder::addCall(const std::string& name, Node::Kind{DataNode{}}}); } - auto it = m_state->calls_map.find(name); + auto it = m_state->calls_map.find(call_params.name); if (it != m_state->calls_map.end()) { - throw std::logic_error("Node: " + name + " already exists!"); + throw std::logic_error("Node: " + call_params.name + " already exists!"); } - m_state->calls_map.emplace(name, call_node); + m_state->calls_map.emplace(call_params.name, call_node); m_state->all_calls.emplace_back(call_node); } -void PipelineBuilder::addInfer(const std::string& name, - const InferParams& params) { +void PipelineBuilder::addInfer(const CallParams& call_params, + const InferParams& infer_params) { // NB: No default ctor for Params. 
std::unique_ptr> pp; - if (cv::util::holds_alternative(params.path)) { - auto load_path = cv::util::get(params.path); - pp.reset(new cv::gapi::ie::Params(name, + if (cv::util::holds_alternative(infer_params.path)) { + auto load_path = cv::util::get(infer_params.path); + pp.reset(new cv::gapi::ie::Params(call_params.name, load_path.xml, load_path.bin, - params.device)); + infer_params.device)); } else { - GAPI_Assert(cv::util::holds_alternative(params.path)); - auto import_path = cv::util::get(params.path); - pp.reset(new cv::gapi::ie::Params(name, + GAPI_Assert(cv::util::holds_alternative(infer_params.path)); + auto import_path = cv::util::get(infer_params.path); + pp.reset(new cv::gapi::ie::Params(call_params.name, import_path.blob, - params.device)); + infer_params.device)); } - pp->pluginConfig(params.config); + pp->pluginConfig(infer_params.config); m_state->networks += cv::gapi::networks(*pp); - addCall(name, InferCall{name, params.input_layers, params.output_layers}); + addCall(call_params, + InferCall{call_params.name, + infer_params.input_layers, + infer_params.output_layers}); } void PipelineBuilder::addEdge(const Edge& edge) { @@ -318,7 +406,7 @@ void PipelineBuilder::setSource(const std::string& name, std::shared_ptr src) { GAPI_Assert(!m_state->src && "Only single source pipelines are supported!"); m_state->src = src; - addCall(name, SourceCall{}); + addCall(CallParams{name, 1u/*call_every_nth*/}, SourceCall{}); } void PipelineBuilder::setMode(PLMode mode) { @@ -405,7 +493,7 @@ Pipeline::Ptr PipelineBuilder::construct() { if (in_data_node.expired()) { const auto& call = cv::util::get(call_node->kind); throw std::logic_error( - "Node: " + call.name + " in Pipeline: " + m_state->name + + "Node: " + call.params.name + " in Pipeline: " + m_state->name + " has dangling input by in port: " + std::to_string(i)); } } @@ -424,8 +512,14 @@ Pipeline::Ptr PipelineBuilder::construct() { sorted_calls.push_back(n); } } + + m_state->kernels.include(); + 
m_state->compile_args.emplace_back(m_state->networks); + m_state->compile_args.emplace_back(m_state->kernels); + // (2). Go through every call node. for (auto call_node : sorted_calls) { + auto& call = cv::util::get(call_node->kind); cv::GProtoArgs outputs; cv::GProtoArgs inputs; for (size_t i = 0; i < call_node->in_nodes.size(); ++i) { @@ -437,8 +531,37 @@ Pipeline::Ptr PipelineBuilder::construct() { // (3). Extract proto input from every input node. inputs.push_back(in_data.arg.value()); } + // NB: If node shouldn't be called on each iterations, + // it should be wrapped into subgraph which is able to skip calling. + if (call.params.call_every_nth != 1u) { + // FIXME: Limitation of the subgraph operation (). + // G-API doesn't support dynamic number of inputs/outputs. + if (inputs.size() > 1u) { + throw std::logic_error( + "skip_frame_nth is supported only for single input subgraphs\n" + "Current subgraph has " + std::to_string(inputs.size()) + " inputs"); + } + + if (outputs.size() > 1u) { + throw std::logic_error( + "skip_frame_nth is supported only for single output subgraphs\n" + "Current subgraph has " + std::to_string(inputs.size()) + " outputs"); + } + // FIXME: Should be generalized. + // Now every subgraph contains only single node + // which has single input/output. + GAPI_Assert(cv::util::holds_alternative(inputs[0])); + cv::GProtoArgs subgr_inputs{cv::GProtoArg{cv::GMat()}}; + cv::GProtoArgs subgr_outputs; + call.run(subgr_inputs, subgr_outputs); + auto comp = cv::GComputation(cv::GProtoInputArgs{subgr_inputs}, + cv::GProtoOutputArgs{subgr_outputs}); + call = CallNode{CallParams{call.params.name, 1u/*call_every_nth*/}, + SubGraphCall{std::move(comp), + m_state->compile_args, + call.params.call_every_nth}}; + } // (4). Run call and get outputs. - auto call = cv::util::get(call_node->kind); call.run(inputs, outputs); // (5) If call node doesn't have inputs // it means that it's input producer node (Source). 
@@ -460,9 +583,6 @@ Pipeline::Ptr PipelineBuilder::construct() { } } - m_state->compile_args.emplace_back(m_state->networks); - m_state->compile_args.emplace_back(m_state->kernels); - if (m_state->mode == PLMode::STREAMING) { GAPI_Assert(graph_inputs.size() == 1); GAPI_Assert(cv::util::holds_alternative(graph_inputs[0])); diff --git a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py index f36d0efc3b..d56a0399e9 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py +++ b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py @@ -956,3 +956,29 @@ Pipelines: exec_str = '{} --cfg={} --pl_mode=streaming --drop_frames'.format(pipeline_modeling_tool, cfg_file) out = get_output(exec_str) assert out.startswith('--drop_frames option is supported only for pipelines in "regular" mode') + + +def test_incorrect_call_every_nth(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + call_every_nth: {}\" """ + + error = 'Node0 call_every_nth must be greater than zero\nCurrent call_every_nth: {}' + + def check(cfg_file, call_every_nth): + out = get_output('{} --cfg={}'.format(pipeline_modeling_tool, cfg_file.format(call_every_nth))) + assert out.startswith(error.format(call_every_nth)) + + check(cfg_file, -3) + check(cfg_file, 0) From 4d46958c82c5bf102488775004e7960edca4b42e Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Fri, 25 Mar 2022 15:36:31 +0300 Subject: [PATCH 44/84] fix: inline namespace handling in header parser `inline namespace` should be skipped in header parser namespaces list. 
Example: ```cpp namespace cv { inline namespace inlined { namespace inner { // content } // namespace inner } // namespace inlined } // namespace cv ``` Before fix `inner` is registered as `cv..inner` After fix: `cv.inner` --- modules/python/src2/hdr_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index 0c3360fcbc..ebe13f05c7 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -1002,7 +1002,7 @@ class CppHeaderParser(object): docstring = "" if stmt_type == "namespace": chunks = [block[1] for block in self.block_stack if block[0] == 'namespace'] + [name] - self.namespaces.add('.'.join(chunks)) + self.namespaces.add('.'.join(filter(lambda c: len(c)> 0, chunks))) else: stmt_type, name, parse_flag = "block", "", False From e5bdab0355466af2e2c2a61950a7d5aef3498096 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Fri, 25 Mar 2022 18:11:01 +0300 Subject: [PATCH 45/84] Merge pull request #21728 from anna-khakimova:ak/resize_f32c1_avx_simd GAPI Fluid: SIMD AVX2 Resize F32C1. * GAPI Fluid: Resize F32C1 scalar. * Final version * GAPI Fluid: SIMD AVX2 for Resize F32C1. * Applied comments. * Deleted warning suppression. * Applied comments. 
--- .../gapi/src/backends/fluid/gfluidimgproc.cpp | 20 ++ .../fluid/gfluidimgproc_simd_avx2.hpp | 181 ++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp index a2805b35aa..bdd11b1214 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp @@ -25,6 +25,9 @@ #include "gfluidimgproc_func.hpp" +#if CV_AVX2 +#include "gfluidimgproc_simd_avx2.hpp" +#endif #if CV_SSE4_1 #include "gfluidcore_simd_sse41.hpp" #endif @@ -2132,11 +2135,25 @@ CV_ALWAYS_INLINE void calcRowLinear(const cv::gapi::fluid::View& in, { auto index0 = mapsy[outY + l] - inY; auto index1 = mapsy[outSz.height + outY + l] - inY; + src0[l] = in.InLine(index0); src1[l] = in.InLine(index1); dst[l] = out.OutLine(l); } +#if CV_AVX2 + // number floats in AVX2 SIMD vector. + constexpr int nlanes = 8; + + if (inSz.width >= nlanes && outSz.width >= nlanes) + { + avx2::calcRowLinear32FC1Impl(dst, src0, src1, alpha, mapsx, beta, + inSz, outSz, lpi); + + return; + } +#endif // CV_AVX2 + using alpha_type = typename Mapper::alpha_type; for (int l = 0; l < lpi; ++l) { @@ -2150,6 +2167,7 @@ CV_ALWAYS_INLINE void calcRowLinear(const cv::gapi::fluid::View& in, auto alpha1 = saturate_cast(unity - alpha[x]); auto sx0 = mapsx[x]; auto sx1 = sx0 + 1; + float tmp0 = resize_main_calculation(b0, src0[l][sx0], b1, src1[l][sx0]); float tmp1 = resize_main_calculation(b0, src0[l][sx1], b1, src1[l][sx1]); dst[l][x] = resize_main_calculation(alpha0, tmp0, alpha1, tmp1); @@ -2174,6 +2192,7 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) GAPI_Assert((in.depth == CV_8U && in.chan == 3) || (in.depth == CV_32F && in.chan == 1)); GAPI_Assert(interp == cv::INTER_LINEAR); + int outSz_w; int outSz_h; if (outSz.width == 0 || outSz.height == 0) @@ -2212,6 +2231,7 @@ 
GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) GAPI_Assert((in.meta().depth == CV_8U && in.meta().chan == 3) || (in.meta().depth == CV_32F && in.meta().chan == 1)); GAPI_Assert(interp == cv::INTER_LINEAR); + const int channels = in.meta().chan; const int depth = in.meta().depth; diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp new file mode 100644 index 0000000000..e246f0613b --- /dev/null +++ b/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp @@ -0,0 +1,181 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#if !defined(GAPI_STANDALONE) + +#include "opencv2/gapi/own/saturate.hpp" + +#include + +#include "opencv2/core.hpp" + +#include + +#include +#include + +#include +#include +#include + +namespace cv { +namespace gapi { +namespace fluid { +namespace avx2 { + +CV_ALWAYS_INLINE void v_gather_pairs(const float src[], const int* mapsx, + v_float32x8& low, v_float32x8& high) +{ + low.val = _mm256_castsi256_ps(_mm256_setr_epi64x(*reinterpret_cast(&src[mapsx[0]]), + *reinterpret_cast(&src[mapsx[1]]), + *reinterpret_cast(&src[mapsx[2]]), + *reinterpret_cast(&src[mapsx[3]]))); + high.val = _mm256_castsi256_ps(_mm256_setr_epi64x(*reinterpret_cast(&src[mapsx[4]]), + *reinterpret_cast(&src[mapsx[5]]), + *reinterpret_cast(&src[mapsx[6]]), + *reinterpret_cast(&src[mapsx[7]]))); +} + +CV_ALWAYS_INLINE void v_deinterleave(const v_float32x8& low, const v_float32x8& high, + v_float32x8& even, v_float32x8& odd) +{ + __m256 tmp0 = _mm256_unpacklo_ps(low.val, high.val); + __m256 tmp1 = _mm256_unpackhi_ps(low.val, high.val); + __m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); + __m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); + even.val = 
_mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(tmp2), 216 /*11011000*/)); + odd.val = _mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(tmp3), 216 /*11011000*/)); +} + +// Resize (bi-linear, 32FC1) +CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[], + const float *src0[], + const float *src1[], + const float alpha[], + const int mapsx[], + const float beta[], + const Size& inSz, + const Size& outSz, + const int lpi) +{ + bool xRatioEq1 = inSz.width == outSz.width; + bool yRatioEq1 = inSz.height == outSz.height; + + constexpr int nlanes = v_float32x8::nlanes; + + if (!xRatioEq1 && !yRatioEq1) + { + for (int line = 0; line < lpi; ++line) { + float beta0 = beta[line]; + float beta1 = 1 - beta0; + v_float32x8 v_beta0 = v256_setall_f32(beta0); + int x = 0; + + v_float32x8 low1, high1, s00, s01; + v_float32x8 low2, high2, s10, s11; + for (; x <= outSz.width - nlanes; x += nlanes) + { + v_float32x8 alpha0 = v256_load(&alpha[x]); + // v_float32 alpha1 = 1.f - alpha0; + + v_gather_pairs(src0[line], &mapsx[x], low1, high1); + v_deinterleave(low1, high1, s00, s01); + + // v_float32 res0 = s00*alpha0 + s01*alpha1; + v_float32x8 res0 = v_fma(s00 - s01, alpha0, s01); + + v_gather_pairs(src1[line], &mapsx[x], low2, high2); + v_deinterleave(low2, high2, s10, s11); + + // v_float32 res1 = s10*alpha0 + s11*alpha1; + v_float32x8 res1 = v_fma(s10 - s11, alpha0, s11); + // v_float32 d = res0*beta0 + res1*beta1; + v_float32x8 d = v_fma(res0 - res1, v_beta0, res1); + + v_store(&dst[line][x], d); + } + + for (; x < outSz.width; ++x) + { + float alpha0 = alpha[x]; + float alpha1 = 1 - alpha0; + int sx0 = mapsx[x]; + int sx1 = sx0 + 1; + float res0 = src0[line][sx0] * alpha0 + src0[line][sx1] * alpha1; + float res1 = src1[line][sx0] * alpha0 + src1[line][sx1] * alpha1; + dst[line][x] = beta0 * res0 + beta1 * res1; + } + } + } + else if (!xRatioEq1) + { + + for (int line = 0; line < lpi; ++line) { + int x = 0; + + v_float32x8 low, high, s00, s01; + 
for (; x <= outSz.width - nlanes; x += nlanes) + { + v_float32x8 alpha0 = v256_load(&alpha[x]); + // v_float32 alpha1 = 1.f - alpha0; + + v_gather_pairs(src0[line], &mapsx[x], low, high); + v_deinterleave(low, high, s00, s01); + + // v_float32 d = s00*alpha0 + s01*alpha1; + v_float32x8 d = v_fma(s00 - s01, alpha0, s01); + + v_store(&dst[line][x], d); + } + + for (; x < outSz.width; ++x) { + float alpha0 = alpha[x]; + float alpha1 = 1 - alpha0; + int sx0 = mapsx[x]; + int sx1 = sx0 + 1; + dst[line][x] = src0[line][sx0] * alpha0 + src0[line][sx1] * alpha1; + } + } + + } + else if (!yRatioEq1) + { + int length = inSz.width; // == outSz.width + + for (int line = 0; line < lpi; ++line) { + float beta0 = beta[line]; + float beta1 = 1 - beta0; + v_float32x8 v_beta0 = v256_setall_f32(beta0); + int x = 0; + + for (; x <= length - nlanes; x += nlanes) + { + v_float32x8 s0 = v256_load(&src0[line][x]); + v_float32x8 s1 = v256_load(&src1[line][x]); + + // v_float32 d = s0*beta0 + s1*beta1; + v_float32x8 d = v_fma(s0 - s1, v_beta0, s1); + + v_store(&dst[line][x], d); + } + + for (; x < length; ++x) { + dst[line][x] = beta0 * src0[line][x] + beta1 * src1[line][x]; + } + } + + } + else + { + int length = inSz.width; // == outSz.width + memcpy(dst[0], src0[0], length * sizeof(float)*lpi); + } +} +} // namespace avx2 +} // namespace fliud +} // namespace gapi +} // namespace cv +#endif // !defined(GAPI_STANDALONE) From 386df457a9dd76663eb66c01fcf6faaa6f56d6f1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 27 Mar 2022 12:48:49 +0000 Subject: [PATCH 46/84] python: ensure publishing of subclasses before derived types --- modules/python/src2/gen2.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 79853648c5..1a9239c07f 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -1178,10 +1178,25 @@ class PythonWrapperGenerator(object): classlist1 = 
[(classinfo.decl_idx, name, classinfo) for name, classinfo in classlist] classlist1.sort() + published_types = set() # ensure toposort with base classes for decl_idx, name, classinfo in classlist1: if classinfo.ismap: continue - self.code_type_publish.write(classinfo.gen_def(self)) + def _registerType(classinfo): + if classinfo.decl_idx in published_types: + #print(classinfo.decl_idx, classinfo.name, ' - already published') + return + published_types.add(classinfo.decl_idx) + + if classinfo.base and classinfo.base in self.classes: + base_classinfo = self.classes[classinfo.base] + #print(classinfo.decl_idx, classinfo.name, ' - request publishing of base type ', base_classinfo.decl_idx, base_classinfo.name) + _registerType(base_classinfo) + + #print(classinfo.decl_idx, classinfo.name, ' - published!') + self.code_type_publish.write(classinfo.gen_def(self)) + + _registerType(classinfo) # step 3: generate the code for all the global functions From be38d4ea932bc3a0d06845ed1a2de84acc2a09de Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Wed, 30 Mar 2022 00:14:01 +0300 Subject: [PATCH 47/84] Merge pull request #21777 from anna-khakimova:ak/convertto_simd GAPI Fluid: SIMD for ConvertTo. * GAPI Fluid: SIMD for convertto. 
* Applied comments --- .../perf/cpu/gapi_core_perf_tests_fluid.cpp | 2 +- .../gapi/src/backends/fluid/gfluidcore.cpp | 135 ++----- .../fluid/gfluidcore_func.dispatch.cpp | 59 ++- .../src/backends/fluid/gfluidcore_func.hpp | 41 ++ .../backends/fluid/gfluidcore_func.simd.hpp | 354 ++++++++++++++++++ 5 files changed, 487 insertions(+), 104 deletions(-) diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index e4b8c0b490..83de793a81 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -324,7 +324,7 @@ INSTANTIATE_TEST_CASE_P(ConvertToPerfTestFluid, ConvertToPerfTest, Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1), Values(CV_8U, CV_16U, CV_16S, CV_32F), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(2.5, 1.0), + Values(1.0, 2.5), Values(0.0), Values(cv::compile_args(CORE_FLUID)))); diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index c5cfc19d48..7a8f1f5ed8 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -1555,102 +1555,43 @@ GAPI_FLUID_KERNEL(GFluidLUT, cv::gapi::core::GLUT, false) // //------------------------- -#if CV_SIMD128 -template -CV_ALWAYS_INLINE int run_convertto_simd(DST*, const SRC*, int) +template +CV_ALWAYS_INLINE void convertto_impl(const T in[], T out[], const int length) { - return 0; + memcpy(out, in, length * sizeof(T)); } -CV_ALWAYS_INLINE int run_convertto_simd(uchar *out, const float *in, const int length) + +template +CV_ALWAYS_INLINE void convertto_impl(const SRC in[], DST out[], const int length) { - int l = 0; - for (; l <= length - 16; l += 16) - { - v_int32x4 i0, i1, i2, i3; - i0 = v_round( v_load( (float*)& in[l ] ) ); - i1 = v_round( v_load( (float*)& in[l + 4] ) ); - i2 = v_round( v_load( (float*)& in[l + 8] ) ); - i3 = v_round( v_load( (float*)& in[l + 12] 
) ); - - v_uint16x8 us0, us1; - us0 = v_pack_u(i0, i1); - us1 = v_pack_u(i2, i3); - - v_uint8x16 uc; - uc = v_pack(us0, us1); - v_store((uchar*)& out[l], uc); - } - return l; -} -CV_ALWAYS_INLINE int run_convertto_simd(ushort *out, const float *in, const int length) -{ - int l = 0; - for (; l <= length - 8; l += 8) - { - v_int32x4 i0, i1; - i0 = v_round( v_load( (float*)& in[l ] ) ); - i1 = v_round( v_load( (float*)& in[l + 4] ) ); - - v_uint16x8 us; - us = v_pack_u(i0, i1); - v_store((ushort*)& out[l], us); - } - return l; -} -#endif - -template::value && - std::is_floating_point::value, bool> = true > -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const int length) -{ - // manual SIMD if need rounding - static_assert(std::is_same::value, "64-bit floating-point source is not supported"); - int l = 0; // cycle index -#if CV_SIMD128 - l = run_convertto_simd(out, in, length); + int x = 0; +#if CV_SIMD + x = convertto_simd(in, out, length); #endif // tail of SIMD cycle - for (; l < length; l++) + for (; x < length; ++x) { - out[l] = saturate(in[l], rintf); + out[x] = saturate(in[x], rintf); } } -template::value && - std::is_integral::value , bool> = true > -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const int length) + +template +CV_ALWAYS_INLINE void convertto_impl(const SRC *in, DST* out, const float alpha, const float beta, + const int length) { - for (int l = 0; l < length; l++) + int x = 0; +#if CV_SIMD + x = convertto_scaled_simd(in, out, alpha, beta, length); +#endif + + for (; x < length; ++x) { - out[l] = saturate(in[l]); - } -} -template::value, bool> = true > -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const int length) -{ - static_assert(!std::is_same::value, "64-bit floating-point source is not supported"); - for (int l = 0; l < length; l++) - { - out[l] = static_cast(in[l]); + out[x] = saturate(in[x] * alpha + beta, rintf); } } template -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const 
float alpha, const float beta, - const int length) -{ - static_assert(!std::is_same::value, "64-bit floating-point source is not supported"); - // TODO: optimize if alpha and beta and data are integral - for (int l = 0; l < length; l++) - { - out[l] = saturate(in[l] * alpha + beta, rintf); - } -} - -template -static void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta) +CV_ALWAYS_INLINE void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta) { const auto *in = src.InLine(0); auto *out = dst.OutLine(); @@ -1664,13 +1605,13 @@ static void run_convertto(Buffer &dst, const View &src, double _alpha, double _b const auto beta = static_cast( _beta ); // compute faster if no alpha no beta - if (1.f == alpha && 0.f == beta) + if ((std::fabs(alpha - 1.f) < FLT_EPSILON) && (std::fabs(beta) < FLT_EPSILON)) { - run_convertto(out, in, length); + convertto_impl(in, out, length); } else // if alpha or beta is non-trivial { - run_convertto(out, in, alpha, beta, length); + convertto_impl(in, out, alpha, beta, length); } } @@ -1681,22 +1622,22 @@ GAPI_FLUID_KERNEL(GFluidConvertTo, cv::gapi::core::GConvertTo, false) static void run(const View &src, int /*rtype*/, double alpha, double beta, Buffer &dst) { // DST SRC OP __VA_ARGS__ - UNARY_(uchar , uchar , run_convertto, dst, src, alpha, beta); - UNARY_(uchar , ushort, run_convertto, dst, src, alpha, beta); - UNARY_(uchar , short, run_convertto, dst, src, alpha, beta); - UNARY_(uchar , float, run_convertto, dst, src, alpha, beta); + UNARY_(uchar, uchar , run_convertto, dst, src, alpha, beta); + UNARY_(uchar, ushort, run_convertto, dst, src, alpha, beta); + UNARY_(uchar, short, run_convertto, dst, src, alpha, beta); + UNARY_(uchar, float, run_convertto, dst, src, alpha, beta); UNARY_(ushort, uchar , run_convertto, dst, src, alpha, beta); UNARY_(ushort, ushort, run_convertto, dst, src, alpha, beta); UNARY_(ushort, short, run_convertto, dst, src, alpha, beta); UNARY_(ushort, float, 
run_convertto, dst, src, alpha, beta); - UNARY_( short, uchar , run_convertto, dst, src, alpha, beta); - UNARY_( short, ushort, run_convertto, dst, src, alpha, beta); - UNARY_( short, short, run_convertto, dst, src, alpha, beta); - UNARY_( short, float, run_convertto, dst, src, alpha, beta); - UNARY_( float, uchar , run_convertto, dst, src, alpha, beta); - UNARY_( float, ushort, run_convertto, dst, src, alpha, beta); - UNARY_( float, short, run_convertto, dst, src, alpha, beta); - UNARY_( float, float, run_convertto, dst, src, alpha, beta); + UNARY_(short, uchar , run_convertto, dst, src, alpha, beta); + UNARY_(short, ushort, run_convertto, dst, src, alpha, beta); + UNARY_(short, short, run_convertto, dst, src, alpha, beta); + UNARY_(short, float, run_convertto, dst, src, alpha, beta); + UNARY_(float, uchar , run_convertto, dst, src, alpha, beta); + UNARY_(float, ushort, run_convertto, dst, src, alpha, beta); + UNARY_(float, short, run_convertto, dst, src, alpha, beta); + UNARY_(float, float, run_convertto, dst, src, alpha, beta); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index c235991fba..c9d329b2ff 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -293,9 +293,8 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], #define ADD_SIMD(SRC, DST) \ int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ { \ - \ - CV_CPU_DISPATCH(add_simd, (in1, in2, out, length), \ - CV_CPU_DISPATCH_MODES_ALL); \ + CV_CPU_DISPATCH(add_simd, (in1, in2, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ } ADD_SIMD(uchar, uchar) @@ -320,9 +319,8 @@ ADD_SIMD(float, float) #define SUB_SIMD(SRC, DST) \ int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ { \ - \ - 
CV_CPU_DISPATCH(sub_simd, (in1, in2, out, length), \ - CV_CPU_DISPATCH_MODES_ALL); \ + CV_CPU_DISPATCH(sub_simd, (in1, in2, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ } SUB_SIMD(uchar, uchar) @@ -344,6 +342,55 @@ SUB_SIMD(float, float) #undef SUB_SIMD +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length) \ +{ \ + CV_CPU_DISPATCH(convertto_simd, (in, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length) \ +{ \ + CV_CPU_DISPATCH(convertto_scaled_simd, (in, out, alpha, beta, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp 
b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 3a5d70a045..81aa098b64 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -266,6 +266,47 @@ SUB_SIMD(float, float) #undef SUB_SIMD +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length); + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length); + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index c148f81e77..d1fe33fa2e 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ 
b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -275,6 +275,47 @@ SUB_SIMD(float, float) #undef SUB_SIMD +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length); + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length); + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); @@ -289,6 +330,11 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#define SRC_SHORT_OR_USHORT std::is_same::value || std::is_same::value +#define DST_SHORT_OR_USHORT std::is_same::value || std::is_same::value +#define SRC_DST_SHORT_AND_USHORT (std::is_same::value && std::is_same::value) || (std::is_same::value && 
std::is_same::value) +#define SRC_DST_SHORT_OR_USHORT (std::is_same::value && std::is_same::value) || (std::is_same::value && std::is_same::value) + struct scale_tag {}; struct not_scale_tag {}; @@ -2778,6 +2824,314 @@ SUB_SIMD(float, float) #undef SUB_SIMD +//------------------------- +// +// Fluid kernels: ConvertTo +// +//------------------------- + +CV_ALWAYS_INLINE void store_i16(ushort* outx, const v_uint16& res) +{ + vx_store(outx, res); +} + +CV_ALWAYS_INLINE void store_i16(short* outx, const v_uint16& res) +{ + vx_store(outx, v_reinterpret_as_s16(res)); +} + +CV_ALWAYS_INLINE void store_i16(ushort* outx, const v_int16& res) +{ + vx_store(outx, v_reinterpret_as_u16(res)); +} + +CV_ALWAYS_INLINE void store_i16(short* outx, const v_int16& res) +{ + vx_store(outx, res); +} + +CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const float* inx, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_int32 a1 = v_round(vx_load(inx)); + v_int32 a2 = v_round(vx_load(&inx[nlanes/4])); + v_int32 a3 = v_round(vx_load(&inx[nlanes/2])); + v_int32 a4 = v_round(vx_load(&inx[3*nlanes/4])); + + v_int16 r1 = v_pack(a1, a2); + v_int16 r2 = v_pack(a3, a4); + + vx_store(outx, v_pack_u(r1, r2)); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const SRC* inx, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + vector_type_of_t a1 = vx_load(inx); + vector_type_of_t a2 = vx_load(&inx[nlanes/2]); + + pack_store_uchar(outx, a1, a2); +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const float* inx, DST* outx) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_int32 a1 = v_round(vx_load(inx)); + v_int32 a2 = v_round(vx_load(&inx[nlanes/2])); + + v_store_i16(outx, a1, a2); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const uchar* 
inx, DST* outx) +{ + v_uint8 a = vx_load(inx); + v_uint16 res = v_expand_low(a); + + store_i16(outx, res); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const SRC* inx, DST* outx) +{ + vector_type_of_t a = vx_load(inx); + store_i16(outx, a); +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const SRC* inx, float* outx) +{ + v_float32 a = vg_load_f32(inx); + vx_store(outx, a); +} + +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= length - nlanes; x += nlanes) \ + { \ + convertto_simd_nocoeff_impl(&in[x], &out[x]); \ + } \ + if (x < length) \ + { \ + x = length - nlanes; \ + continue; \ + } \ + break; \ + } \ + return x; \ +} + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +CV_ALWAYS_INLINE void convertto_scaled_simd_impl(const float* inx, uchar* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 a1 = vx_load(inx); + v_float32 a2 = vx_load(&inx[nlanes / 4]); + v_float32 a3 = vx_load(&inx[nlanes / 2]); + v_float32 a4 = vx_load(&inx[3 * nlanes / 4]); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + v_int32 r3 = v_round(v_fma(a3, v_alpha, v_beta)); + v_int32 r4 = v_round(v_fma(a4, v_alpha, 
v_beta)); + + vx_store(outx, v_pack_u(v_pack(r1, r2), v_pack(r3, r4))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const SRC* inx, uchar* outx, const v_float32& v_alpha, + const v_float32& v_beta) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_int16 a = v_reinterpret_as_s16(vx_load(inx)); + v_int16 b = v_reinterpret_as_s16(vx_load(&inx[nlanes / 2])); + + v_float32 a1 = v_cvt_f32(v_expand_low(a)); + v_float32 a2 = v_cvt_f32(v_expand_high(a)); + v_float32 b1 = v_cvt_f32(v_expand_low(b)); + v_float32 b2 = v_cvt_f32(v_expand_high(b)); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + v_int32 r3 = v_round(v_fma(b1, v_alpha, v_beta)); + v_int32 r4 = v_round(v_fma(b2, v_alpha, v_beta)); + + vx_store(outx, v_pack_u(v_pack(r1, r2), v_pack(r3, r4))); +} + +CV_ALWAYS_INLINE void convertto_scaled_simd_impl(const uchar* inx, uchar* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_uint8 a = vx_load(inx); + v_int16 a1 = v_reinterpret_as_s16(v_expand_low(a)); + v_int16 a2 = v_reinterpret_as_s16(v_expand_high(a)); + + v_float32 f1 = v_cvt_f32(v_expand_low(a1)); + v_float32 f2 = v_cvt_f32(v_expand_high(a1)); + + v_float32 f3 = v_cvt_f32(v_expand_low(a2)); + v_float32 f4 = v_cvt_f32(v_expand_high(a2)); + + v_int32 r1 = v_round(v_fma(f1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(f2, v_alpha, v_beta)); + v_int32 r3 = v_round(v_fma(f3, v_alpha, v_beta)); + v_int32 r4 = v_round(v_fma(f4, v_alpha, v_beta)); + + vx_store(outx, v_pack_u(v_pack(r1, r2), v_pack(r3, r4))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const float* inx, DST* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_float32 a1 = vx_load(inx); + v_float32 a2 = vx_load(&inx[nlanes / 2]); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, 
v_alpha, v_beta)); + + v_store_i16(outx, r1, r2); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const uchar* inx, DST* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_int16 a = v_reinterpret_as_s16(vx_load_expand(inx)); + + v_float32 a1 = v_cvt_f32(v_expand_low(a)); + v_float32 a2 = v_cvt_f32(v_expand_high(a)); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + + v_store_i16(outx, r1, r2); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const SRC* inx, DST* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_int16 a = v_reinterpret_as_s16(vx_load(inx)); + + v_float32 a1 = v_cvt_f32(v_expand_low(a)); + v_float32 a2 = v_cvt_f32(v_expand_high(a)); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + + v_store_i16(outx, r1, r2); +} + +template +CV_ALWAYS_INLINE void convertto_scaled_simd_impl(const SRC* inx, float* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_float32 a = vg_load_f32(inx); + vx_store(outx, v_fma(a, v_alpha, v_beta)); +} + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + v_float32 v_alpha = vx_setall_f32(alpha); \ + v_float32 v_beta = vx_setall_f32(beta); \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= length - nlanes; x += nlanes) \ + { \ + convertto_scaled_simd_impl(&in[x], &out[x], v_alpha, v_beta); \ + } \ + if (x < length) \ + { \ + x = length - nlanes; \ + continue; \ + } \ + break; \ + } \ + return x; \ +} + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) 
+CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END From b060151625cd57aa37c6de32a06f6e01cf31e222 Mon Sep 17 00:00:00 2001 From: Suleyman TURKMEN Date: Tue, 29 Mar 2022 10:39:07 +0300 Subject: [PATCH 48/84] add test (DISABLED_open_from_web) --- modules/videoio/test/test_ffmpeg.cpp | 16 +++++++-- modules/videoio/test/test_precomp.hpp | 2 -- modules/videoio/test/test_video_io.cpp | 45 ++------------------------ 3 files changed, 17 insertions(+), 46 deletions(-) diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 87e25bbd3d..40ef586fd6 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -235,8 +235,8 @@ static void generateFrame(Mat &frame, unsigned int i, const Point ¢er, const frame = Scalar::all(i % 255); stringstream buf(ios::out); buf << "frame #" << i; - putText(frame, buf.str(), Point(50, center.y), FONT_HERSHEY_SIMPLEX, 5.0, color, 5, CV_AA); - circle(frame, center, i + 2, color, 2, CV_AA); + putText(frame, buf.str(), Point(50, center.y), FONT_HERSHEY_SIMPLEX, 5.0, color, 5, LINE_AA); + circle(frame, center, i + 2, color, 2, LINE_AA); } TEST(videoio_ffmpeg, parallel) @@ -536,5 +536,17 @@ TEST(videoio_ffmpeg, create_with_property_badarg) EXPECT_FALSE(cap.isOpened()); } +// related issue: https://github.com/opencv/opencv/issues/16821 +TEST(videoio_ffmpeg, DISABLED_open_from_web) +{ + if (!videoio_registry::hasBackend(CAP_FFMPEG)) + throw SkipTestException("FFmpeg backend was not found"); + + string 
video_file = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4"; + VideoCapture cap(video_file, CAP_FFMPEG); + int n_frames = -1; + EXPECT_NO_THROW(n_frames = (int)cap.get(CAP_PROP_FRAME_COUNT)); + EXPECT_EQ((int)14315, n_frames); +} }} // namespace diff --git a/modules/videoio/test/test_precomp.hpp b/modules/videoio/test/test_precomp.hpp index 5bc2ccdf95..cffdf2bef4 100644 --- a/modules/videoio/test/test_precomp.hpp +++ b/modules/videoio/test/test_precomp.hpp @@ -9,8 +9,6 @@ #include "opencv2/ts.hpp" #include "opencv2/videoio.hpp" #include "opencv2/videoio/registry.hpp" -#include "opencv2/imgproc/imgproc_c.h" - #include "opencv2/core/private.hpp" namespace cv { diff --git a/modules/videoio/test/test_video_io.cpp b/modules/videoio/test/test_video_io.cpp index 8b6b16e16e..2ed1267433 100644 --- a/modules/videoio/test/test_video_io.cpp +++ b/modules/videoio/test/test_video_io.cpp @@ -1,47 +1,8 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" -#include "opencv2/videoio/videoio_c.h" namespace opencv_test { From 5440fd6cb43ea65a056c46b691fcdab1a425e92d Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Sat, 19 Mar 2022 20:06:50 +0300 Subject: [PATCH 49/84] videoio: initial FFmpeg 5.0 support --- modules/videoio/src/cap_ffmpeg_impl.hpp | 442 ++++++++++++++++-------- 1 file changed, 304 insertions(+), 138 deletions(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 43c555309b..91a0f710aa 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -41,6 +41,8 @@ //M*/ #include "cap_ffmpeg_legacy_api.hpp" +#include "opencv2/core/utils/logger.hpp" +#include "cap_interface.hpp" using namespace cv; @@ -49,6 +51,7 @@ using namespace cv; #endif #include #include +#include #ifndef __OPENCV_BUILD #define CV_FOURCC(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24)) @@ -79,6 +82,7 @@ extern "C" { #include #include +#include #if LIBAVUTIL_BUILD >= (LIBAVUTIL_VERSION_MICRO >= 100 \ ? 
CALC_FFMPEG_VERSION(51, 63, 100) : CALC_FFMPEG_VERSION(54, 6, 0)) @@ -88,6 +92,62 @@ extern "C" { #include #include +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L602-L605 +#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(58, 9, 100) +# define CV_FFMPEG_REGISTER +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L654-L657 +#if LIBAVCODEC_BUILD < CALC_FFMPEG_VERSION(58, 9, 100) +# define CV_FFMPEG_LOCKMGR +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L390-L392 +#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(58, 87, 100) +#include +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L208-L210 +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(59, 0, 100) +# define CV_FFMPEG_FMT_CONST const +#else +# define CV_FFMPEG_FMT_CONST +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L623-L624 +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 7, 100) +# define CV_FFMPEG_URL +#endif + +// AVStream.codec deprecated in favor of AVStream.codecpar +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L1039-L1040 +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(59, 16, 100) +//#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(57, 33, 100) +# define CV_FFMPEG_CODECPAR +# define CV_FFMPEG_CODEC_FIELD codecpar +#else +# define CV_FFMPEG_CODEC_FIELD codec +#endif + +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(59, 16, 100) +# define CV_FFMPEG_PTS_FIELD pts +#else +# define CV_FFMPEG_PTS_FIELD pkt_pts +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L1757-L1758 +#if LIBAVUTIL_BUILD < CALC_FFMPEG_VERSION(52, 63, 100) +inline static AVRational av_make_q(int num, int den) +{ + AVRational res; + res.num = num; + res.den = den; + 
return res; +} +#endif + + + #ifdef __cplusplus } #endif @@ -471,6 +531,15 @@ static AVRational _opencv_ffmpeg_get_sample_aspect_ratio(AVStream *stream) #endif } +inline static std::string _opencv_ffmpeg_get_error_string(int error_code) +{ + char buf[255] = {0}; + const int err = av_strerror(error_code, buf, 254); + if (err == 0) + return std::string(buf); + else + return std::string("Unknown error"); +} struct CvCapture_FFMPEG { @@ -502,6 +571,7 @@ struct CvCapture_FFMPEG AVFormatContext * ic; AVCodec * avcodec; + AVCodecContext * context; int video_stream; AVStream * video_st; AVFrame * picture; @@ -565,6 +635,7 @@ void CvCapture_FFMPEG::init() img_convert_ctx = 0; avcodec = 0; + context = 0; frame_number = 0; eps_zero = 0.000025; @@ -617,10 +688,19 @@ void CvCapture_FFMPEG::close() if( video_st ) { - avcodec_close( video_st->codec ); +#ifdef CV_FFMPEG_CODECPAR + avcodec_close( context ); +#endif video_st = NULL; } + if (context) + { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#endif + } + if( ic ) { avformat_close_input(&ic); @@ -798,8 +878,10 @@ private: }; #endif + static ImplMutex _mutex; +#ifdef CV_FFMPEG_LOCKMGR static int LockCallBack(void **mutex, AVLockOp op) { ImplMutex* localMutex = reinterpret_cast(*mutex); @@ -830,7 +912,7 @@ static int LockCallBack(void **mutex, AVLockOp op) } return 0; } - +#endif static void ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vargs) { @@ -881,19 +963,59 @@ public: { avformat_network_init(); +#ifdef CV_FFMPEG_REGISTER /* register all codecs, demux and protocols */ av_register_all(); +#endif +#ifdef CV_FFMPEG_LOCKMGR /* register a callback function for synchronization */ av_lockmgr_register(&LockCallBack); +#endif } ~InternalFFMpegRegister() { +#ifdef CV_FFMPEG_LOCKMGR av_lockmgr_register(NULL); +#endif av_log_set_callback(NULL); } }; +inline void fill_codec_context(AVCodecContext * enc, AVDictionary * dict) +{ +//#ifdef FF_API_THREAD_INIT +// avcodec_thread_init(enc, 
get_number_of_cpus()); +//#else + enc->thread_count = get_number_of_cpus(); +//#endif + + AVDictionaryEntry* avdiscard_entry = av_dict_get(dict, "avdiscard", NULL, 0); + + if (avdiscard_entry) + { + if(strcmp(avdiscard_entry->value, "all") == 0) + enc->skip_frame = AVDISCARD_ALL; + else if (strcmp(avdiscard_entry->value, "bidir") == 0) + enc->skip_frame = AVDISCARD_BIDIR; + else if (strcmp(avdiscard_entry->value, "default") == 0) + enc->skip_frame = AVDISCARD_DEFAULT; + else if (strcmp(avdiscard_entry->value, "none") == 0) + enc->skip_frame = AVDISCARD_NONE; + // NONINTRA flag was introduced with version bump at revision: + // https://github.com/FFmpeg/FFmpeg/commit/b152152df3b778d0a86dcda5d4f5d065b4175a7b + // This key is supported only for FFMPEG version +#if LIBAVCODEC_VERSION_MICRO >= 100 && LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(55, 67, 100) + else if (strcmp(avdiscard_entry->value, "nonintra") == 0) + enc->skip_frame = AVDISCARD_NONINTRA; +#endif + else if (strcmp(avdiscard_entry->value, "nonkey") == 0) + enc->skip_frame = AVDISCARD_NONKEY; + else if (strcmp(avdiscard_entry->value, "nonref") == 0) + enc->skip_frame = AVDISCARD_NONREF; + } +} + bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& params) { InternalFFMpegRegister::init(); @@ -997,7 +1119,7 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& #else av_dict_set(&dict, "rtsp_transport", "tcp", 0); #endif - AVInputFormat* input_format = NULL; + CV_FFMPEG_FMT_CONST AVInputFormat* input_format = NULL; AVDictionaryEntry* entry = av_dict_get(dict, "input_format", NULL, 0); if (entry != 0) { @@ -1015,60 +1137,44 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& err = avformat_find_stream_info(ic, NULL); if (err < 0) { - CV_WARN("Could not find codec parameters"); + CV_LOG_WARNING(NULL, "Unable to read codec parameters from stream (" << _opencv_ffmpeg_get_error_string(err) << ")"); goto exit_func; } for(i = 0; i 
< ic->nb_streams; i++) { - AVCodecContext* enc = ic->streams[i]->codec; - -//#ifdef FF_API_THREAD_INIT -// avcodec_thread_init(enc, get_number_of_cpus()); -//#else - enc->thread_count = get_number_of_cpus(); -//#endif - - AVDictionaryEntry* avdiscard_entry = av_dict_get(dict, "avdiscard", NULL, 0); - - if (avdiscard_entry) { - if(strcmp(avdiscard_entry->value, "all") == 0) - enc->skip_frame = AVDISCARD_ALL; - else if (strcmp(avdiscard_entry->value, "bidir") == 0) - enc->skip_frame = AVDISCARD_BIDIR; - else if (strcmp(avdiscard_entry->value, "default") == 0) - enc->skip_frame = AVDISCARD_DEFAULT; - else if (strcmp(avdiscard_entry->value, "none") == 0) - enc->skip_frame = AVDISCARD_NONE; - // NONINTRA flag was introduced with version bump at revision: - // https://github.com/FFmpeg/FFmpeg/commit/b152152df3b778d0a86dcda5d4f5d065b4175a7b - // This key is supported only for FFMPEG version -#if LIBAVCODEC_VERSION_MICRO >= 100 && LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(55, 67, 100) - else if (strcmp(avdiscard_entry->value, "nonintra") == 0) - enc->skip_frame = AVDISCARD_NONINTRA; +#ifndef CV_FFMPEG_CODECPAR + context = ic->streams[i]->codec; + AVCodecID codec_id = context->codec_id; + AVMediaType codec_type = context->codec_type; +#else + AVCodecParameters* par = ic->streams[i]->codecpar; + AVCodecID codec_id = par->codec_id; + AVMediaType codec_type = par->codec_type; #endif - else if (strcmp(avdiscard_entry->value, "nonkey") == 0) - enc->skip_frame = AVDISCARD_NONKEY; - else if (strcmp(avdiscard_entry->value, "nonref") == 0) - enc->skip_frame = AVDISCARD_NONREF; - } - if( AVMEDIA_TYPE_VIDEO == enc->codec_type && video_stream < 0) + if( AVMEDIA_TYPE_VIDEO == codec_type && video_stream < 0) { - CV_LOG_DEBUG(NULL, "FFMPEG: stream[" << i << "] is video stream with codecID=" << (int)enc->codec_id - << " width=" << enc->width - << " height=" << enc->height + // backup encoder' width/height +#ifndef CV_FFMPEG_CODECPAR + int enc_width = context->width; + int enc_height = 
context->height; +#else + int enc_width = par->width; + int enc_height = par->height; +#endif + + CV_LOG_DEBUG(NULL, "FFMPEG: stream[" << i << "] is video stream with codecID=" << (int)codec_id + << " width=" << enc_width + << " height=" << enc_height ); - // backup encoder' width/height - int enc_width = enc->width; - int enc_height = enc->height; #if !USE_AV_HW_CODECS va_type = VIDEO_ACCELERATION_NONE; #endif // find and open decoder, try HW acceleration types specified in 'hw_acceleration' list (in order) - AVCodec *codec = NULL; + const AVCodec *codec = NULL; err = -1; #if USE_AV_HW_CODECS HWAccelIterator accel_iter(va_type, false/*isEncoder*/, dict); @@ -1080,21 +1186,27 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& #if USE_AV_HW_CODECS accel_iter.parse_next(); AVHWDeviceType hw_type = accel_iter.hw_type(); - enc->get_format = avcodec_default_get_format; - if (enc->hw_device_ctx) { - av_buffer_unref(&enc->hw_device_ctx); - } if (hw_type != AV_HWDEVICE_TYPE_NONE) { CV_LOG_DEBUG(NULL, "FFMPEG: trying to configure H/W acceleration: '" << accel_iter.hw_type_device_string() << "'"); AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; - codec = hw_find_codec(enc->codec_id, hw_type, av_codec_is_decoder, accel_iter.disabled_codecs().c_str(), &hw_pix_fmt); - if (codec) { + codec = hw_find_codec(codec_id, hw_type, av_codec_is_decoder, accel_iter.disabled_codecs().c_str(), &hw_pix_fmt); + if (codec) + { +#ifdef CV_FFMPEG_CODECPAR + context = avcodec_alloc_context3(codec); +#endif + CV_Assert(context); + context->get_format = avcodec_default_get_format; + if (context->hw_device_ctx) { + av_buffer_unref(&context->hw_device_ctx); + } if (hw_pix_fmt != AV_PIX_FMT_NONE) - enc->get_format = hw_get_format_callback; // set callback to select HW pixel format, not SW format - enc->hw_device_ctx = hw_create_device(hw_type, hw_device, accel_iter.device_subname(), use_opencl != 0); - if (!enc->hw_device_ctx) + context->get_format = 
hw_get_format_callback; // set callback to select HW pixel format, not SW format + context->hw_device_ctx = hw_create_device(hw_type, hw_device, accel_iter.device_subname(), use_opencl != 0); + if (!context->hw_device_ctx) { + context->get_format = avcodec_default_get_format; CV_LOG_DEBUG(NULL, "FFMPEG: ... can't create H/W device: '" << accel_iter.hw_type_device_string() << "'"); codec = NULL; } @@ -1106,10 +1218,10 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& AVDictionaryEntry* video_codec_param = av_dict_get(dict, "video_codec", NULL, 0); if (video_codec_param == NULL) { - codec = avcodec_find_decoder(enc->codec_id); + codec = avcodec_find_decoder(codec_id); if (!codec) { - CV_LOG_ERROR(NULL, "Could not find decoder for codec_id=" << (int)enc->codec_id); + CV_LOG_ERROR(NULL, "Could not find decoder for codec_id=" << (int)codec_id); } } else @@ -1121,10 +1233,26 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& CV_LOG_ERROR(NULL, "Could not find decoder '" << video_codec_param->value << "'"); } } + if (codec) + { +#ifdef CV_FFMPEG_CODECPAR + context = avcodec_alloc_context3(codec); +#endif + CV_Assert(context); + } } if (!codec) + { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#endif continue; - err = avcodec_open2(enc, codec, NULL); + } + fill_codec_context(context, dict); +#ifdef CV_FFMPEG_CODECPAR + avcodec_parameters_to_context(context, par); +#endif + err = avcodec_open2(context, codec, NULL); if (err >= 0) { #if USE_AV_HW_CODECS va_type = hw_type_to_va_type(hw_type); @@ -1146,10 +1274,10 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& } // checking width/height (since decoder can sometimes alter it, eg. 
vp6f) - if (enc_width && (enc->width != enc_width)) - enc->width = enc_width; - if (enc_height && (enc->height != enc_height)) - enc->height = enc_height; + if (enc_width && (context->width != enc_width)) + context->width = enc_width; + if (enc_height && (context->height != enc_height)) + context->height = enc_height; video_stream = i; video_st = ic->streams[i]; @@ -1160,8 +1288,8 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& picture = avcodec_alloc_frame(); #endif - frame.width = enc->width; - frame.height = enc->height; + frame.width = context->width; + frame.height = context->height; frame.cn = 3; frame.step = 0; frame.data = NULL; @@ -1306,7 +1434,7 @@ bool CvCapture_FFMPEG::grabFrame() int count_errs = 0; const int max_number_of_attempts = 1 << 9; - if( !ic || !video_st ) return false; + if( !ic || !video_st || !context ) return false; if( ic->streams[video_stream]->nb_frames > 0 && frame_number > ic->streams[video_stream]->nb_frames ) @@ -1322,7 +1450,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_SEND_FRAME_API // check if we can receive frame from previously decoded packet - valid = avcodec_receive_frame(video_st->codec, picture) >= 0; + valid = avcodec_receive_frame(context, picture) >= 0; #endif // get the next frame @@ -1372,19 +1500,19 @@ bool CvCapture_FFMPEG::grabFrame() // Decode video frame #if USE_AV_SEND_FRAME_API - if (avcodec_send_packet(video_st->codec, &packet) < 0) { + if (avcodec_send_packet(context, &packet) < 0) { break; } - ret = avcodec_receive_frame(video_st->codec, picture); + ret = avcodec_receive_frame(context, picture); #else int got_picture = 0; - avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet); + avcodec_decode_video2(context, picture, &got_picture, &packet); ret = got_picture ? 
0 : -1; #endif if (ret >= 0) { //picture_pts = picture->best_effort_timestamp; if( picture_pts == AV_NOPTS_VALUE_ ) - picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts; + picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; valid = true; } else if (ret == AVERROR(EAGAIN)) { @@ -1415,7 +1543,7 @@ bool CvCapture_FFMPEG::grabFrame() bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, int* width, int* height, int* cn) { - if (!video_st) + if (!video_st || !context) return false; if (rawMode || flag == extraDataIdx) @@ -1428,8 +1556,8 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, ret = p.data != NULL; } else if (flag == extraDataIdx) { - *data = ic->streams[video_stream]->codec->extradata; - *step = ic->streams[video_stream]->codec->extradata_size; + *data = ic->streams[video_stream]->CV_FFMPEG_CODEC_FIELD->extradata; + *step = ic->streams[video_stream]->CV_FFMPEG_CODEC_FIELD->extradata_size; } *width = *step; *height = 1; @@ -1454,13 +1582,13 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, return false; if( img_convert_ctx == NULL || - frame.width != video_st->codec->width || - frame.height != video_st->codec->height || + frame.width != video_st->CV_FFMPEG_CODEC_FIELD->width || + frame.height != video_st->CV_FFMPEG_CODEC_FIELD->height || frame.data == NULL ) { // Some sws_scale optimizations have some assumptions about alignment of data/step/width/height // Also we use coded_width/height to workaround problem with legacy ffmpeg versions (like n0.8) - int buffer_width = video_st->codec->coded_width, buffer_height = video_st->codec->coded_height; + int buffer_width = context->coded_width, buffer_height = context->coded_height; img_convert_ctx = sws_getCachedContext( img_convert_ctx, @@ -1494,8 +1622,8 @@ bool 
CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, _opencv_ffmpeg_av_image_fill_arrays(&rgb_picture, rgb_picture.data[0], AV_PIX_FMT_BGR24, buffer_width, buffer_height ); #endif - frame.width = video_st->codec->width; - frame.height = video_st->codec->height; + frame.width = video_st->CV_FFMPEG_CODEC_FIELD->width; + frame.height = video_st->CV_FFMPEG_CODEC_FIELD->height; frame.cn = 3; frame.data = rgb_picture.data[0]; frame.step = rgb_picture.linesize[0]; @@ -1505,7 +1633,7 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, img_convert_ctx, sw_picture->data, sw_picture->linesize, - 0, video_st->codec->coded_height, + 0, context->coded_height, rgb_picture.data, rgb_picture.linesize ); @@ -1529,12 +1657,12 @@ bool CvCapture_FFMPEG::retrieveHWFrame(cv::OutputArray output) { #if USE_AV_HW_CODECS // check that we have HW frame in GPU memory - if (!picture || !picture->hw_frames_ctx) { + if (!picture || !picture->hw_frames_ctx || !context) { return false; } // GPU color conversion NV12->BGRA, from GPU media buffer to GPU OpenCL buffer - return hw_copy_frame_to_umat(video_st->codec->hw_device_ctx, picture, output); + return hw_copy_frame_to_umat(context->hw_device_ctx, picture, output); #else CV_UNUSED(output); return false; @@ -1543,7 +1671,7 @@ bool CvCapture_FFMPEG::retrieveHWFrame(cv::OutputArray output) double CvCapture_FFMPEG::getProperty( int property_id ) const { - if( !video_st ) return 0; + if( !video_st || !context ) return 0; double codec_tag = 0; CV_CODEC_ID codec_id = AV_CODEC_ID_NONE; @@ -1570,8 +1698,8 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const case CAP_PROP_FPS: return get_fps(); case CAP_PROP_FOURCC: - codec_id = video_st->codec->codec_id; - codec_tag = (double) video_st->codec->codec_tag; + codec_id = video_st->CV_FFMPEG_CODEC_FIELD->codec_id; + codec_tag = (double) video_st->CV_FFMPEG_CODEC_FIELD->codec_tag; if(codec_tag || codec_id == AV_CODEC_ID_NONE) { @@ -1591,7 +1719,11 
@@ double CvCapture_FFMPEG::getProperty( int property_id ) const return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den; case CAP_PROP_CODEC_PIXEL_FORMAT: { +#ifdef CV_FFMPEG_CODECPAR + AVPixelFormat pix_fmt = (AVPixelFormat)video_st->codecpar->format; +#else AVPixelFormat pix_fmt = video_st->codec->pix_fmt; +#endif unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt); return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag; } @@ -1671,7 +1803,7 @@ double CvCapture_FFMPEG::get_fps() const if (fps < eps_zero) { - fps = 1.0 / r2d(ic->streams[video_stream]->codec->time_base); + fps = 1.0 / r2d(ic->streams[video_stream]->time_base); } #endif return fps; @@ -1703,7 +1835,16 @@ double CvCapture_FFMPEG::dts_to_sec(int64_t dts) const void CvCapture_FFMPEG::get_rotation_angle() { rotation_angle = 0; -#if LIBAVUTIL_BUILD >= CALC_FFMPEG_VERSION(52, 94, 100) +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(57, 68, 100) + const uint8_t *data = 0; + data = av_stream_get_side_data(video_st, AV_PKT_DATA_DISPLAYMATRIX, NULL); + if (data) + { + rotation_angle = cvRound(av_display_rotation_get((const int32_t*)data)); + if (rotation_angle < 0) + rotation_angle += 360; + } +#elif LIBAVUTIL_BUILD >= CALC_FFMPEG_VERSION(52, 94, 100) AVDictionaryEntry *rotate_tag = av_dict_get(video_st->metadata, "rotate", NULL, 0); if (rotate_tag != NULL) rotation_angle = atoi(rotate_tag->value); @@ -1712,6 +1853,7 @@ void CvCapture_FFMPEG::get_rotation_angle() void CvCapture_FFMPEG::seek(int64_t _frame_number) { + CV_Assert(context); _frame_number = std::min(_frame_number, get_total_frames()); int delta = 16; @@ -1728,7 +1870,7 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) double time_base = r2d(ic->streams[video_stream]->time_base); time_stamp += (int64_t)(sec / time_base + 0.5); if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD); - avcodec_flush_buffers(ic->streams[video_stream]->codec); + 
avcodec_flush_buffers(context); if( _frame_number > 0 ) { grabFrame(); @@ -1833,7 +1975,7 @@ struct CvVideoWriter_FFMPEG void init(); - AVOutputFormat * fmt; + CV_FFMPEG_FMT_CONST AVOutputFormat * fmt; AVFormatContext * oc; uint8_t * outbuf; uint32_t outbuf_size; @@ -1842,6 +1984,7 @@ struct CvVideoWriter_FFMPEG AVFrame * input_picture; uint8_t * picbuf; AVStream * video_st; + AVCodecContext * context; AVPixelFormat input_pix_fmt; unsigned char * aligned_input; size_t aligned_input_size; @@ -1906,6 +2049,7 @@ void CvVideoWriter_FFMPEG::init() input_picture = 0; picbuf = 0; video_st = 0; + context = 0; input_pix_fmt = AV_PIX_FMT_NONE; aligned_input = NULL; aligned_input_size = 0; @@ -1957,23 +2101,32 @@ static AVFrame * icv_alloc_picture_FFMPEG(int pix_fmt, int width, int height, bo } /* configure video stream */ -static bool icv_configure_video_stream_FFMPEG(AVFormatContext *oc, +static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc, AVStream *st, const AVCodec* codec, int w, int h, int bitrate, - double fps, AVPixelFormat pixel_format) + double fps, AVPixelFormat pixel_format, int fourcc) { +#ifdef CV_FFMPEG_CODECPAR + AVCodecContext *c = avcodec_alloc_context3(codec); +#else AVCodecContext *c = st->codec; +#endif + CV_Assert(c); + int frame_rate, frame_rate_base; c->codec_id = codec->id; c->codec_type = AVMEDIA_TYPE_VIDEO; + c->codec_tag = fourcc; +#ifndef CV_FFMPEG_CODECPAR // Set per-codec defaults CV_CODEC_ID c_id = c->codec_id; avcodec_get_context_defaults3(c, codec); // avcodec_get_context_defaults3 erases codec_id for some reason c->codec_id = c_id; +#endif /* put sample parameters */ int64_t lbit_rate = (int64_t)bitrate; @@ -2016,7 +2169,12 @@ static bool icv_configure_video_stream_FFMPEG(AVFormatContext *oc, } } if (best == NULL) - return false; + { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&c); +#endif + return NULL; + } c->time_base.den= best->num; c->time_base.num= best->den; } @@ -2059,26 +2217,20 @@ static bool 
icv_configure_video_stream_FFMPEG(AVFormatContext *oc, #endif } -#if defined(_MSC_VER) - AVRational avg_frame_rate = {frame_rate, frame_rate_base}; - st->avg_frame_rate = avg_frame_rate; -#else - st->avg_frame_rate = (AVRational){frame_rate, frame_rate_base}; -#endif + st->avg_frame_rate = av_make_q(frame_rate, frame_rate_base); #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(55, 20, 0) st->time_base = c->time_base; #endif - return true; + return c; } static const int OPENCV_NO_FRAMES_WRITTEN_CODE = 1000; -static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st, +static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st, AVCodecContext * c, uint8_t *, uint32_t, AVFrame * picture, int frame_idx) { - AVCodecContext* c = video_st->codec; int ret = OPENCV_NO_FRAMES_WRITTEN_CODE; #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0) @@ -2176,9 +2328,6 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width = frame_width; height = frame_height; - // typecast from opaque data type to implemented struct - AVCodecContext* c = video_st->codec; - // FFmpeg contains SIMD optimizations which can sometimes read data past // the supplied input buffer. 
// Related info: https://trac.ffmpeg.org/ticket/6763 @@ -2215,10 +2364,10 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int step = aligned_step; } - AVPixelFormat sw_pix_fmt = c->pix_fmt; + AVPixelFormat sw_pix_fmt = context->pix_fmt; #if USE_AV_HW_CODECS - if (c->hw_frames_ctx) - sw_pix_fmt = ((AVHWFramesContext*)c->hw_frames_ctx->data)->sw_format; + if (context->hw_frames_ctx) + sw_pix_fmt = ((AVHWFramesContext*)context->hw_frames_ctx->data)->sw_format; #endif if ( sw_pix_fmt != input_pix_fmt ) { CV_Assert( input_picture ); @@ -2232,8 +2381,8 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int img_convert_ctx = sws_getContext(width, height, (AVPixelFormat)input_pix_fmt, - c->width, - c->height, + context->width, + context->height, sw_pix_fmt, SWS_BICUBIC, NULL, NULL, NULL); @@ -2255,14 +2404,14 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int bool ret; #if USE_AV_HW_CODECS - if (video_st->codec->hw_device_ctx) { + if (context->hw_device_ctx) { // copy data to HW frame AVFrame* hw_frame = av_frame_alloc(); if (!hw_frame) { CV_LOG_ERROR(NULL, "Error allocating AVFrame (av_frame_alloc)"); return false; } - if (av_hwframe_get_buffer(video_st->codec->hw_frames_ctx, hw_frame, 0) < 0) { + if (av_hwframe_get_buffer(context->hw_frames_ctx, hw_frame, 0) < 0) { CV_LOG_ERROR(NULL, "Error obtaining HW frame (av_hwframe_get_buffer)"); av_frame_free(&hw_frame); return false; @@ -2273,14 +2422,14 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int return false; } hw_frame->pts = frame_idx; - int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, outbuf, outbuf_size, hw_frame, frame_idx); + int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, context, outbuf, outbuf_size, hw_frame, frame_idx); ret = ret_write >= 0 ? 
true : false; av_frame_free(&hw_frame); } else #endif { picture->pts = frame_idx; - int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, outbuf, outbuf_size, picture, frame_idx); + int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, context, outbuf, outbuf_size, picture, frame_idx); ret = ret_write >= 0 ? true : false; } @@ -2291,7 +2440,7 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int bool CvVideoWriter_FFMPEG::writeHWFrame(cv::InputArray input) { #if USE_AV_HW_CODECS - if (!video_st->codec->hw_frames_ctx) + if (!video_st || !context || !context->hw_frames_ctx || !context->hw_device_ctx) return false; // Get hardware frame from frame pool @@ -2299,20 +2448,20 @@ bool CvVideoWriter_FFMPEG::writeHWFrame(cv::InputArray input) { if (!hw_frame) { return false; } - if (av_hwframe_get_buffer(video_st->codec->hw_frames_ctx, hw_frame, 0) < 0) { + if (av_hwframe_get_buffer(context->hw_frames_ctx, hw_frame, 0) < 0) { av_frame_free(&hw_frame); return false; } // GPU to GPU copy - if (!hw_copy_umat_to_frame(video_st->codec->hw_device_ctx, input, hw_frame)) { + if (!hw_copy_umat_to_frame(context->hw_device_ctx, input, hw_frame)) { av_frame_free(&hw_frame); return false; } // encode hw_frame->pts = frame_idx; - icv_av_write_frame_FFMPEG( oc, video_st, outbuf, outbuf_size, hw_frame, frame_idx); + icv_av_write_frame_FFMPEG( oc, video_st, context, outbuf, outbuf_size, hw_frame, frame_idx); frame_idx++; av_frame_free(&hw_frame); @@ -2365,7 +2514,7 @@ void CvVideoWriter_FFMPEG::close() { for(;;) { - int ret = icv_av_write_frame_FFMPEG( oc, video_st, outbuf, outbuf_size, NULL, frame_idx); + int ret = icv_av_write_frame_FFMPEG( oc, video_st, context, outbuf, outbuf_size, NULL, frame_idx); if( ret == OPENCV_NO_FRAMES_WRITTEN_CODE || ret < 0 ) break; } @@ -2380,7 +2529,7 @@ void CvVideoWriter_FFMPEG::close() } // free pictures - if( video_st->codec->pix_fmt != input_pix_fmt) + if( context->pix_fmt != input_pix_fmt) { if(picture->data[0]) 
free(picture->data[0]); @@ -2392,7 +2541,7 @@ void CvVideoWriter_FFMPEG::close() av_free(input_picture); /* close codec */ - avcodec_close(video_st->codec); + avcodec_close(context); av_free(outbuf); @@ -2599,8 +2748,15 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, /* set file name */ oc->oformat = fmt; +#ifndef CV_FFMPEG_URL snprintf(oc->filename, sizeof(oc->filename), "%s", filename); - +#else + size_t name_len = strlen(filename); + oc->url = (char*)av_malloc(name_len + 1); + CV_Assert(oc->url); + memcpy((void*)oc->url, filename, name_len + 1); + oc->url[name_len] = '\0'; +#endif /* set some options */ oc->max_delay = (int)(0.7*AV_TIME_BASE); /* This reduces buffer underrun warnings with MPEG */ @@ -2715,7 +2871,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, double bitrate = std::min(bitrate_scale*fps*width*height, (double)INT_MAX/2); if (codec_id == AV_CODEC_ID_NONE) { - codec_id = av_guess_codec(oc->oformat, NULL, oc->filename, NULL, AVMEDIA_TYPE_VIDEO); + codec_id = av_guess_codec(oc->oformat, NULL, filename, NULL, AVMEDIA_TYPE_VIDEO); } // Add video stream to output file @@ -2733,11 +2889,9 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, } #endif - AVCodecContext *c = video_st->codec; - // find and open encoder, try HW acceleration types specified in 'hw_acceleration' list (in order) int err = -1; - AVCodec* codec = NULL; + const AVCodec* codec = NULL; #if USE_AV_HW_CODECS AVBufferRef* hw_device_ctx = NULL; HWAccelIterator accel_iter(va_type, true/*isEncoder*/, dict); @@ -2780,9 +2934,17 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, AVPixelFormat format = codec_pix_fmt; #endif - if (!icv_configure_video_stream_FFMPEG(oc, video_st, codec, - width, height, (int) (bitrate + 0.5), - fps, format)) { +#ifdef CV_FFMPEG_CODECPAR + if (context) + { + avcodec_free_context(&context); + } +#endif + context = icv_configure_video_stream_FFMPEG(oc, video_st, codec, + width, 
height, (int) (bitrate + 0.5), + fps, format, fourcc); + if (!context) + { continue; } @@ -2794,27 +2956,25 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, #endif #endif - c->codec_tag = fourcc; - #if USE_AV_HW_CODECS if (hw_device_ctx) { - c->hw_device_ctx = av_buffer_ref(hw_device_ctx); + context->hw_device_ctx = av_buffer_ref(hw_device_ctx); if (hw_format != AV_PIX_FMT_NONE) { - c->hw_frames_ctx = hw_create_frames(NULL, hw_device_ctx, width, height, hw_format); - if (!c->hw_frames_ctx) + context->hw_frames_ctx = hw_create_frames(NULL, hw_device_ctx, width, height, hw_format); + if (!context->hw_frames_ctx) continue; } } #endif - int64_t lbit_rate = (int64_t) c->bit_rate; + int64_t lbit_rate = (int64_t) context->bit_rate; lbit_rate += (int64_t)(bitrate / 2); lbit_rate = std::min(lbit_rate, (int64_t) INT_MAX); - c->bit_rate_tolerance = (int) lbit_rate; - c->bit_rate = (int) lbit_rate; + context->bit_rate_tolerance = (int) lbit_rate; + context->bit_rate = (int) lbit_rate; /* open the codec */ - err = avcodec_open2(c, codec, NULL); + err = avcodec_open2(context, codec, NULL); if (err >= 0) { #if USE_AV_HW_CODECS va_type = hw_type_to_va_type(hw_type); @@ -2823,7 +2983,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, #endif break; } else { - CV_LOG_ERROR(NULL, "Could not open codec " << codec->name << ", error: " << icvFFMPEGErrStr(err)); + CV_LOG_ERROR(NULL, "Could not open codec " << codec->name << ", error: " << icvFFMPEGErrStr(err) << " (" << err << ")"); } #if USE_AV_HW_CODECS } // while (accel_iter.good()) @@ -2844,6 +3004,12 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, return false; } +#ifdef CV_FFMPEG_CODECPAR + // Copy all to codecpar... + // !!! 
https://stackoverflow.com/questions/15897849/c-ffmpeg-not-writing-avcc-box-information + avcodec_parameters_from_context(video_st->codecpar, context); +#endif + outbuf = NULL; @@ -2858,16 +3024,16 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, } bool need_color_convert; - AVPixelFormat sw_pix_fmt = c->pix_fmt; + AVPixelFormat sw_pix_fmt = context->pix_fmt; #if USE_AV_HW_CODECS - if (c->hw_frames_ctx) - sw_pix_fmt = ((AVHWFramesContext*)c->hw_frames_ctx->data)->sw_format; + if (context->hw_frames_ctx) + sw_pix_fmt = ((AVHWFramesContext*)context->hw_frames_ctx->data)->sw_format; #endif need_color_convert = (sw_pix_fmt != input_pix_fmt); /* allocate the encoded raw picture */ - picture = icv_alloc_picture_FFMPEG(sw_pix_fmt, c->width, c->height, need_color_convert); + picture = icv_alloc_picture_FFMPEG(sw_pix_fmt, context->width, context->height, need_color_convert); if (!picture) { return false; } @@ -2877,7 +3043,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, to the required output format */ input_picture = NULL; if ( need_color_convert ) { - input_picture = icv_alloc_picture_FFMPEG(input_pix_fmt, c->width, c->height, false); + input_picture = icv_alloc_picture_FFMPEG(input_pix_fmt, context->width, context->height, false); if (!input_picture) { return false; } From b687bc807a34b70f630932b3d0cc14b357b4eaf2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 30 Mar 2022 10:47:59 +0000 Subject: [PATCH 50/84] dnn(test): update OpenVINO tests 2021.4.2 --- modules/dnn/test/test_onnx_importer.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index dda479c4fa..b3cbcebf21 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -787,10 +787,30 @@ TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) testONNXModels("hidden_lstm_bi", npy, 0, 0, false, 
false); } -TEST_P(Test_ONNX_layers, LSTM_cell) +TEST_P(Test_ONNX_layers, LSTM_cell_forward) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Ngraph operation Reshape with name LSTM_16/lstm_y/reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("lstm_cell_forward", npy, 0, 0, false, false); +} +TEST_P(Test_ONNX_layers, LSTM_cell_bidirectional) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Ngraph operation Reshape with name LSTM_16/lstm_y/reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("lstm_cell_bidirectional", npy, 0, 0, false, false); +} +TEST_P(Test_ONNX_layers, LSTM_cell_with_peepholes) +{ testONNXModels("lstm_cell_with_peepholes", npy, 0, 0, false, false); } From 6f5cf8c15f1b760f00a070d994211c3c3e65bc2a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 30 Mar 2022 11:06:18 +0000 Subject: [PATCH 51/84] dnn: fix ReduceLayer implementation, update OpenVINO tests --- modules/dnn/src/layers/reduce_layer.cpp | 1 + ...conformance_layer_filter__openvino.inl.hpp | 75 ++++++++++++++++--- modules/dnn/test/test_onnx_importer.cpp | 33 +++++++- 3 files changed, 96 insertions(+), 13 deletions(-) diff --git 
a/modules/dnn/src/layers/reduce_layer.cpp b/modules/dnn/src/layers/reduce_layer.cpp index 62bb65f897..47aec237c7 100644 --- a/modules/dnn/src/layers/reduce_layer.cpp +++ b/modules/dnn/src/layers/reduce_layer.cpp @@ -25,6 +25,7 @@ class ReduceLayerImpl CV_FINAL : public ReduceLayer public: ReduceLayerImpl(const LayerParams& params) { + setParamsFrom(params); // set reduce type CV_Assert(params.has("reduce")); String typeString = toLowerCase(params.get("reduce")); diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index bdd8f3b8b9..284dfb75fa 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -1267,7 +1267,10 @@ CASE(test_reduce_l1_negative_axes_keep_dims_example) CASE(test_reduce_l1_negative_axes_keep_dims_random) // no filter CASE(test_reduce_l2_default_axes_keepdims_example) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00490189 vs 0.004) +#endif CASE(test_reduce_l2_default_axes_keepdims_random) // no filter CASE(test_reduce_l2_do_not_keepdims_example) @@ -1291,7 +1294,10 @@ CASE(test_reduce_log_sum_default) CASE(test_reduce_log_sum_desc_axes) // no filter CASE(test_reduce_log_sum_exp_default_axes_keepdims_example) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00671387 vs 0.004 +#endif CASE(test_reduce_log_sum_exp_default_axes_keepdims_random) // no filter CASE(test_reduce_log_sum_exp_do_not_keepdims_example) @@ -1357,21 +1363,47 @@ CASE(test_reduce_min_negative_axes_keepdims_example) CASE(test_reduce_min_negative_axes_keepdims_random) // no filter CASE(test_reduce_prod_default_axes_keepdims_example) - // no filter +#if SKIP_SET_1 + SKIP_MYRIAD; // accuracy (Expected: (normL1) <= (l1), 
actual: inf vs 0.004) +#endif CASE(test_reduce_prod_default_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 5; // Expected: (normL1) <= (l1), actual: 2.66211 vs 0.004 |ref| = 24621.337890625 + default_lInf = 5; // Expected: (normInf) <= (lInf), actual: 2.66211 vs 0.02 |ref| = 24621.337890625 + } +#endif CASE(test_reduce_prod_do_not_keepdims_example) // no filter CASE(test_reduce_prod_do_not_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_prod_keepdims_example) // no filter CASE(test_reduce_prod_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_prod_negative_axes_keepdims_example) // no filter CASE(test_reduce_prod_negative_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_sum_default_axes_keepdims_example) // no filter CASE(test_reduce_sum_default_axes_keepdims_random) @@ -1395,19 +1427,40 @@ CASE(test_reduce_sum_negative_axes_keepdims_random) CASE(test_reduce_sum_square_default_axes_keepdims_example) // no filter CASE(test_reduce_sum_square_default_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.0183411 vs 0.004 +#endif CASE(test_reduce_sum_square_do_not_keepdims_example) // no filter 
CASE(test_reduce_sum_square_do_not_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reduce_sum_square_keepdims_example) // no filter CASE(test_reduce_sum_square_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reduce_sum_square_negative_axes_keepdims_example) // no filter CASE(test_reduce_sum_square_negative_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reflect_pad) // no filter CASE(test_relu) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 4918c72d10..941fca85fd 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -358,7 +358,18 @@ TEST_P(Test_ONNX_layers, ReduceSum) TEST_P(Test_ONNX_layers, ReduceMax) { testONNXModels("reduce_max"); +} +TEST_P(Test_ONNX_layers, ReduceMax_axis_0) +{ testONNXModels("reduce_max_axis_0"); +} +TEST_P(Test_ONNX_layers, ReduceMax_axis_1) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // [ GENERAL_ERROR ] AssertionFailed: !out.networkInputs.empty() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("reduce_max_axis_1"); } @@ -378,10 +389,28 @@ TEST_P(Test_ONNX_layers, 
ArgLayer) TEST_P(Test_ONNX_layers, Scale) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Ngraph operation Reshape with name ReduceMean_0 has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("scale"); +} + +TEST_P(Test_ONNX_layers, Scale_broadcast) +{ testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3); +} + +TEST_P(Test_ONNX_layers, Scale_broadcast_mid) +{ testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2); } From 3e4a566e4692c3b2783248aa34a28548e78b1933 Mon Sep 17 00:00:00 2001 From: HAN Liutong Date: Thu, 31 Mar 2022 04:04:34 +0800 Subject: [PATCH 52/84] Merge pull request #21351 from hanliutong:rvv-clang * Update universal intrinsics of RVV back-end. * Use array instead of malloc. 
--- .../include/opencv2/core/hal/intrin_rvv.hpp | 433 ++++++++++++++++++ 1 file changed, 433 insertions(+) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index fe6c077639..a592976827 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -230,6 +230,7 @@ inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src, size_t vl) //////////// Types //////////// +#ifndef __clang__ struct v_uint8x16 { typedef uchar lane_type; @@ -531,7 +532,358 @@ struct v_float64x2 double val[2]; }; #endif +#else +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + *pval = v; + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + *pval = vle8_v_u8m1(v, nlanes); + } + operator vuint8m1_t() const + { + return *pval; + } + uchar get0() const + { + return vmv_x(*pval); + } + inline v_uint8x16& operator=(const v_uint8x16& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint8x16(const v_uint8x16& vec) { + *pval = *(vec.pval); + } + uchar val[16]; + vuint8m1_t* pval = (vuint8m1_t*)val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + *pval = v; + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + *pval = vle8_v_i8m1(v, nlanes); + } + operator vint8m1_t() const + { + return *pval; + } + schar get0() const + { + return vmv_x(*pval); + } + 
inline v_int8x16& operator=(const v_int8x16& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int8x16(const v_int8x16& vec) { + *pval = *(vec.pval); + } + schar val[16]; + vint8m1_t* pval = (vint8m1_t*)val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + *pval = v; + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + *pval = vle16_v_u16m1(v, nlanes); + } + operator vuint16m1_t() const + { + return *pval; + } + ushort get0() const + { + return vmv_x(*pval); + } + + inline v_uint16x8& operator=(const v_uint16x8& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint16x8(const v_uint16x8& vec) { + *pval = *(vec.pval); + } + ushort val[8]; + vuint16m1_t* pval = (vuint16m1_t*)val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + *pval = v; + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + *pval = vle16_v_i16m1(v, nlanes); + } + operator vint16m1_t() const + { + return *pval; + } + short get0() const + { + return vmv_x(*pval); + } + + inline v_int16x8& operator=(const v_int16x8& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int16x8(const v_int16x8& vec) { + *pval = *(vec.pval); + } + short val[8]; + vint16m1_t* pval = (vint16m1_t*)val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + *pval = v; + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + *pval = vle32_v_u32m1(v, nlanes); + } + operator vuint32m1_t() const + { + return *pval; + } + unsigned get0() const + { + return vmv_x(*pval); + } + + inline 
v_uint32x4& operator=(const v_uint32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint32x4(const v_uint32x4& vec) { + *pval = *(vec.pval); + } + unsigned val[4]; + vuint32m1_t* pval = (vuint32m1_t*)val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + *pval = v; + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + *pval = vle32_v_i32m1(v, nlanes); + } + operator vint32m1_t() const + { + return *pval; + } + int get0() const + { + return vmv_x(*pval); + } + + inline v_int32x4& operator=(const v_int32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int32x4(const v_int32x4& vec) { + *pval = *(vec.pval); + } + int val[4]; + vint32m1_t* pval = (vint32m1_t*)val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + *pval = v; + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + *pval = vle32_v_f32m1(v, nlanes); + } + operator vfloat32m1_t() const + { + return *pval; + } + float get0() const + { + return vfmv_f(*pval); + } + inline v_float32x4& operator=(const v_float32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_float32x4(const v_float32x4& vec) { + *pval = *(vec.pval); + } + float val[4]; + vfloat32m1_t* pval = (vfloat32m1_t*)val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + *pval = v; + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + *pval = vle64_v_u64m1(v, nlanes); + } + operator vuint64m1_t() const + { + return *pval; + } + uint64 get0() const + { + return vmv_x(*pval); + } + + inline v_uint64x2& operator=(const v_uint64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint64x2(const v_uint64x2& vec) { + *pval = *(vec.pval); + } + uint64 
val[2]; + vuint64m1_t* pval = (vuint64m1_t*)val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + *pval = v; + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + *pval = vle64_v_i64m1(v, nlanes); + } + operator vint64m1_t() const + { + return *pval; + } + int64 get0() const + { + return vmv_x(*pval); + } + + inline v_int64x2& operator=(const v_int64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int64x2(const v_int64x2& vec) { + *pval = *(vec.pval); + } + int64 val[2]; + vint64m1_t* pval = (vint64m1_t*)val; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + *pval = v; + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + *pval = vle64_v_f64m1(v, nlanes); + } + operator vfloat64m1_t() const + { + return *pval; + } + double get0() const + { + return vfmv_f(*pval); + } + + inline v_float64x2& operator=(const v_float64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_float64x2(const v_float64x2& vec) { + *pval = *(vec.pval); + } + double val[2]; + vfloat64m1_t* pval = (vfloat64m1_t*)val; +}; +#endif // CV_SIMD128_64F +#endif // __clang__ //////////// Initial //////////// @@ -1819,6 +2171,7 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a) } #if CV_SIMD128_64F +#ifndef __clang__ inline v_float32x4 v_cvt_f32(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; @@ -1832,6 +2185,18 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4)); } +#else +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const 
v_float64x2& b) +{ + vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a); + return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(dst, 1, b), 4)); +} +#endif inline v_float64x2 v_cvt_f64(const v_int32x4& a) { @@ -2351,6 +2716,7 @@ OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64) //////////// SignMask //////////// +#ifndef __clang__ #define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, vl, shift) \ inline int v_signmask(const _Tpvec& a) \ { \ @@ -2381,6 +2747,36 @@ inline int v_signmask(const v_float64x2& a) { return v_signmask(v_reinterpret_as_u64(a)); } #endif +#else +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, width, vl) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + uint8_t ans[16] = {0};\ + vsm(ans, vmslt(a, 0, vl), vl);\ + return reinterpret_cast(ans)[0];\ +} + +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8x16, 8, 16) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16x8, 16, 8) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32x4, 32, 4) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64x2, 64, 2) + +inline int v_signmask(const v_uint8x16& a) +{ return v_signmask(v_reinterpret_as_s8(a)); } +inline int v_signmask(const v_uint16x8& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } +inline int v_signmask(const v_uint32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_uint64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +#endif + +#endif //////////// Scan forward //////////// @@ -2520,6 +2916,7 @@ inline v_int32x4 v_trunc(const v_float32x4& a) return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a, 4)); } #if CV_SIMD128_64F +#ifndef __clang__ inline v_int32x4 v_round(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; @@ -2554,6 +2951,42 @@ inline v_int32x4 v_trunc(const v_float64x2& a) vfloat64m2_t tmp = 
vle64_v_f64m2(arr, 4); return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp, 4)); } + +#else +inline v_int32x4 v_round(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a); + return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(dst, 1, b), 4)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4); + dst = vset_v_f64m1_f64m2(dst, 0, a); + dst = vfsub_vf_f64m2(dst, 0.5, 2); + return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4); + dst = vset_v_f64m1_f64m2(dst, 0, a); + dst = vfadd_vf_f64m2(dst, 0.5, 2); + return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} +#endif #endif From abebbf04b1e16847ee950bbbee99334d48116fde Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Wed, 30 Mar 2022 15:26:29 +0300 Subject: [PATCH 53/84] Add CUDA support for LSTM. 
Co-authored-by: Julia Bareeva --- modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp | 45 ++++ .../dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp | 195 ++++++++++++++++++ modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp | 85 ++++++++ .../cuda4dnn/primitives/recurrent_cells.hpp | 97 +++++++++ modules/dnn/src/layers/recurrent_layers.cpp | 113 +++++++--- modules/dnn/src/onnx/onnx_importer.cpp | 38 ++-- 6 files changed, 524 insertions(+), 49 deletions(-) create mode 100644 modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp create mode 100644 modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp diff --git a/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp b/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp index 2370492ad5..9bd8fcfe3b 100644 --- a/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp +++ b/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp @@ -287,6 +287,51 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu cudnnTensorDescriptor_t descriptor; }; + /** An array of number fully packed tensor descriptors + * + * @tparam T type of elements in the tensor + */ + template + class TensorDescriptorsArray + { + public: + TensorDescriptorsArray() noexcept = default; + TensorDescriptorsArray(const TensorDescriptorsArray&) = delete; + TensorDescriptorsArray(TensorDescriptorsArray&& other) noexcept + : descriptors{std::move(other.descriptors)} {} + + TensorDescriptorsArray(int seqLength, std::array dims) + { + for (int i = 0; i < seqLength; ++i) + { + descriptors.emplace_back(dims); + } + } + + ~TensorDescriptorsArray() noexcept = default; + + TensorDescriptorsArray& operator=(const TensorDescriptorsArray&) = delete; + TensorDescriptorsArray& operator=(TensorDescriptorsArray&& other) noexcept + { + descriptors = std::move(other.descriptors); + return *this; + }; + + std::vector get() const noexcept + { + std::vector descPtrs; + descPtrs.reserve(descriptors.size()); + for (auto& desc : descriptors) + { + descPtrs.push_back(desc.get()); + } + return descPtrs; + } + + private: 
+ std::vector> descriptors; + }; + }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */ #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_HPP */ diff --git a/modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp b/modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp new file mode 100644 index 0000000000..7ba6acdf17 --- /dev/null +++ b/modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp @@ -0,0 +1,195 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP +#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP + +#include "cudnn.hpp" +#include + + +namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn { + +/** + */ +class DropoutDescriptor +{ +public: + DropoutDescriptor() noexcept = default; + DropoutDescriptor(const DropoutDescriptor &) = delete; + DropoutDescriptor(DropoutDescriptor &&other) noexcept : descriptor{other.descriptor} + { + states = std::move(other.states); + other.descriptor = nullptr; + } + + /** + */ + DropoutDescriptor(const Handle &handle, float dropout) + { + CUDA4DNN_CHECK_CUDNN(cudnnCreateDropoutDescriptor(&descriptor)); + + // we need additional memory for dropout descriptor + size_t stateSize; + CUDA4DNN_CHECK_CUDNN(cudnnDropoutGetStatesSize(handle.get(), &stateSize)); + states.reset(stateSize); + + try + { + auto seed = 1234ull; // Pick a seed. + CUDA4DNN_CHECK_CUDNN(cudnnSetDropoutDescriptor(descriptor, handle.get(), dropout, + states.get().get(), stateSize, seed)); + } + catch (...) 
+ { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyDropoutDescriptor(descriptor)); + throw; + } + } + + ~DropoutDescriptor() noexcept + { + if (descriptor) + { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyDropoutDescriptor(descriptor)); + } + } + + DropoutDescriptor &operator=(const DropoutDescriptor &) = delete; + DropoutDescriptor &operator=(DropoutDescriptor &&other) noexcept + { + descriptor = other.descriptor; + states = std::move(other.states); + other.descriptor = nullptr; + return *this; + }; + + cudnnDropoutDescriptor_t get() const noexcept { return descriptor; } + +private: + cudnnDropoutDescriptor_t descriptor{nullptr}; + + using value_type = typename ManagedPtr::element_type; + ManagedPtr states; +}; + +/** + */ +template +class RNNDescriptor +{ +public: + enum class RNNMode + { + RNN_RELU, + RNN_TANH, + LSTM, + GRU + }; + + RNNDescriptor() noexcept = default; + RNNDescriptor(const RNNDescriptor &) = delete; + RNNDescriptor(RNNDescriptor &&other) noexcept : descriptor{other.descriptor} + { + other.descriptor = nullptr; + } + + /** + */ + RNNDescriptor(const Handle &handle, RNNMode mode, int hidden_size, int num_layers, + bool bidirectional, const DropoutDescriptor &dropoutDesc) + { + CUDA4DNN_CHECK_CUDNN(cudnnCreateRNNDescriptor(&descriptor)); + const auto rnn_mode = [mode] { + switch (mode) + { + case RNNMode::RNN_RELU: + return CUDNN_RNN_RELU; + case RNNMode::RNN_TANH: + return CUDNN_RNN_TANH; + case RNNMode::LSTM: + return CUDNN_LSTM; + case RNNMode::GRU: + return CUDNN_GRU; + default: + return CUDNN_LSTM; + } + }(); + + try + { + CUDA4DNN_CHECK_CUDNN(cudnnSetRNNDescriptor_v6( + handle.get(), descriptor, hidden_size, num_layers, dropoutDesc.get(), + CUDNN_LINEAR_INPUT, bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, + rnn_mode, + algo, //CUDNN_RNN_ALGO_STANDARD, + detail::get_data_type())); + } + catch (...) 
+ { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyRNNDescriptor(descriptor)); + throw; + } + } + + ~RNNDescriptor() noexcept + { + if (descriptor) + { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyRNNDescriptor(descriptor)); + } + } + + RNNDescriptor &operator=(const RNNDescriptor &) = delete; + RNNDescriptor &operator=(RNNDescriptor &&other) noexcept + { + descriptor = other.descriptor; + other.descriptor = nullptr; + return *this; + }; + + cudnnRNNDescriptor_t get() const noexcept { return descriptor; } + +private: + cudnnRNNDescriptor_t descriptor{nullptr}; + cudnnRNNMode_t mode{CUDNN_LSTM}; + // support only one algo for a while + cudnnRNNAlgo_t algo{CUDNN_RNN_ALGO_STANDARD}; +}; + +template +size_t getRNNWorkspaceSize(const Handle &handle, const RNNDescriptor &rnnDesc, + const int seqLength, const TensorDescriptorsArray &inputDesc) +{ + size_t workSize; + CUDA4DNN_CHECK_CUDNN(cudnnGetRNNWorkspaceSize(handle.get(), rnnDesc.get(), seqLength, + inputDesc.get().data(), &workSize)); + return workSize; +} + +template +void LSTMForward(const Handle &handle, const RNNDescriptor &rnnDesc, + const FilterDescriptor &filterDesc, DevicePtr filterPtr, + const TensorDescriptorsArray &inputDesc, DevicePtr inputPtr, + const TensorDescriptor &initialHDesc, DevicePtr initialH, + const TensorDescriptor &initialCDesc, DevicePtr initialC, + const int seqLength, const TensorDescriptorsArray &outputDesc, + DevicePtr yOutputPtr, DevicePtr ycOutputPtr, WorkspaceInstance workspace) +{ + CV_Assert(handle); + + CUDA4DNN_CHECK_CUDNN(cudnnRNNForwardInference(handle.get(), rnnDesc.get(), seqLength, + inputDesc.get().data(), inputPtr.get(), // input sequence + initialHDesc.get(), initialH.get(), + initialCDesc.get(), initialC.get(), // hidden + filterDesc.get(), filterPtr.get(), // weights + outputDesc.get().data(), yOutputPtr.get(), // output + nullptr, nullptr, + initialCDesc.get(), ycOutputPtr.get(), + static_cast(workspace.get()), workspace.size_in_bytes())); +} + +}}}}} /* namespace 
cv::dnn::cuda4dnn::csl::cudnn */ + +#endif //OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP \ No newline at end of file diff --git a/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp b/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp index 4ee0e8ab77..27f8306bf3 100644 --- a/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp +++ b/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp @@ -18,6 +18,7 @@ #include "cudnn/softmax.hpp" #include "cudnn/transform.hpp" #include "cudnn/transpose_convolution.hpp" +#include "cudnn/recurrent.hpp" #include @@ -472,6 +473,90 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { TensorTransformDescriptor transDesc; }; + template + class LSTM + { + using TensorDescriptor = cudnn::TensorDescriptor; + using DropoutDescriptor = cudnn::DropoutDescriptor; + using RNNDescriptor = cudnn::RNNDescriptor; + using FilterDescriptor = cudnn::FilterDescriptor; + using TensorDescriptorsArray = cudnn::TensorDescriptorsArray; + + public: + using RNNMode = typename RNNDescriptor::RNNMode; + + struct params_type + { + std::vector weights_shape; + + int seqLength; + int numLayers; + int hiddenSize; + int inputSize; + int miniBatch; + bool bidirectional; + + float dropout; + RNNMode type; + }; + + LSTM() = default; + LSTM(const LSTM&) = delete; + LSTM(LSTM&&) = default; + LSTM(cudnn::Handle handle, const params_type& params) + : cudnnHandle(std::move(handle)), seqLength{params.seqLength}, + inputDesc(seqLength, {params.miniBatch, params.inputSize, 1}), + outputDesc(seqLength, + {params.miniBatch, + params.bidirectional ? params.hiddenSize * 2 : params.hiddenSize, + 1}) + { + dropoutDesc = DropoutDescriptor(cudnnHandle, params.dropout); + filterDesc = FilterDescriptor(params.weights_shape); + rnnDesc = RNNDescriptor(cudnnHandle, params.type, params.hiddenSize, + params.numLayers, params.bidirectional, dropoutDesc); + + int num_direction = params.bidirectional ? 
2 : 1; + h0TensorDesc = TensorDescriptor( + {num_direction, params.miniBatch, params.hiddenSize}); + c0TensorDesc = TensorDescriptor( + {num_direction, params.miniBatch, params.hiddenSize}); + + // Get amount of work space required to execute the RNN described by rnnDesc + // with input dimensions defined by inputDesc + csl::WorkspaceBuilder builder; + builder.require(cudnn::getRNNWorkspaceSize(cudnnHandle, rnnDesc, seqLength, inputDesc)); + scratch_mem_in_bytes = builder.required_workspace_size(); + } + + LSTM& operator=(const LSTM&) = delete; + LSTM& operator=(LSTM&&) = default; + + void inference(TensorView input, TensorSpan y_output, TensorSpan yc_output, TensorView filters, + TensorView h0, TensorView c0, WorkspaceInstance workspace) + { + cudnn::LSTMForward(cudnnHandle, rnnDesc, filterDesc, filters.get(), inputDesc, + input.get(), h0TensorDesc, h0.get(), c0TensorDesc, c0.get(), + seqLength, outputDesc, y_output.get(), yc_output.get(), workspace); + } + + std::size_t get_workspace_memory_in_bytes() const noexcept { return scratch_mem_in_bytes; } + + private: + cudnn::Handle cudnnHandle; + std::size_t scratch_mem_in_bytes{0}; + int seqLength; + + RNNDescriptor rnnDesc; + DropoutDescriptor dropoutDesc; + + FilterDescriptor filterDesc; + TensorDescriptor h0TensorDesc, c0TensorDesc; + + TensorDescriptorsArray inputDesc; + TensorDescriptorsArray outputDesc; + }; + }}}} /* namespace cv::dnn::cuda4dnn::csl */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_OPS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp b/modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp new file mode 100644 index 0000000000..5cba788008 --- /dev/null +++ b/modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp @@ -0,0 +1,97 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CELLS_HPP +#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CELLS_HPP + +#include "../../op_cuda.hpp" + +#include "../csl/cudnn.hpp" +#include "../csl/tensor_ops.hpp" +#include "../csl/cudnn/recurrent.hpp" + +namespace cv { namespace dnn { namespace cuda4dnn { + +struct RNNConfiguration +{ + int seqLength; + int numLayers; + int hiddenSize; + int inputSize; + int miniBatch; + bool bidirectional; +}; + +template +class LSTMOp final : public CUDABackendNode +{ +public: + using wrapper_type = GetCUDABackendWrapperType; + + LSTMOp(csl::Stream stream_, csl::cudnn::Handle handle, const Mat& filters, const Mat& h0, + const Mat& c0, const RNNConfiguration& config) + : stream(std::move(stream_)) + { + typename csl::LSTM::params_type params{ + {filters.total(), 1, 1}, // reshape + config.seqLength, + config.numLayers, + config.hiddenSize, + config.inputSize, + config.miniBatch, + config.bidirectional, + 0.0, /* dropout */ + csl::cudnn::RNNDescriptor::RNNMode::LSTM + }; + + lstm = csl::LSTM(handle, params); + auto correct_shape_filters = filters.reshape(1, {static_cast(filters.total()), 1, 1}); + filtersTensor = csl::makeTensorHeader(correct_shape_filters); + csl::copyMatToTensor(correct_shape_filters, filtersTensor, stream); + + h0Tensor = csl::makeTensorHeader(h0); + csl::copyMatToTensor(h0, h0Tensor, stream); + + c0Tensor = csl::makeTensorHeader(c0); + csl::copyMatToTensor(c0, c0Tensor, stream); + + csl::WorkspaceBuilder builder; + builder.require(lstm.get_workspace_memory_in_bytes()); + } + + void forward(const std::vector>& inputs, + const std::vector>& outputs, + csl::Workspace& workspace) override + { + CV_Assert(inputs.size() == 1 && !outputs.empty()); + + auto input_wrapper = inputs[0].dynamicCast(); + auto input = input_wrapper->getView(); + + auto y_output_wrapper = outputs[0].dynamicCast(); + auto y_output = y_output_wrapper->getSpan(); + + Ptr yc_output_wrapper = outputs.size() == 2 ? 
outputs[1].dynamicCast() : Ptr(); + csl::TensorSpan yc_output = yc_output_wrapper.empty() ? csl::TensorSpan() : yc_output_wrapper->getSpan(); + + csl::WorkspaceAllocator allocator(workspace); + lstm.inference(input, y_output, yc_output, filtersTensor, h0Tensor, c0Tensor, allocator.get_instance()); + } + + std::size_t get_workspace_memory_in_bytes() const noexcept override + { + return lstm.get_workspace_memory_in_bytes(); + } + +private: + csl::LSTM lstm; + csl::Stream stream; + csl::Tensor filtersTensor; + csl::Tensor h0Tensor; + csl::Tensor c0Tensor; +}; + +}}} /* namespace cv::dnn::cuda4dnn */ + +#endif //OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_RECURRENT_CELLS_HPP \ No newline at end of file diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 19e32e2b61..3961051c8e 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -42,10 +42,14 @@ #include "../precomp.hpp" #include -#include #include #include +#ifdef HAVE_CUDA +#include "../cuda4dnn/primitives/recurrent_cells.hpp" +using namespace cv::dnn::cuda4dnn; +#endif + #include "layers_common.hpp" namespace cv @@ -119,6 +123,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer ActivationFunction f_activation; ActivationFunction g_activation; ActivationFunction h_activation; + bool isDefaultActivations{true}; #if CV_TRY_AVX bool useAVX; @@ -202,11 +207,15 @@ public: f_activation = sigmoid; g_activation = tanh; h_activation = tanh; + isDefaultActivations = true; } else { CV_Assert(activations.size() == 3); f_activation = get_activation_function(activations.getStringValue(0)); g_activation = get_activation_function(activations.getStringValue(1)); h_activation = get_activation_function(activations.getStringValue(2)); + isDefaultActivations = activations.getStringValue(0) == "Sigmoid" + && activations.getStringValue(1) == "Tanh" + && activations.getStringValue(2) == "Tanh"; } allocated = false; @@ -245,6 +254,12 @@ 
public: blobs[2] = Mat(bias.clone()).reshape(1, 1); } + bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV + || (backendId == DNN_BACKEND_CUDA && isDefaultActivations && !reverse && !usePeephole); + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -582,29 +597,8 @@ public: cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); // permute to {0, 2, 1, 3}; - std::vector newShape = shape(cOut); - std::swap(newShape[1], newShape[2]); - cv::Mat newCellState(newShape, CV_32FC1); - const float* src = cOut.ptr(); - float* dst = newCellState.ptr(); - size_t sj = newCellState.size[3]; - size_t sk = newCellState.size[2] * sj; - size_t si = newCellState.size[1] * sk; - for (size_t i = 0; i < newCellState.size[0]; i++) - { - for (size_t j = 0; j < newCellState.size[2]; j++) - { - for (size_t k = 0; k < newCellState.size[1]; k++) - { - std::memcpy(dst, src, sizeof(float) * newCellState.size[3]); - src += cOut.size[3]; - dst += sk; - } - dst = dst + sj - si; - } - dst = dst + si - sk; - } - + cv::Mat newCellState; + cv::transposeND(cOut, {0, 2, 1, 3}, newCellState); cOut = newCellState; if (numDirs == 1) @@ -637,6 +631,77 @@ public: cOut = cOut.reshape(1, sizeof(finalShape)/sizeof(finalShape[0]), finalShape); } } + +#ifdef HAVE_CUDA + Ptr initCUDA(void *context_, const std::vector> &inputs, + const std::vector> &outputs) override + { + const int numDirs = 1 + static_cast(bidirectional); + auto toIFCO = [numDirs] (Mat& in) { + int first = in.size[0]; + int rest = in.total() / first / 4; + // every weight blob contains weights for Input, Output, Forget and Cell gates + Mat m = in.reshape(1, {first, 4, rest}); + Mat outputGate = m.col(1); + Mat forgetGate = m.col(2); + Mat cellGate = m.col(3); + // IOFC -> IFOC + std::swap_ranges(outputGate.begin(), outputGate.end(), forgetGate.begin()); + std::swap(outputGate, forgetGate); + // IFOC -> IFCO + std::swap_ranges(outputGate.begin(), 
outputGate.end(), cellGate.begin()); + in = in.reshape(1, numDirs); + }; + + Mat& b = originalBlobs[2]; + // B is a concatenation of biases for Wh and Wx + b = b.reshape(1, originalBlobs[2].size[0]*2); + + for (auto& m : originalBlobs) + { + toIFCO(m); + } + + b = b.reshape(1, static_cast(b.total())); + + Mat ordered_weights; + // Wx_f, Wh_f, [Wx_b, Wh_b,] b + for (int i = 0; i < numDirs; ++i) + { + for (size_t j = 0; j < 2; ++j) // Wx, Wh + { + Mat oneDirection = originalBlobs[j].row(i); + ordered_weights.push_back(oneDirection.reshape(1, static_cast(oneDirection.total()))); + } + } + ordered_weights.push_back(b); + + // Pass hidden states as is + Mat h0 = blobs[3]; + Mat c0 = blobs[4]; + + CV_Assert(!inputs.empty()); + auto input_wrapper = inputs[0].dynamicCast(); + auto input_shape = input_wrapper->getShape(); + + RNNConfiguration config + { + input_shape[0], // seqLength; + 1, // numLayers; + numHidden, // hiddenSize; + input_shape[2], // inputSize; + input_shape[1], // miniBatch; + bidirectional + }; + + + auto *context = reinterpret_cast(context_); + return make_cuda_node(preferableTarget, std::move(context->stream), + std::move(context->cudnn_handle), + ordered_weights, h0, c0, + config); + } +#endif }; Ptr LSTMLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index b43bb5a390..18ccf67bab 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1574,8 +1574,6 @@ void transformBlobs(std::vector& blobs) cudaWorkaround.push_back(b.clone()); const int numHidden = Wh.size[2]; - const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. 
- const int numFeatures = Wx.size[2]; Mat h0 = blobs[3]; h0 = h0.reshape(1, h0.size[0] * h0.size[1]); @@ -1587,30 +1585,20 @@ void transformBlobs(std::vector& blobs) Mat bh = b.colRange(b.cols / 2, b.cols); b = bx + bh; - // b is numDirs X numHidden*3 - CV_CheckLE(numHidden * 3, b.cols, "Bias data should have at least 3x hidden_size columns"); + auto toIFOC = [] (Mat& in) { + int first = in.size[0]; + int rest = in.total() / first / 4; + // every weight blob contains weights for Input, Output, Forget and Cell gates + Mat m = in.reshape(1, {first, 4, rest}); + Mat outputGate = m.col(1); + Mat forgetGate = m.col(2); + std::swap_ranges(outputGate.begin(), outputGate.end(), forgetGate.begin()); + }; + + toIFOC(Wx); + toIFOC(Wh); + toIFOC(b); - // IFGO->IGFO - for (int k = 0; k < numDirs; ++k) - { - float* WxData = Wx.ptr(k); - float* WhData = Wh.ptr(k); - float* biasData = b.ptr(k); - for (int j = 0; j < numHidden; ++j) - { - for (int i = 0; i < numFeatures; ++i) - { - std::swap(WxData[(numHidden + j) * numFeatures + i], - WxData[(numHidden * 2 + j) * numFeatures + i]); - } - for (int i = 0; i < numHidden; ++i) - { - std::swap(WhData[(numHidden + j) * numHidden + i], - WhData[(numHidden * 2 + j) * numHidden + i]); - } - std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); - } - } Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); From 9390c56831270a94af6480feb4cc330e4aa2ee5e Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 1 Apr 2022 00:00:45 +0300 Subject: [PATCH 54/84] Merge pull request #21782 from TolyaTalamanov:at/fix-1d-mat-problems [G-API] Fix problems with 1D cv::Mat as graph output * Fix issues with 1D cv::Mat * Fix cv::Mat::create * Fix standalone build * Add test on 1d mat * Fix warning * Add additional condition * Add more tests --- modules/core/src/matrix.cpp | 2 + modules/core/test/test_mat.cpp | 11 +++ .../pipeline_builder.hpp | 3 +- modules/gapi/src/api/gbackend.cpp | 6 ++ 
modules/gapi/test/gapi_sample_pipelines.cpp | 83 +++++++++++++++++++ 5 files changed, 104 insertions(+), 1 deletion(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 6a381c15a0..8111dc2230 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -664,6 +664,8 @@ void Mat::create(int d, const int* _sizes, int _type) if( data && (d == dims || (d == 1 && dims <= 2)) && _type == type() ) { + if ( dims == 1 && (d == 1 && _sizes[0] == size[0]) ) + return; if( d == 2 && rows == _sizes[0] && cols == _sizes[1] ) return; for( i = 0; i < d; i++ ) diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 6c885a4dce..61ba306ba6 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -2461,5 +2461,16 @@ TEST(Mat, reverse_iterator_19967) } +TEST(Mat, Recreate1DMatWithSameMeta) +{ + std::vector dims = {100}; + auto depth = CV_8U; + cv::Mat m(dims, depth); + + // By default m has dims: [1, 100] + m.dims = 1; + + EXPECT_NO_THROW(m.create(dims, depth)); +} }} // namespace diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp index 3906ae4f4c..a3f187249d 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp @@ -133,7 +133,8 @@ struct DummyCall { if (output.dims.size() == 2) { return cv::GMatDesc(output.precision, 1, - cv::Size(output.dims[0], output.dims[1])); + // NB: Dims[H, W] -> Size(W, H) + cv::Size(output.dims[1], output.dims[0])); } return cv::GMatDesc(output.precision, output.dims); } diff --git a/modules/gapi/src/api/gbackend.cpp b/modules/gapi/src/api/gbackend.cpp index f063dd0eb9..e3b1e7123d 100644 --- a/modules/gapi/src/api/gbackend.cpp +++ b/modules/gapi/src/api/gbackend.cpp @@ -411,6 +411,12 @@ void createMat(const cv::GMatDesc &desc, cv::Mat& mat) { GAPI_Assert(!desc.planar); 
mat.create(desc.dims, desc.depth); +#if !defined(GAPI_STANDALONE) + // NB: WA for 1D mats. + if (desc.dims.size() == 1u) { + mat.dims = 1; + } +#endif } } diff --git a/modules/gapi/test/gapi_sample_pipelines.cpp b/modules/gapi/test/gapi_sample_pipelines.cpp index b4d9f3e100..e48d36b3ac 100644 --- a/modules/gapi/test/gapi_sample_pipelines.cpp +++ b/modules/gapi/test/gapi_sample_pipelines.cpp @@ -49,6 +49,24 @@ namespace static GMatDesc outMeta(GMatDesc in) { return in; } }; + G_TYPED_KERNEL(GZeros, , "org.opencv.test.zeros") + { + static GMatDesc outMeta(GMatDesc /*in*/, GMatDesc user_desc) + { + return user_desc; + } + }; + + GAPI_OCV_KERNEL(GOCVZeros, GZeros) + { + static void run(const cv::Mat& /*in*/, + const cv::GMatDesc& /*desc*/, + cv::Mat& out) + { + out.setTo(0); + } + }; + // These definitons test the correct macro work if the kernel has multiple output values G_TYPED_KERNEL(GRetGArrayTupleOfGMat2Kernel, >(GMat, Scalar)>, "org.opencv.test.retarrayoftupleofgmat2kernel") {}; G_TYPED_KERNEL(GRetGArraTupleyOfGMat3Kernel, >(GMat)>, "org.opencv.test.retarrayoftupleofgmat3kernel") {}; @@ -430,4 +448,69 @@ TEST(GAPI_Pipeline, ReplaceDefaultByFunctor) EXPECT_TRUE(f.is_called); } +TEST(GAPI_Pipeline, GraphOutputIs1DMat) +{ + int dim = 100; + cv::Mat in_mat(1, 1, CV_8UC3); + cv::Mat out_mat; + + cv::GMat in; + auto cc = cv::GComputation(in, GZeros::on(in, cv::GMatDesc(CV_8U, {dim}))) + .compile(cv::descr_of(in_mat), cv::compile_args(cv::gapi::kernels())); + + // NB: Computation is able to write 1D output cv::Mat to empty out_mat. + ASSERT_NO_THROW(cc(cv::gin(in_mat), cv::gout(out_mat))); + ASSERT_EQ(1, out_mat.size.dims()); + ASSERT_EQ(dim, out_mat.size[0]); + + // NB: Computation is able to write 1D output cv::Mat + // to pre-allocated with the same meta out_mat. 
+ ASSERT_NO_THROW(cc(cv::gin(in_mat), cv::gout(out_mat))); + ASSERT_EQ(1, out_mat.size.dims()); + ASSERT_EQ(dim, out_mat.size[0]); +} + +TEST(GAPI_Pipeline, 1DMatBetweenIslands) +{ + int dim = 100; + cv::Mat in_mat(1, 1, CV_8UC3); + cv::Mat out_mat; + + cv::Mat ref_mat({dim}, CV_8U); + ref_mat.dims = 1; + ref_mat.setTo(0); + + cv::GMat in; + auto out = cv::gapi::copy(GZeros::on(cv::gapi::copy(in), cv::GMatDesc(CV_8U, {dim}))); + auto cc = cv::GComputation(in, out) + .compile(cv::descr_of(in_mat), cv::compile_args(cv::gapi::kernels())); + + cc(cv::gin(in_mat), cv::gout(out_mat)); + + EXPECT_EQ(0, cv::norm(out_mat, ref_mat)); +} + +TEST(GAPI_Pipeline, 1DMatWithinSingleIsland) +{ + int dim = 100; + cv::Size blur_sz(3, 3); + cv::Mat in_mat(10, 10, CV_8UC3); + cv::randu(in_mat, 0, 255); + cv::Mat out_mat; + + cv::Mat ref_mat({dim}, CV_8U); + ref_mat.dims = 1; + ref_mat.setTo(0); + + cv::GMat in; + auto out = cv::gapi::blur( + GZeros::on(cv::gapi::blur(in, blur_sz), cv::GMatDesc(CV_8U, {dim})), blur_sz); + auto cc = cv::GComputation(in, out) + .compile(cv::descr_of(in_mat), cv::compile_args(cv::gapi::kernels())); + + cc(cv::gin(in_mat), cv::gout(out_mat)); + + EXPECT_EQ(0, cv::norm(out_mat, ref_mat)); +} + } // namespace opencv_test From 4e8c507276b71c987371a9239339a443367da59c Mon Sep 17 00:00:00 2001 From: Yash Singhal Date: Wed, 30 Mar 2022 11:41:50 +0530 Subject: [PATCH 55/84] Update Condition Update connectedcomponents.cpp --- modules/imgproc/src/connectedcomponents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/connectedcomponents.cpp b/modules/imgproc/src/connectedcomponents.cpp index f2d41f454d..a2f4b6e890 100644 --- a/modules/imgproc/src/connectedcomponents.cpp +++ b/modules/imgproc/src/connectedcomponents.cpp @@ -4339,8 +4339,8 @@ namespace cv{ // without going outside the image limits. 
#define condition_b c-1>=0 && r-2>=0 && img_row_prev_prev[c-1]>0 #define condition_c r-2>=0 && img_row_prev_prev[c]>0 - #define condition_d c+1=0 && img_row_prev_prev[c+1]>0 - #define condition_e c+2=0 && img_row_prev[c-1]>0 + #define condition_d c+1=0 && img_row_prev_prev[c+1]>0 + #define condition_e c+2=0 && img_row_prev_prev[c+2]>0 #define condition_g c-2>=0 && r-1>=0 && img_row_prev[c-2]>0 #define condition_h c-1>=0 && r-1>=0 && img_row_prev[c-1]>0 From 7b582b71ba8b53abb930dd7afab2e386dcddccc0 Mon Sep 17 00:00:00 2001 From: Zihao Mu Date: Fri, 1 Apr 2022 05:42:11 +0800 Subject: [PATCH 56/84] Merge pull request #21036 from fengyuentau:timvx_backend_support dnn: TIM-VX NPU backend support * Add TimVX NPU backend for DNN module. * use official branch from tim-vx repo; fix detecting viv sdk Co-authored-by: fytao --- 3rdparty/libtim-vx/tim-vx.cmake | 73 ++ CMakeLists.txt | 16 + cmake/OpenCVFindTIMVX.cmake | 69 ++ modules/dnn/CMakeLists.txt | 9 + .../dnn/include/opencv2/dnn/all_layers.hpp | 6 +- modules/dnn/include/opencv2/dnn/dnn.hpp | 17 +- .../dnn/src/int8layers/batch_norm_layer.cpp | 122 +++ .../dnn/src/int8layers/convolution_layer.cpp | 260 +++++ .../dnn/src/int8layers/elementwise_layers.cpp | 138 +++ modules/dnn/src/int8layers/eltwise_layer.cpp | 150 +++ .../src/int8layers/fully_connected_layer.cpp | 133 +++ modules/dnn/src/int8layers/pooling_layer.cpp | 157 ++- .../dnn/src/int8layers/quantization_utils.cpp | 83 ++ modules/dnn/src/int8layers/softmax_layer.cpp | 106 +- modules/dnn/src/layer.cpp | 10 + modules/dnn/src/layers/batch_norm_layer.cpp | 1 + modules/dnn/src/layers/concat_layer.cpp | 101 ++ modules/dnn/src/layers/convolution_layer.cpp | 1 + modules/dnn/src/layers/elementwise_layers.cpp | 7 + modules/dnn/src/layers/eltwise_layer.cpp | 3 +- .../dnn/src/layers/fully_connected_layer.cpp | 2 + modules/dnn/src/layers/permute_layer.cpp | 128 +++ modules/dnn/src/layers/pooling_layer.cpp | 11 + modules/dnn/src/layers/reshape_layer.cpp | 111 +++ 
modules/dnn/src/layers/softmax_layer.cpp | 2 + modules/dnn/src/legacy_backend.cpp | 8 + modules/dnn/src/net_impl.cpp | 26 +- modules/dnn/src/net_impl.hpp | 9 + modules/dnn/src/net_impl_backend.cpp | 22 +- modules/dnn/src/net_impl_fuse.cpp | 3 +- modules/dnn/src/onnx/onnx_importer.cpp | 14 +- modules/dnn/src/op_timvx.cpp | 931 ++++++++++++++++++ modules/dnn/src/op_timvx.hpp | 187 ++++ modules/dnn/src/registry.cpp | 8 + modules/dnn/test/test_common.hpp | 1 + modules/dnn/test/test_common.impl.hpp | 7 + modules/dnn/test/test_int8_layers.cpp | 80 +- 37 files changed, 2982 insertions(+), 30 deletions(-) create mode 100644 3rdparty/libtim-vx/tim-vx.cmake create mode 100644 cmake/OpenCVFindTIMVX.cmake create mode 100644 modules/dnn/src/op_timvx.cpp create mode 100644 modules/dnn/src/op_timvx.hpp diff --git a/3rdparty/libtim-vx/tim-vx.cmake b/3rdparty/libtim-vx/tim-vx.cmake new file mode 100644 index 0000000000..7fb9f34352 --- /dev/null +++ b/3rdparty/libtim-vx/tim-vx.cmake @@ -0,0 +1,73 @@ +set(TIMVX_COMMIT_HASH "1d9c7ab941b3d8d9c4d28d80058402725731e3d6") +set(OCV_TIMVX_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtim-vx") +set(OCV_TIMVX_SOURCE_PATH "${OCV_TIMVX_DIR}/TIM-VX-${TIMVX_COMMIT_HASH}") + +# Download TIM-VX source code +if(EXISTS "${OCV_TIMVX_SOURCE_PATH}") + message(STATUS "TIM-VX: Use cache of TIM-VX source code at ${OCV_TIMVX_SOURCE_PATH}") + set(TIMVX_FOUND ON) +else() + set(OCV_TIMVX_FILENAME "${TIMVX_COMMIT_HASH}.zip") + set(OCV_TIMVX_URL "https://github.com/VeriSilicon/TIM-VX/archive/") + set(timvx_zip_md5sum 92619cc4498014ac7a09834d5e33ebd5) + + ocv_download(FILENAME ${OCV_TIMVX_FILENAME} + HASH ${timvx_zip_md5sum} + URL "${OCV_TIMVX_URL}" + DESTINATION_DIR "${OCV_TIMVX_DIR}" + ID "TIM-VX" + STATUS res + UNPACK RELATIVE_URL) + if(res) + set(TIMVX_FOUND ON) + message(STATUS "TIM-VX: Source code downloaded at ${OCV_TIMVX_SOURCE_PATH}.") + else() + set(TIMVX_FOUND OFF) + message(STATUS "TIM-VX: Failed to download source code from github. 
Turning off TIMVX_FOUND") + return() + endif() +endif() + +# set VIVANTE SDK especially for x86_64 which comes along with TIM-VX source code +if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) + set(VIVANTE_SDK_DIR "${OCV_TIMVX_SOURCE_PATH}/prebuilt-sdk/x86_64_linux") + message(STATUS "TIM-VX: Build from source using prebuilt x86_64 VIVANTE SDK.") +endif() + +# Verify if requested VIVANTE SDK libraries are all found +find_vivante_sdk_libs(missing ${VIVANTE_SDK_DIR}) +if(missing) + message(STATUS "TIM-VX: Failed to find ${missing} in ${VIVANTE_SDK_DIR}/lib. Turning off TIMVX_VIV_FOUND") + set(TIMVX_VIV_FOUND OFF) +else() + message(STATUS "TIM-VX: dependent VIVANTE SDK libraries are found at ${VIVANTE_SDK_DIR}/lib.") + set(TIMVX_VIV_FOUND ON) +endif() + +if(TIMVX_VIV_FOUND) + # vars used by TIM-VX CMake scripts + set(EXTERNAL_VIV_SDK "${VIVANTE_SDK_DIR}" CACHE INTERNAL "" FORCE) + set(VIV_SDK_DRIVER_PREFIX "lib" CACHE INTERNAL "" FORCE) +endif() + +if(TIMVX_FOUND AND TIMVX_VIV_FOUND) + set(BUILD_TIMVX ON) +else() + return() +endif() + +if(BUILD_TIMVX) + set(HAVE_TIMVX 1) + + ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wstrict-prototypes -Wundef -Wsign-compare -Wmissing-prototypes -Wmissing-declarations -Wstrict-aliasing -Wunused-but-set-variable -Wmaybe-uninitialized -Wshadow -Wsuggest-override -Wswitch) + ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-parameter -Wstrict-prototypes -Wundef -Wsign-compare -Wunused-but-set-variable -Wshadow -Wsuggest-override -Wmissing-declarations -Wswitch) + + set(TIMVX_INC_DIR "${OCV_TIMVX_SOURCE_PATH}/include" CACHE INTERNAL "TIM-VX include directory") + if(EXISTS "${OCV_TIMVX_SOURCE_PATH}/CMakeLists.txt") + add_subdirectory("${OCV_TIMVX_SOURCE_PATH}" "${OCV_TIMVX_DIR}/build") + else() + message(WARNING "TIM-VX: Missing 'CMakeLists.txt' in the source code: ${OCV_TIMVX_SOURCE_PATH}") + endif() + ocv_install_target(tim-vx EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + set(TIMVX_LIB "tim-vx") 
+endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 209e653cb2..fa409f516c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -453,6 +453,9 @@ OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF VISIBLE_IF TRUE VERIFY HAVE_ONNX) +OCV_OPTION(WITH_TIMVX "Include Tim-VX support" OFF + VISIBLE_IF TRUE + VERIFY HAVE_TIMVX) # OpenCV build components # =================================================== @@ -733,6 +736,9 @@ include(cmake/OpenCVFindProtobuf.cmake) if(WITH_TENGINE) include(cmake/OpenCVFindTengine.cmake) endif() +if(WITH_TIMVX) + include(cmake/OpenCVFindTIMVX.cmake) +endif() # ---------------------------------------------------------------------------- # Detect other 3rd-party libraries/tools @@ -1645,6 +1651,16 @@ if(WITH_WEBNN OR HAVE_WEBNN) endif() endif() +if(WITH_TIMVX) + status("") + status(" Tim-VX:" HAVE_TIMVX THEN "YES" ELSE "NO") + if(HAVE_TIMVX) + status(" Include path" TIMVX_INCLUDE_DIR THEN "${TIMVX_INCLUDE_DIR}" ELSE "NO") + status(" Link libraries:" TIMVX_LIBRARY THEN "${TIMVX_LIBRARY}" ELSE "NO") + status(" VIVANTE SDK path" VIVANTE_SDK_DIR THEN "${VIVANTE_SDK_DIR}" ELSE "NO") + endif() +endif() + if(WITH_OPENCL OR HAVE_OPENCL) ocv_build_features_string(opencl_features IF HAVE_OPENCL_SVM THEN "SVM" diff --git a/cmake/OpenCVFindTIMVX.cmake b/cmake/OpenCVFindTIMVX.cmake new file mode 100644 index 0000000000..339f726bd9 --- /dev/null +++ b/cmake/OpenCVFindTIMVX.cmake @@ -0,0 +1,69 @@ +set(TIMVX_INSTALL_DIR "" CACHE PATH "Path to libtim-vx installation") +set(VIVANTE_SDK_DIR "" CACHE PATH "Path to VIVANTE SDK needed by TIM-VX.") +set(VIVANTE_SDK_LIB_CANDIDATES "OpenVX;VSC;GAL;ArchModelSw;NNArchPerf" CACHE STRING "VIVANTE SDK library candidates") + +# Ensure VIVANTE SDK library candidates are present in given search path +function(find_vivante_sdk_libs _viv_notfound _viv_search_path) + foreach(one ${VIVANTE_SDK_LIB_CANDIDATES}) + #NO_DEFAULT_PATH is used to 
ensure VIVANTE SDK libs are from one only source + find_library(VIV_${one}_LIB ${one} PATHS "${_viv_search_path}/lib" NO_DEFAULT_PATH) + if(NOT VIV_${one}_LIB) + list(APPEND _viv_notfound_list ${one}) + endif() + endforeach() + set(${_viv_notfound} ${_viv_notfound_list} PARENT_SCOPE) +endfunction() +# Default value for VIVANTE_SDK_DIR: /usr +if(NOT VIVANTE_SDK_DIR) + set(VIVANTE_SDK_DIR "/usr") +endif() +# Environment variable VIVANTE_SDK_DIR overrides the one in this script +if(DEFINED ENV{VIVANTE_SDK_DIR}) + set(VIVANTE_SDK_DIR $ENV{VIVANTE_SDK_DIR}) + message(STATUS "TIM-VX: Load VIVANTE_SDK_DIR from system environment: ${VIVANTE_SDK_DIR}") +endif() + + +# Compile with pre-installed TIM-VX; Or compile together with TIM-VX from source +if(TIMVX_INSTALL_DIR AND NOT BUILD_TIMVX) + message(STATUS "TIM-VX: Use binaries at ${TIMVX_INSTALL_DIR}") + set(BUILD_TIMVX OFF) + + set(TIMVX_INC_DIR "${TIMVX_INSTALL_DIR}/include" CACHE INTERNAL "TIM-VX include directory") + find_library(TIMVX_LIB "tim-vx" PATHS "${TIMVX_INSTALL_DIR}/lib") + if(TIMVX_LIB) + set(TIMVX_FOUND ON) + else() + set(TIMVX_FOUND OFF) + endif() + + # Verify if requested VIVANTE SDK libraries are all found + find_vivante_sdk_libs(missing ${VIVANTE_SDK_DIR}) + if(missing) + message(STATUS "TIM-VX: Failed to find ${missing} in ${VIVANTE_SDK_DIR}/lib. 
Turning off TIMVX_VIV_FOUND") + set(TIMVX_VIV_FOUND OFF) + else() + message(STATUS "TIM-VX: dependent VIVANTE SDK libraries are found at ${VIVANTE_SDK_DIR}/lib.") + set(TIMVX_VIV_FOUND ON) + endif() +else() + message(STATUS "TIM-VX: Build from source") + include("${OpenCV_SOURCE_DIR}/3rdparty/libtim-vx/tim-vx.cmake") +endif() + +if(TIMVX_FOUND AND TIMVX_VIV_FOUND) + set(HAVE_TIMVX 1) + + message(STATUS "TIM-VX: Found TIM-VX includes: ${TIMVX_INC_DIR}") + message(STATUS "TIM-VX: Found TIM-VX library: ${TIMVX_LIB}") + set(TIMVX_LIBRARY ${TIMVX_LIB}) + set(TIMVX_INCLUDE_DIR ${TIMVX_INC_DIR}) + + message(STATUS "TIM-VX: Found VIVANTE SDK libraries: ${VIVANTE_SDK_DIR}/lib") + link_directories(${VIVANTE_SDK_DIR}/lib) +endif() + +MARK_AS_ADVANCED( + TIMVX_INC_DIR + TIMVX_LIB +) diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index a9540f1088..e0773d5214 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -23,6 +23,10 @@ if(WITH_WEBNN AND HAVE_WEBNN) add_definitions(-DHAVE_WEBNN=1) endif() +if(HAVE_TIMVX) + add_definitions(-DHAVE_TIMVX=1) +endif() + ocv_option(OPENCV_DNN_CUDA "Build with CUDA support" HAVE_CUDA AND HAVE_CUBLAS @@ -146,6 +150,11 @@ if(HAVE_TENGINE) list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive) endif() +if(HAVE_TIMVX) + list(APPEND include_dirs ${TIMVX_INCLUDE_DIR}) + list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive) +endif() + set(webnn_srcs "") if(NOT EMSCRIPTEN) if(HAVE_WEBNN) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index c8c14759d3..2acb41076d 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -262,7 +262,7 @@ CV__DNN_INLINE_NS_BEGIN { public: int input_zp, output_zp; - float output_sc; + float input_sc, output_sc; static Ptr create(const LayerParams& params); }; @@ -322,6 +322,7 @@ CV__DNN_INLINE_NS_BEGIN { public: 
int input_zp, output_zp; + float input_sc, output_sc; static Ptr create(const LayerParams& params); }; @@ -365,7 +366,8 @@ CV__DNN_INLINE_NS_BEGIN class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer { public: - int output_zp; + int input_zp, output_zp; + float input_sc, output_sc; static Ptr create(const LayerParams& params); }; diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index ab443cd67e..8bca6c538b 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -75,6 +75,7 @@ CV__DNN_INLINE_NS_BEGIN DNN_BACKEND_VKCOM, DNN_BACKEND_CUDA, DNN_BACKEND_WEBNN, + DNN_BACKEND_TIMVX, #ifdef __OPENCV_BUILD DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() @@ -95,7 +96,8 @@ CV__DNN_INLINE_NS_BEGIN DNN_TARGET_FPGA, //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin. DNN_TARGET_CUDA, DNN_TARGET_CUDA_FP16, - DNN_TARGET_HDDL + DNN_TARGET_HDDL, + DNN_TARGET_NPU, }; CV_EXPORTS std::vector< std::pair > getAvailableBackends(); @@ -321,6 +323,19 @@ CV__DNN_INLINE_NS_BEGIN const std::vector>& outputs ); + /** + * @brief Returns a TimVX backend node + * + * @param timVxInfo void pointer to CSLContext object + * @param inputsWrapper layer inputs + * @param outputsWrapper layer outputs + * @param isLast if the node is the last one of the TimVX Graph. + */ + virtual Ptr initTimVX(void* timVxInfo, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast); + /** * @brief Automatic Halide scheduling based on layer hyper-parameters. * @param[in] node Backend node with Halide functions. 
diff --git a/modules/dnn/src/int8layers/batch_norm_layer.cpp b/modules/dnn/src/int8layers/batch_norm_layer.cpp index c5b8c3d9e9..a3a9ebb261 100644 --- a/modules/dnn/src/int8layers/batch_norm_layer.cpp +++ b/modules/dnn/src/int8layers/batch_norm_layer.cpp @@ -4,6 +4,8 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" + #include namespace cv @@ -103,6 +105,11 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + return true; + } + return backendId == DNN_BACKEND_OPENCV; } @@ -116,6 +123,121 @@ public: return false; } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + const int numChannels = (int)origin_bias.total(); + Mat tvGamma = origin_weights.reshape(1, numChannels); + Mat tvBeta = origin_bias.reshape(1, numChannels); + + std::vector inputsIndex; + std::vector outputsIndex; + + Mat tvMean = Mat::zeros(1, numChannels, CV_32F); + tvMean = tvMean.reshape(1, numChannels); + Mat tvVar = Mat::ones(1, numChannels, CV_32F); + tvVar = tvVar.reshape(1, numChannels); + + CV_Assert(inputsWrapper.size() == 1); + if (outputsWrapper.size() > 1) + return Ptr(); + + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + Mat tmpInput = inputWrapper->getMat(); + + if (tmpInput.dims != 4) // Only support 4 dim input. 
+ return Ptr(); + + int input_index = -1, mean_index = -1, var_index = -1, gamma_index = -1, beta_index = -1, output_index = -1; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // Mean tensor + Ptr meanWrapper = Ptr(new TimVXBackendWrapper(tvMean)); + Ptr meanQuant; + meanWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT); + mean_index = tvGraph->addWrapper(meanWrapper); + inputsIndex.push_back(mean_index); + + // Var tensor + Ptr varWrapper = Ptr(new TimVXBackendWrapper(tvVar)); + varWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT); + var_index = tvGraph->addWrapper(varWrapper); + inputsIndex.push_back(var_index); + + // Gamma tensor + Ptr gammaWrapper = Ptr(new TimVXBackendWrapper(tvGamma)); + gammaWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT); + gamma_index = tvGraph->addWrapper(gammaWrapper); + inputsIndex.push_back(gamma_index); + + // Beta tensor + Ptr betaWrapper = Ptr(new TimVXBackendWrapper(tvBeta)); + betaWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT); + beta_index = tvGraph->addWrapper(betaWrapper); + inputsIndex.push_back(beta_index); + + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvBatchNorm = graph->CreateOperation(0.f); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvBatchNorm, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/convolution_layer.cpp b/modules/dnn/src/int8layers/convolution_layer.cpp index ea29610222..45aaa3bc19 100644 --- a/modules/dnn/src/int8layers/convolution_layer.cpp +++ b/modules/dnn/src/int8layers/convolution_layer.cpp @@ -9,6 +9,7 @@ #include "opencv2/core/hal/hal.hpp" #include "opencv2/core/hal/intrin.hpp" +#include "../op_timvx.hpp" #include #include @@ -46,6 +47,7 @@ public: int ngroups = params.get("group", 1); CV_Assert(numOutput % ngroups == 0); + input_sc = params.get("input_scale"); input_zp = params.get("input_zeropoint"); output_zp = params.get("zeropoints"); output_sc = params.get("scales"); @@ -181,6 +183,16 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { size_t ksize = kernel_size.size(); + +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX) + { + /* only Conv1d and Conv2d supported. */ + if (ksize == 2 || ksize == 1) + return true; + return false; + } +#endif // Only default backend and Conv1D/Conv2D/Conv3D are supported return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3; } @@ -261,6 +273,11 @@ public: bool setActivation(const Ptr& layer) CV_OVERRIDE { + // TODO! add activation in convolution. 
+#ifdef HAVE_TIMVX + if (preferableTarget == DNN_TARGET_NPU) + return false; +#endif Ptr activ_int8 = layer.dynamicCast(); if (!activ_int8.empty()) { @@ -300,6 +317,249 @@ public: outputMultiplier[outCn] = outputMultiplier[outCn+1] = outputMultiplier[outCn-1]; } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + /* TODO :support GroupConv; + Ref: + https://github.com/VeriSilicon/TIM-VX/blob/main/docs/Operators.md#conv2d + Link Reference: https://github.com/VeriSilicon/TIM-VX/blob/main/src/tim/vx/ops/conv1d_test.cc + */ + + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + Mat tvWeightMat = blobs[0]; + + std::vector tvBiasVec; + tvBiasVec.assign(biasvec.begin(), biasvec.end() - 2); + Mat tvBiasMat(tvBiasVec); + + for (int i = 0; i < numOutput; i++) + { + tvBiasVec[i] += input_zp * (cv::sum(blobs[0].row(i))[0]); + } + + // Padding Type + tim::vx::PadType tvPadType; + + if (padMode.empty()) + { + tvPadType = tim::vx::PadType::AUTO; // TODO! check the padding type. 
+ } + else if(padMode == "VALID") + { + tvPadType = tim::vx::PadType::VALID; + } + else if (padMode == "SAME") + { + tvPadType = tim::vx::PadType::SAME; + } + else + { + CV_Error(Error::StsError, "Unsupported padding mode in TimVXBackend!"); + } + + size_t ksize = kernel_size.size(); + + std::vector inputsIndex; + std::vector outputsIndex; + + CV_Assert(inputsWrapper.size() == 1); + CV_Assert(ksize == 2 || ksize == 1); + + std::vector weight_scs, bias_scs; + std::vector weight_zps, bias_zps; + + weight_scs.resize(numOutput); + bias_scs.resize(numOutput); + + for (int i = 0; i < numOutput; i++) + { + bias_scs[i] = outputMultiplier[i] * output_sc; + weight_scs[i] = bias_scs[i] / input_sc; + } + + weight_zps.assign(numOutput, 0); + bias_zps.assign(numOutput, 0); + + bool tvSymmetric; + tvSymmetric = getQuantType(weight_scs, numOutput); + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + int input_index = -1, weight_index = -1, bias_index = -1, output_index = -1; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // weight Tensor + auto tvConvWeightShape = shape(tvWeightMat); + Mat tvInputMat = inputWrapper->getMat(); + // calculate group value. + int group = tvInputMat.size[1] / tvWeightMat.size[1]; + + // TODO! It will be supported in future. + if (tvSymmetric && tvWeightMat.total() == tvConvWeightShape[0]) + return Ptr(); + // Reverse weight shape From OpenCV NCHW to TimVX WHCN. 
+ std::reverse(tvConvWeightShape.begin(), tvConvWeightShape.end()); + + Ptr weightWrapper = Ptr(new TimVXBackendWrapper(tvWeightMat)); + Ptr weightQuant; + + if (tvSymmetric) + { + int wtChanneldim = tvWeightMat.dims - 1; + weightQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, wtChanneldim, + weight_scs, weight_zps)); + } + else + { + weightQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0], 0)); + } + weightWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT, weightQuant); + + weight_index = tvGraph->addWrapper(weightWrapper); + inputsIndex.push_back(weight_index); + + // Bias Tensor + Ptr biasWrapper = Ptr(new TimVXBackendWrapper(tvBiasMat)); + Ptr biasQuant; + + if (tvSymmetric) + { + biasQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0, + bias_scs, bias_zps)); + } + else + { + biasQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0] * input_sc, 0)); + } + + biasWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT, biasQuant); + bias_index = tvGraph->addWrapper(biasWrapper); + inputsIndex.push_back(bias_index); + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + auto outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + // From OpenCV NCHW, to TimVX WHCN + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvConv; + + if (ksize == 2) // for conv2d + { + int multiplier = 0; + if(group == tvConvWeightShape[3] && group != 1) + multiplier = 1; + if (group == 1 || (group == tvConvWeightShape[3] && group != 1)) // Conv2D || DeConv2D + { + if (tvPadType == tim::vx::PadType::AUTO) { + tvConv = graph->CreateOperation( + tvConvWeightShape[3], tvPadType, + std::array({(uint32_t) kernel_size[1], (uint32_t) kernel_size[0]}), + std::array({(uint32_t) strides[1], (uint32_t) strides[0]}), + std::array({(uint32_t) dilations[1], (uint32_t) dilations[0]}), + std::array({(uint32_t) pads_begin[1], (uint32_t) pads_end[1], + (uint32_t) pads_begin[0], (uint32_t) pads_end[0]}), + multiplier); + } + else + { + tvConv = graph->CreateOperation( + tvPadType, + std::array({(uint32_t) strides[1], (uint32_t) strides[0]}), + std::array({(uint32_t) dilations[1], (uint32_t) dilations[0]}), + multiplier); + } + } + else + { + // GroupedConv2d + if (tvPadType == tim::vx::PadType::AUTO) + { + tvConv = graph->CreateOperation( + std::array({(uint32_t) pads_begin[1], (uint32_t) pads_end[1], + (uint32_t) pads_begin[0], (uint32_t) pads_end[0]}), + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + std::array({(uint32_t)dilations[1], (uint32_t)dilations[0]}), + group); + } + else + { + tvConv = graph->CreateOperation( + tvPadType, + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + std::array({(uint32_t)dilations[1], (uint32_t)dilations[0]}), + group); + } + } + } + else + { + // for Conv1d + if (group != 1) + CV_Error( CV_StsNotImplemented, " Grouped Conv1d or Depth-Wise Conv1d are not supported by " + "TimVX Backend. 
Please try OpenCV Backend."); + tvConv = graph->CreateOperation( + tvConvWeightShape[2], tvPadType, (uint32_t)kernel_size[0], + (uint32_t)strides[0],(uint32_t)dilations[0], + std::array({(uint32_t)pads_begin[0], (uint32_t)pads_end[0]})); + } + // Create TimVXBackendNode + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvConv, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + class ParallelConv : public cv::ParallelLoopBody { public: diff --git a/modules/dnn/src/int8layers/elementwise_layers.cpp b/modules/dnn/src/int8layers/elementwise_layers.cpp index 75118b6bc1..f1b78f48fb 100644 --- a/modules/dnn/src/int8layers/elementwise_layers.cpp +++ b/modules/dnn/src/int8layers/elementwise_layers.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include #include @@ -16,14 +17,45 @@ namespace dnn class ActivationLayerInt8Impl CV_FINAL : public ActivationLayerInt8 { public: + int input_zp, output_zp; + float input_sc, output_sc; + float slope = 0.0f; + +#ifdef HAVE_TIMVX + tvActivationType tvActType; +#endif ActivationLayerInt8Impl(const LayerParams ¶ms) { setParamsFrom(params); activationLUT = !blobs.empty() ? blobs[0] : Mat(); + + input_zp = params.get("input_zeropoint"); + input_sc = params.get("input_scale"); + output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); + + if (params.has("slope")) + { + slope = params.get("slope"); + } + +#ifdef HAVE_TIMVX + tvActType = getTimVXActType(type); +#endif + } virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX) + { + // TODO!: Leaky ReLU will be supported in future. 
+ if (tvActType == tvActReLU && slope != 0.f) + return false; + return tvActType != tvActNotSupported; + } +#endif return backendId == DNN_BACKEND_OPENCV; } @@ -106,6 +138,112 @@ public: } }; + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index, output_index; + CV_Assert(inputsWrapper.size() == 1); + + // input Tensor + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if(input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + + inputsIndex.push_back(input_index); + + // output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + Ptr outputTensor; + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvAct; + + switch(tvActType) { + case tvActReLU: + { + if (slope != 0.f) + tvAct = graph->CreateOperation(slope); + else + tvAct = graph->CreateOperation(); + break; + } + case tvActReLU6: + tvAct = graph->CreateOperation(); + break; + case tvActTanH: + tvAct = graph->CreateOperation(); + break; + case tvActSwish: + tvAct = graph->CreateOperation(); + break; + case tvActMish: + tvAct = graph->CreateOperation(); + break; + case tvActSigmoid: + tvAct = graph->CreateOperation(); + break; + case tvActELU: + tvAct = graph->CreateOperation(); + break; + default: + // TODO! check the default function. + tvAct = graph->CreateOperation(); + break; + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvAct, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/eltwise_layer.cpp b/modules/dnn/src/int8layers/eltwise_layer.cpp index a522bc9031..e0a8d4787c 100644 --- a/modules/dnn/src/int8layers/eltwise_layer.cpp +++ b/modules/dnn/src/int8layers/eltwise_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include namespace cv @@ -22,6 +23,10 @@ public: } op; std::vector coeffs; std::vector zeropoints; + std::vector scales; + + int output_zp; + float output_sc; enum OutputChannelsMode { @@ -84,6 +89,20 @@ public: } } + if (params.has("input_scales")) + { + DictValue sc = params.get("input_scales"); + int i, n = sc.size(); + scales.resize(n); + for (i 
= 0; i < n; i++) + { + scales[i] = sc.get(i); + } + } + + output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); + channelsModeInput = ELTWISE_CHANNNELS_SAME; if (params.has("output_channels_mode")) { @@ -116,6 +135,9 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + // For TimVX Backend, only ELTWISE_CHANNNELS_SAME was supported. + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + return channelsModeInput == ELTWISE_CHANNNELS_SAME; return backendId == DNN_BACKEND_OPENCV; } @@ -219,6 +241,134 @@ public: } } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + if (inputsWrapper.size() != 2) + return Ptr(); + + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + bool isSub = false; + // TODO: support variable coeffs. + if (op == SUM) + { + CV_Assert(coeffs.size() == scales.size()); + std::vector originalCoeffs; + + for (int i = 0; i < coeffs.size(); i++) + { + originalCoeffs.push_back(coeffs[i] * output_sc / scales[i]); + } + + float eps = std::numeric_limits::epsilon(); + if (std::fabs(originalCoeffs[0] - 1.0f) <= eps * std::fabs(originalCoeffs[0] + 1.0f) && + std::fabs(originalCoeffs[1] + 1.0f) <= eps * std::fabs(originalCoeffs[1] - 1.0f)) + { + // Sub, if coeffs = {1., -1.}, isSub = true. + isSub = true; + } + else if (std::fabs(originalCoeffs[0] - 1.0f) <= eps * std::fabs(originalCoeffs[0] + 1.0f) && + std::abs(originalCoeffs[1] - 1.0f) <= eps * std::abs(originalCoeffs[1] + 1.0f)) + { + // Sum, if coeff = {1., 1.}, isSub = false. 
+ isSub = false; + } + else + { + return Ptr(); + } + } + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + CV_Assert(channelsModeInput == ELTWISE_CHANNNELS_SAME); + + // Input + Ptr inputWrapper; + + CV_Assert(!scales.empty() && !zeropoints.empty()); + + for (int i = 0; i(); + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scales[i], zeropoints[i])); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + + inputsIndex.push_back(input_index); + } + + // Output + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvEltwise; + + switch (op) { + case SUM: + if (isSub) + tvEltwise = graph->CreateOperation(); + else + tvEltwise = graph->CreateOperation(); + break; + case PROD: + tvEltwise = graph->CreateOperation(); + break; + case MAX: + tvEltwise = graph->CreateOperation(); + break; + default: + CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvEltwise, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + class EltwiseInvoker : public ParallelLoopBody { EltwiseLayerInt8Impl& self; diff --git a/modules/dnn/src/int8layers/fully_connected_layer.cpp b/modules/dnn/src/int8layers/fully_connected_layer.cpp index 83da677a47..0887388b0b 100644 --- a/modules/dnn/src/int8layers/fully_connected_layer.cpp +++ b/modules/dnn/src/int8layers/fully_connected_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include @@ -19,7 +20,11 @@ public: FullyConnectedLayerInt8Impl(const LayerParams& params) { setParamsFrom(params); + + input_sc = params.get("input_scale"); + input_zp = params.get("input_zeropoint"); output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); axis = params.get("axis", 1); if (blobs.size() == 3) { @@ -71,11 +76,25 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + if (biasMat.empty()) + return true; + else + return false; + } + return backendId == DNN_BACKEND_OPENCV; } virtual bool setActivation(const Ptr& layer) CV_OVERRIDE { + // TODO! 
add activation in Fully connection. +#ifdef HAVE_TIMVX + if(preferableTarget == DNN_TARGET_NPU) + return false; +#endif + Ptr activ_int8 = layer.dynamicCast(); if (!activ_int8.empty()) { @@ -87,6 +106,120 @@ public: return false; } + + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + int numOutput = blobs[0].size[0]; + Mat weightMat = blobs[0]; + + std::vector inputsIndex; + std::vector outputsIndex; + + std::vector weight_scs, bias_scs; + std::vector weight_zps; + + bias_scs.resize(numOutput); + weight_scs.resize(numOutput); + + for (int i = 0; i < numOutput; i++) + { + bias_scs[i] = outputMultiplier.at(i) * output_sc; + weight_scs[i] = bias_scs[i] / input_sc; + } + + weight_zps.assign(numOutput, 0); + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + int input_index = -1, weight_index = -1, output_index = -1; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor() || input_index == -1) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // weight tensor + Ptr weightWrapper = Ptr(new TimVXBackendWrapper(weightMat)); + Ptr weightQuant; + + bool tvSymmetric; + tvSymmetric = getQuantType(weight_scs, numOutput); + + if (tvSymmetric) + { + // TODO! 
fix the following issue. + // TimVX does not support the SYMMETRIC PER CHANNEL MatMul. + return Ptr(); + } + else + { + weightQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0], 0)); + } + weightWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT, weightQuant); + + weight_index = tvGraph->addWrapper(weightWrapper); + inputsIndex.push_back(weight_index); + + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvMatmul; + + tvMatmul = graph->CreateOperation(false, true); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvMatmul, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + class FullyConnected : public ParallelLoopBody { public: diff --git a/modules/dnn/src/int8layers/pooling_layer.cpp b/modules/dnn/src/int8layers/pooling_layer.cpp index 20a0486a46..98cf17c06c 100644 --- a/modules/dnn/src/int8layers/pooling_layer.cpp +++ b/modules/dnn/src/int8layers/pooling_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include "opencv2/core/hal/intrin.hpp" #include @@ -26,9 +27,12 @@ public: globalPooling = false; isGlobalPooling = std::vector(3, false); output_zp = params.get("zeropoints"); - input_zp = params.get("input_zeropoint", 0); + 
input_zp = params.get("input_zeropoint", output_zp); multiplier = params.get("multiplier", 1.f); + output_sc = params.get("scales"); + input_sc = multiplier * output_sc; + hasDynamicShapes = params.get("has_dynamic_shapes", false); shapesInitialized = !hasDynamicShapes; @@ -103,6 +107,24 @@ public: else return false; } + else if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + // Only pool 2d and pool 1d were supported. + if (kernel_size.size() == 3) + { + // fallback to CPU implementation. + preferableTarget = DNN_TARGET_CPU; + return false; + } + if (!avePoolPaddedArea) // TimVX does not support exclude padding. + return false; + if (globalPooling) // TODO support globalPooling in TimVX backend. + return false; + if (kernel_size.size() == 2) + return type == MAX || type == AVE; + return false; + } + return false; } @@ -116,6 +138,139 @@ public: return false; } + + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + tim::vx::PoolType tvPoolType; + tim::vx::RoundType tvRoundType; + size_t ksize = kernel_size.size(); + if (ksize != 2) + return Ptr(); + + // type Change from OpenCV to TimVX only MAX and AVG are supported. + switch (type) { + case MAX: { + tvPoolType = tim::vx::PoolType::MAX; + break; + } + case AVE:{ + tvPoolType = tim::vx::PoolType::AVG; + break; + } + default: + CV_Error(Error::StsNotImplemented, "Not implemented Pooling type in TimVX Backend."); + } + + // Padding Type + tim::vx::PadType tvPadType; + if (padMode.empty()) + { + tvPadType = tim::vx::PadType::AUTO; // TODO! check the padding type. 
+ } + else if(padMode == "VALID") + { + tvPadType = tim::vx::PadType::VALID; + } + else if (padMode == "SAME") + { + tvPadType = tim::vx::PadType::SAME; + } + else + { + CV_Error(Error::StsError, "Unsupported padding mode in TimVXBackend!"); + } + + if (ceilMode) + tvRoundType = tim::vx::RoundType::CEILING; + else + tvRoundType = tim::vx::RoundType::FLOOR; + + auto input = inputsWrapper[0]; + std::vector inputsIndex; + std::vector outputsIndex; + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + int input_index, output_index; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + auto outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + std::shared_ptr tvPool; + + if (tvPadType == tim::vx::PadType::AUTO) + { + tvPool = graph->CreateOperation( tvPoolType, + std::array({(uint32_t) pads_begin[1], (uint32_t) pads_end[1], + (uint32_t) pads_begin[0], (uint32_t) pads_end[0]}), + std::array({(uint32_t)kernel_size[1], (uint32_t)kernel_size[0]}), + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + tvRoundType); + } + else + { + tvPool = graph->CreateOperation( + tvPoolType, tvPadType, + std::array({(uint32_t)kernel_size[1], (uint32_t)kernel_size[0]}), + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + tvRoundType); + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvPool, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/quantization_utils.cpp b/modules/dnn/src/int8layers/quantization_utils.cpp index d72487639e..6e2f0bb61c 100644 --- a/modules/dnn/src/int8layers/quantization_utils.cpp +++ b/modules/dnn/src/int8layers/quantization_utils.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" namespace cv { @@ -149,15 +150,21 @@ public: class RequantizeLayerImpl CV_FINAL : public RequantizeLayer { public: + bool isEltwise; RequantizeLayerImpl(const LayerParams& params) { scale = params.get("scale", 1.f); shift = params.get("shift", 0.f); + isEltwise = params.get("isEltwise", false); setParamsFrom(params); } virtual bool supportBackend(int backendId) CV_OVERRIDE { + if 
(backendId == DNN_BACKEND_TIMVX && haveTimVX() && !isEltwise) + { + return true; + } return backendId == DNN_BACKEND_OPENCV; } @@ -178,6 +185,82 @@ public: outputs_arr.getMatVector(outputs); } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // preprocessing + // Check if data is 8-bit. + CV_Assert(inputsWrapper.size() == 1 && outputsWrapper.size() == 1); + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + + if (!inputWrapper->isTensor()) + { + return Ptr(); + } + + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + // Input + std::shared_ptr inputTensor = inputWrapper->getTensor(); + input_index = tvGraph->getTensorIndex(inputTensor); + if (input_index == -1) + return Ptr(); + + inputsIndex.push_back(input_index); + + Ptr inputQuant = inputWrapper->getTensorQuantization(); + + tim::vx::QuantType quanType = inputQuant->Type(); + CV_Assert(quanType == tim::vx::QuantType::ASYMMETRIC); + + std::vector scales = inputQuant->Scales(); + std::vector zeropoints = inputQuant->ZeroPoints(); + CV_Assert(!scales.empty() && !zeropoints.empty()); + int input_zp = int(zeropoints[0]); + float input_scale = scales[0]; + + float tmpOut_sc = input_scale/scale; + int tmpOut_zp = int(shift + scale * input_zp); + + // Output + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, tmpOut_sc, tmpOut_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvRequantize = graph->CreateOperation(); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvRequantize, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/softmax_layer.cpp b/modules/dnn/src/int8layers/softmax_layer.cpp index 7e3c82bc21..b2caf56fb0 100644 --- a/modules/dnn/src/int8layers/softmax_layer.cpp +++ b/modules/dnn/src/int8layers/softmax_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include #include @@ -16,11 +17,17 @@ namespace dnn class SoftMaxLayerInt8Impl CV_FINAL : public SoftmaxLayerInt8 { public: + float input_sc; + int input_zp; SoftMaxLayerInt8Impl(const LayerParams& params) { axisRaw = params.get("axis", 1); logSoftMax = params.get("log_softmax", false); + + input_sc = params.get("input_scale"); + input_zp = params.get("input_zeropoint"); + output_sc = params.get("scales"); output_zp = params.get("zeropoints"); setParamsFrom(params); @@ -41,7 +48,8 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV; + return backendId == DNN_BACKEND_OPENCV || + (backendId == DNN_BACKEND_TIMVX && haveTimVX()); } virtual bool tryFuse(Ptr& top) CV_OVERRIDE @@ -50,6 +58,102 @@ public: return !dequantize_layer.empty() && preferableTarget != DNN_TARGET_OPENCL_FP16; } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > 
&outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index, output_index; + + // input Tensor + CV_Assert(inputsWrapper.size() == 1); + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + const Mat &src = inputWrapper->getMat(); + + // convert axis from OpenCV NCHW toTimVX WHCN. + int axis = normalize_axis(axisRaw, src.dims); + int tvAxis = src.dims - 1 - axis; + if(tvAxis < 0) + tvAxis = 0; // default value is 0. + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Mat dstMat = outputWrapper->getMat(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + Ptr outputTensor; + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + if (dstMat.type() == CV_32F) + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT); + else + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + if (dstMat.type() == CV_32F) + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT); + else + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvSoftmax; + + if (logSoftMax) + { + tvSoftmax = graph->CreateOperation(tvAxis); + + } + else + { + tvSoftmax = graph->CreateOperation(1.0f, tvAxis); + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvSoftmax, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/layer.cpp b/modules/dnn/src/layer.cpp index ee5c255d57..0ed3488da6 100644 --- a/modules/dnn/src/layer.cpp +++ b/modules/dnn/src/layer.cpp @@ -74,6 +74,16 @@ Ptr Layer::initWebnn(const std::vector>& inputs return Ptr(); } +Ptr Layer::initTimVX(void* timVxInfo, + const std::vector > & inputsWrapper, + const std::vector > & outputsWrapper, + bool isLast) +{ + CV_Error(Error::StsNotImplemented, "TimVX pipeline of " + type + + " layers is not defined."); + return Ptr(); +} + Ptr Layer::tryAttach(const Ptr& node) { return Ptr(); diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index bb81f14425..377e05f5cc 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -409,6 +409,7 @@ public: { params.set("input_scale", scales[0][0]); params.set("input_zeropoint", zeropoints[0][0]); + params.set("eps", epsilon); 
params.blobs.clear(); params.blobs.push_back(origin_weights); diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 675546f76f..5ba0cd199b 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -48,6 +48,7 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" #include "../op_webnn.hpp" +#include "../op_timvx.hpp" #ifdef HAVE_OPENCL #include "opencl_kernels_dnn.hpp" @@ -72,6 +73,9 @@ public: axis = params.get("axis", 1); padding = params.get("padding", false); paddingValue = params.get("padding_value", 0); + + zeropoint = params.get("zeropoints", 0); + scale = params.get("scales", 1.0f); } virtual bool getMemoryShapes(const std::vector &inputs, @@ -113,6 +117,21 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX && haveTimVX() && !padding) + { + if (axis == -1) + return false; + int len = this->type.length(); + if (len <= 4) + return false; + if (this->type.substr(len - 4) == "Int8") + return true; + else + return false; + } +#endif + #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; @@ -393,6 +412,86 @@ public: } #endif // HAVE_DNN_NGRAPH +#ifdef HAVE_TIMVX + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + // convert axis from OpenCV NCHW toTimVX WHCN. + Mat blob0 = inputWrapper->getMat(); + + // TODO! support TimVX 5 dim in future. 
+ if(blob0.dims >4) + return Ptr(); + + int cAxis = normalize_axis(axis, blob0.dims); + int tvAxis = blob0.dims - 1 - cAxis; + CV_Assert(tvAxis>= 0); + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + // Input + Ptr tvQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scale, zeropoint)); + + for (int i = 0; i(); + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + } + + //Output + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, tvQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, tvQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvConcate = graph->CreateOperation(tvAxis, inputsWrapper.size()); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvConcate, inputsIndex, outputsIndex); + + return tvBackendNode; + } +#endif // HAVE_TIMVX + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { @@ -416,6 +515,8 @@ public: } #endif + int zeropoint; + float scale; }; Ptr ConcatLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 1af34472df..4e377b9f7e 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -2168,6 +2168,7 @@ public: float inputScale = scales[0][0], outputScale = scales[1][0]; int inputZp = zeropoints[0][0]; params.set("input_zeropoint", inputZp); + params.set("input_scale", inputScale); Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S); Mat biasQuantized(1, numOutput, CV_32S); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 0085ba7449..0accbe0fbb 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -496,6 +496,9 @@ struct ReLUFunctor : public BaseFunctor params.blobs.clear(); params.blobs.push_back(lookUpTable); } + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); + params.set("slope", slope); return true; } @@ -635,6 +638,8 @@ struct ReLU6Functor : public BaseFunctor bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, 
LayerParams& params) { + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); return true; } @@ -704,6 +709,8 @@ struct BaseDefaultFunctor : public BaseFunctor } params.blobs.clear(); params.blobs.push_back(lookUpTable); + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); return true; } diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 43d925055c..a67b0c4bb5 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -875,6 +875,8 @@ public: virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { + params.set("input_scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); + params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); if (op == SUM) { std::vector newCoeffs; @@ -897,7 +899,6 @@ public: newCoeffs[0] /= scales[1][0]; params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size())); params.set("offset", zeropoints[1][0]); - params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); return true; } return op == MAX; diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 5d0ad5fde7..e9632e20be 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -642,6 +642,8 @@ public: params.blobs.push_back(weightsQuantized.reshape(1, shape(blobs[0]))); params.blobs.push_back(biasQuantized); params.blobs.push_back(outputMultiplier); + params.set("input_scale", inputScale); + params.set("input_zeropoint", inputZp); return true; } diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 033b3d9aee..cce36b951f 100644 --- a/modules/dnn/src/layers/permute_layer.cpp 
+++ b/modules/dnn/src/layers/permute_layer.cpp @@ -47,6 +47,7 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" #include "../op_webnn.hpp" +#include "../op_timvx.hpp" #include #include @@ -108,6 +109,9 @@ public: _order.push_back(currentOrder); } + zeropoint = params.get("zeropoints", 0); + scale = params.get("scales", 1.0f); + setParamsFrom(params); checkNeedForPermutation(); } @@ -122,6 +126,20 @@ public: return true; } #endif + +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + int len = this->type.length(); + if (len <= 4) + return false; + + if (this->type.substr(len - 4) == "Int8") + return true; + else + return false; + } +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_WEBNN || @@ -471,12 +489,120 @@ public: } #endif // HAVE_VULKAN +#ifdef HAVE_TIMVX + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { + // tvGraph Initialization. 
+ auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + if (outputsWrapper.size() != 1) // only work for single outputBlob + return Ptr(); + + // Input + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scale, zeropoint)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + //Output + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + // output has the same quantized attrib. + Ptr outputQuant = inputWrapper->getTensorQuantization(); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::vector tvOrder; + if (getOrderWHCN(tvOrder)) + { + std::shared_ptr tvPermute = graph->CreateOperation(tvOrder); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvPermute, inputsIndex, outputsIndex); + + return tvBackendNode; + } + else + { + return Ptr(); + } + } +#endif // HAVE_TIMVX + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { return true; } + // convert OpenCV NCHW order to WHCN order. + bool getOrderWHCN(std::vector& orderWHCN) + { + std::map lookup; + int orderLen = _order.size(); + if (orderLen <2) + return false; + orderWHCN.assign(_order.begin(), _order.end()); + + if (orderLen == 2) + { + return true; + } + else if (orderLen >= 3) + { + for (int i = 0; i < orderLen; i++) + { + lookup[i] = orderLen - i - 1; + } + + for (int i = 0; i < orderLen; i++) + { + orderWHCN[i] = lookup[_order[i]]; + } + std::reverse(orderWHCN.begin(), orderWHCN.end()); + return true; + } + else + return false; + } + size_t _count; std::vector _order; @@ -492,6 +618,8 @@ public: #endif size_t _numAxes; + int zeropoint; + float scale; }; Ptr PermuteLayer::create(const LayerParams ¶ms) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index f8616a4184..6c584bf2dd 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -272,6 +272,17 @@ public: return true; } } + else if (backendId == DNN_BACKEND_TIMVX) + { +#ifdef HAVE_TIMVX + if (kernel_size.size() == 3) + { + // fallback to CPU implementation. 
+ preferableTarget = DNN_TARGET_CPU; + } +#endif + return false; + } return false; } diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index f62235dc20..290effd380 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -46,6 +46,7 @@ #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" #include "../op_webnn.hpp" +#include "../op_timvx.hpp" #include @@ -167,6 +168,9 @@ public: hasDynamicShapes = params.get("has_dynamic_shapes", false); shapesInitialized = !hasDynamicShapes; + zeropoint = params.get("zeropoints", 0); + scale = params.get("scales", 1.0f); + CV_Assert(numAxes >= -1); newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes); @@ -202,6 +206,18 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + int len = this->type.length(); + if (len <= 4) + return false; + + if (this->type.substr(len - 4) == "Int8") + return true; + else + return false; + } + #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; @@ -348,6 +364,99 @@ public: } #endif + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. 
+ auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + int reshapeNum = 0; + Ptr tmpWrapper, inputWrapper, outputWrapper; + for (size_t i = 0; i < outputsWrapper.size(); i++) + { + tmpWrapper = inputsWrapper[i].dynamicCast(); + Mat srcBlob = tmpWrapper->getMat(); + + tmpWrapper = outputsWrapper[i].dynamicCast(); + Mat dstBlob = tmpWrapper->getMat(); + if (dstBlob.data != srcBlob.data) + { + reshapeNum++; + inputWrapper = inputsWrapper[i].dynamicCast(); + outputWrapper = outputsWrapper[i].dynamicCast(); + } + } + + // Only work for single reshape Mat + if (reshapeNum != 1) + { + return Ptr(); + } + + // Input + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor() || input_index == -1) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scale, zeropoint)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT,tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + //Output + // Output Tensor has the same quantized attrib as Input Tesor. + Ptr outputQuant = inputWrapper->getTensorQuantization(); + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). 
+ outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + // generate output shape. + MatShape outputShape = shape(outputWrapper->getMat()); + // reverse shape, from NCHW to WHCN + std::reverse(outputShape.begin(), outputShape.end()); + std::vector tvShape(outputShape.begin(), outputShape.end()); + + std::shared_ptr tvReshape = graph->CreateOperation(tvShape); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvReshape, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { @@ -360,6 +469,8 @@ private: std::vector inputIndices; // Which axes from input are needed to compute correct output shape bool hasDynamicShapes; bool shapesInitialized; + float scale; + int zeropoint; }; Ptr ReshapeLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 790f181325..b10aef3453 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -390,6 +390,8 @@ public: } params.blobs.clear(); params.blobs.push_back(lookUpTable); + params.set("input_scale", inpScale); + params.set("input_zeropoint", zeropoints[0][0]); return true; } diff --git a/modules/dnn/src/legacy_backend.cpp b/modules/dnn/src/legacy_backend.cpp index 92661abb63..fa9407aacd 100644 --- a/modules/dnn/src/legacy_backend.cpp +++ b/modules/dnn/src/legacy_backend.cpp @@ -12,6 +12,7 @@ #include "op_vkcom.hpp" #include "op_cuda.hpp" #include "op_webnn.hpp" +#include "op_timvx.hpp" namespace cv { namespace dnn { @@ -110,6 +111,13 @@ Ptr wrapMat(int backendId, 
int targetId, cv::Mat& m) CV_Assert(IS_DNN_CUDA_TARGET(targetId)); } #endif + } + else if (backendId == DNN_BACKEND_TIMVX) + { + CV_Assert(haveTimVX()); +#ifdef HAVE_TIMVX + return Ptr(new TimVXBackendWrapper(m)); +#endif // HAVE_TIMVX } else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp index 315de13213..24fb31f03e 100644 --- a/modules/dnn/src/net_impl.cpp +++ b/modules/dnn/src/net_impl.cpp @@ -133,6 +133,9 @@ void Net::Impl::setUpNet(const std::vector& blobsToKeep_) preferableTarget == DNN_TARGET_VULKAN); CV_Assert(preferableBackend != DNN_BACKEND_CUDA || IS_DNN_CUDA_TARGET(preferableTarget)); + CV_Assert(preferableBackend != DNN_BACKEND_TIMVX || + preferableTarget == DNN_TARGET_NPU); + if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) { if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) @@ -179,6 +182,12 @@ void Net::Impl::setUpNet(const std::vector& blobsToKeep_) preferableTarget = DNN_TARGET_CPU; } + if (preferableBackend == DNN_BACKEND_TIMVX && !haveTimVX()) + { + preferableBackend = DNN_BACKEND_OPENCV; + preferableTarget = DNN_TARGET_CPU; + } + clear(); if (hasDynamicShapes) @@ -515,7 +524,7 @@ void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes) ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); /* CUDA backend has its own system for internal blobs; we don't need these */ - ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size()); + ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA || preferableBackend == DNN_BACKEND_TIMVX) ? 
0 : ld.internals.size()); for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i) ld.internalBlobsWrappers[i] = wrap(ld.internals[i]); @@ -814,6 +823,10 @@ void Net::Impl::forwardLayer(LayerData& ld) { forwardWebnn(ld.outputBlobsWrappers, node, isAsync); } + else if (preferableBackend == DNN_BACKEND_TIMVX) + { + forwardTimVX(ld.outputBlobsWrappers, node); + } #ifdef HAVE_VULKAN else if (preferableBackend == DNN_BACKEND_VKCOM) { @@ -1568,7 +1581,7 @@ string Net::Impl::dump(bool forceAllocation) const prevNode = itBackend->second; } } - std::vector colors = { "#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff" }; + std::vector colors = { "#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff", "#b266ff", "#3cb371"}; string backend; switch (prefBackend) { @@ -1580,9 +1593,8 @@ string Net::Impl::dump(bool forceAllocation) const case DNN_BACKEND_OPENCV: backend = "OCV/"; break; case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break; case DNN_BACKEND_CUDA: backend = "CUDA/"; break; - case DNN_BACKEND_WEBNN: - backend = "WEBNN/"; - break; + case DNN_BACKEND_WEBNN: backend = "WEBNN/"; break; + case DNN_BACKEND_TIMVX: backend = "TIMVX/"; break; // don't use default: } out << "digraph G {\n"; @@ -1767,6 +1779,10 @@ string Net::Impl::dump(bool forceAllocation) const out << "CUDA_FP16"; colorId = 6; break; + case DNN_TARGET_NPU: + out << "NPU"; + colorId = 9; + break; // don't use default: } CV_Assert(colorId < colors.size()); diff --git a/modules/dnn/src/net_impl.hpp b/modules/dnn/src/net_impl.hpp index 022e2374ca..9dc96fe82d 100644 --- a/modules/dnn/src/net_impl.hpp +++ b/modules/dnn/src/net_impl.hpp @@ -11,6 +11,7 @@ #include "op_vkcom.hpp" #include "op_cuda.hpp" #include "op_webnn.hpp" +#include "op_timvx.hpp" #include #include @@ -152,6 +153,14 @@ struct Net::Impl : public detail::NetImplBase void initVkComBackend(); #endif +#ifdef HAVE_TIMVX + // Create timVxInfo for reserve 
tvGraphList. + TimVXInfo timVxInfo = TimVXInfo(); + void tvUpdateConfictMap(int graphIndex, LayerData& ld, std::vector >& graphConflictMap); + void tvConvertToOutputNode(const LayerData& ld, Ptr& targetWrap); + void initTimVXBackend(); +#endif + #ifdef HAVE_CUDA struct CudaInfo_t { diff --git a/modules/dnn/src/net_impl_backend.cpp b/modules/dnn/src/net_impl_backend.cpp index 4de4fb595a..e26126d86c 100644 --- a/modules/dnn/src/net_impl_backend.cpp +++ b/modules/dnn/src/net_impl_backend.cpp @@ -74,6 +74,12 @@ Ptr Net::Impl::wrap(Mat& host) default: CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); } +#endif + } + else if (preferableBackend == DNN_BACKEND_TIMVX) + { +#ifdef HAVE_TIMVX + return Ptr(new TimVXBackendWrapper(baseBuffer, host)); #endif } else @@ -131,6 +137,14 @@ void Net::Impl::initBackend(const std::vector& blobsToKeep_) initCUDABackend(blobsToKeep_); #else CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of CUDA/CUDNN"); +#endif + } + else if (preferableBackend == DNN_BACKEND_TIMVX) + { +#ifdef HAVE_TIMVX + initTimVXBackend(); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of TimVX"); #endif } else @@ -145,9 +159,9 @@ void Net::Impl::setPreferableBackend(int backendId) if (backendId == DNN_BACKEND_DEFAULT) backendId = (Backend)getParam_DNN_BACKEND_DEFAULT(); - if (netWasQuantized && backendId != DNN_BACKEND_OPENCV) + if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX) { - CV_LOG_WARNING(NULL, "DNN: Only default backend supports quantized networks"); + CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks"); backendId = DNN_BACKEND_OPENCV; } @@ -166,9 +180,9 @@ void Net::Impl::setPreferableBackend(int backendId) void Net::Impl::setPreferableTarget(int targetId) { if (netWasQuantized && targetId != DNN_TARGET_CPU && - targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) + targetId != 
DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16 && targetId != DNN_TARGET_NPU) { - CV_LOG_WARNING(NULL, "DNN: Only CPU and OpenCL/OpenCL FP16 target is supported by quantized networks"); + CV_LOG_WARNING(NULL, "DNN: Only CPU, OpenCL/OpenCL FP16 and NPU targets are supported by quantized networks"); targetId = DNN_TARGET_CPU; } diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp index c8d79c2959..753c00de90 100644 --- a/modules/dnn/src/net_impl_fuse.cpp +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -38,7 +38,8 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA && - preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) + preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && + preferableBackend != DNN_BACKEND_TIMVX)) return; #if 0 // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index b43bb5a390..b1811412c4 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -3271,6 +3271,7 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP layerParams.type = "ConvolutionInt8"; layerParams.set("num_output", outCn); layerParams.set("input_zeropoint", inp_zp.at(0)); + layerParams.set("input_scale",inp_sc.at(0)); layerParams.blobs.push_back(weights); layerParams.blobs.push_back(biasFused); layerParams.blobs.push_back(outputMultiplier); @@ -3310,6 +3311,9 @@ void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::Nod layerParams.type = "InnerProductInt8"; layerParams.set("num_output", outCn); layerParams.set("axis", firstInpDims - secondInpDims + 1); + layerParams.set("input_scale", inp_sc.at(0)); + layerParams.set("input_zeropoint", inp_zp.at(0)); + layerParams.blobs.push_back(weights); 
layerParams.blobs.push_back(bias); layerParams.blobs.push_back(outputMultiplier); @@ -3380,6 +3384,7 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No rescaleParams.set("depth", CV_8S); rescaleParams.set("scale", scale); rescaleParams.set("shift", shift); + rescaleParams.set("isEltwise", true); addLayer(rescaleParams, node_proto); return; } @@ -3428,7 +3433,6 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No Mat blob_dequantized; blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1])); layerParams.blobs.push_back(blob_dequantized); - layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); } } } @@ -3443,9 +3447,9 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No { layerParams.type = "ScaleInt8"; layerParams.set("bias_term", op == "sum"); - layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); } + layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size())); addLayer(layerParams, node_proto); } @@ -3471,6 +3475,9 @@ void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx:: } layerParams.type = "ReLUInt8"; + layerParams.set("input_scale", inp_sc); + layerParams.set("input_zeropoint", inp_zp); + layerParams.set("slope", slope); layerParams.blobs.push_back(lookUpTable); addLayer(layerParams, node_proto); } @@ -3495,6 +3502,8 @@ void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::No } layerParams.type = "SigmoidInt8"; + layerParams.set("input_scale", inp_sc); + layerParams.set("input_zeropoint", inp_zp); layerParams.blobs.push_back(lookUpTable); addLayer(layerParams, node_proto); } @@ -3548,6 +3557,7 @@ void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::Nod 
rescaleParams.set("depth", CV_8S); rescaleParams.set("scale", scale); rescaleParams.set("shift", shift); + rescaleParams.set("isEltwise", false); opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(i)); diff --git a/modules/dnn/src/op_timvx.cpp b/modules/dnn/src/op_timvx.cpp new file mode 100644 index 0000000000..107d660d39 --- /dev/null +++ b/modules/dnn/src/op_timvx.cpp @@ -0,0 +1,931 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019-2021, Shenzhen Institute of Artificial Intelligence and +// Robotics for Society, all rights reserved. +// Third party copyrights are property of their respective owners. + +#include "precomp.hpp" +#include +#include "op_timvx.hpp" +#include "net_impl.hpp" + +namespace cv +{ +namespace dnn +{ +#ifdef HAVE_TIMVX + +CV__DNN_INLINE_NS_BEGIN + +// update all comsumer +void Net::Impl::tvUpdateConfictMap(int graphIndex, LayerData& ld, std::vector >& graphConflictMap) +{ + if (ld.consumers.empty()) + return; + for (int i = 0; i < ld.consumers.size(); i++) + { + LayerData &consumerld = layers[ld.consumers[i].lid]; + std::vector::iterator it = std::find(graphConflictMap[ld.consumers[i].lid].begin(), + graphConflictMap[ld.consumers[i].lid].end(), graphIndex); + + if (it == graphConflictMap[ld.consumers[i].lid].end()) + { + graphConflictMap[ld.consumers[i].lid].push_back(graphIndex); + tvUpdateConfictMap(graphIndex, consumerld, graphConflictMap); + } + else + continue; + } +} + +// Convert TRANSIENT to OUTPUT +void Net::Impl::tvConvertToOutputNode(const LayerData& ld, Ptr& targetWrap) +{ + // find right layer. 
+ for (auto& inputLayerId : ld.inputLayersId) + { + LayerData &inputld = layers[inputLayerId]; + auto itWrap = std::find(inputld.outputBlobsWrappers.begin(), + inputld.outputBlobsWrappers.end(), targetWrap); + if (itWrap != inputld.outputBlobsWrappers.end()) + { + auto outputWrap = (*itWrap).dynamicCast(); + if (!outputWrap->isTensor()) + continue; + + auto inputNode = inputld.backendNodes[DNN_BACKEND_TIMVX].dynamicCast(); + if (!inputNode->isLast && inputNode->opIndex != -1) + { + CV_Assert(outputWrap->getTensorAttr() == tim::vx::TRANSIENT); + // set last + inputNode->isLast = true; + + auto shapeType = getShapeTypeFromMat(outputWrap->getMat()); + auto outQuant = outputWrap->getTensorQuantization(); + + outputWrap->setTensorShape(shapeType); + outputWrap->createTensor(inputNode->tvGraph->graph, + tim::vx::TensorAttribute::OUTPUT, outQuant); + int outIndex = inputNode->tvGraph->addWrapper(outputWrap); + inputNode->outputIndexList.clear(); + inputNode->outputIndexList.push_back(outIndex); + } + } + } +} + +void Net::Impl::initTimVXBackend() +{ + CV_TRACE_FUNCTION(); + CV_Assert(preferableBackend == DNN_BACKEND_TIMVX); + + // Build TimVX Graph from sets of layers that support this TimVX backend. + // Split a whole model on several TimVX Graph if some of layers are not implemented by TimVX backend. + if (!haveTimVX()) + return; + + // Allocate graphConflictMap + if (timVxInfo.graphConflictMap.empty()) + timVxInfo.graphConflictMap.resize(layers.size()); + + auto it = layers.begin(); + bool isLast = false; // If the node is the last node in current tvGraph. + + for (; it != layers.end(); it++) + { + isLast = false; + LayerData &ld = it->second; + if(ld.skip) + continue; + Ptr layer = ld.layerInstance; + if (!layer->supportBackend(preferableBackend)) + { + continue; + } + + // If layer consumers are more than one, set isLast true. + // For now, TimVX backend divides multiple branchs into multiple tvGraph. 
+ if (ld.consumers.size() == 0) + { + isLast = true; + } + else if(ld.consumers.size() == 1) + { + LayerData* consumerld = &layers[ld.consumers[0].lid]; + + while (consumerld) + { + if (consumerld->skip) + { + if (consumerld->consumers.size() == 1) + { + int nextLayerId = consumerld->consumers[0].lid; + consumerld = &layers[nextLayerId]; + } + else + { + isLast = true; + break; + } + } + else + { + break; + } + } + Ptr& consumerLayer = consumerld->layerInstance; + + if (!isLast && !consumerLayer->supportBackend(preferableBackend)) + { + isLast = true; + } + } + else + { + // If there are is multiple input, and only one of them is supported. + int tvSupportNum = 0; + for (int i = 0; iskip) + { + if (consumerld->consumers.size() == 1) + { + int nextLayerId = consumerld->consumers[0].lid; + consumerld = &layers[nextLayerId]; + } + else + { + isLast = true; + break; + } + } + else + { + break; + } + } + Ptr& consumerLayer = consumerld->layerInstance; + + if (consumerLayer->supportBackend(preferableBackend)) + { + tvSupportNum++; + } + } + + if (tvSupportNum != 1) + isLast = true; + } + + int graphIndex = -1; + bool needRecorrect = !timVxInfo.findGraphIndex(ld.inputBlobsWrappers, graphIndex); + + if (graphIndex != -1 && !needRecorrect) + { + needRecorrect = timVxInfo.isConflict(ld.id, graphIndex); + } + + // Recorrect the input layer. + if (needRecorrect) + { + // set all inputLayers' as last layer, and convert TRANSIENT to output. 
+ for (int i = 0; i < ld.inputBlobsWrappers.size(); i++) + { + auto inputWrap = ld.inputBlobsWrappers[i]; + auto tvInputWrap = inputWrap.dynamicCast(); + if (!tvInputWrap->isTensor()) + continue; + + auto attr = tvInputWrap->getTensorAttr(); + if (attr == tim::vx::TensorAttribute::OUTPUT) + { + continue; + } + else if (attr == tim::vx::TensorAttribute::INPUT) + { + Mat matTmp = tvInputWrap->getMat(); + tvInputWrap = Ptr(new TimVXBackendWrapper(matTmp)); + + } + else if (attr == tim::vx::TensorAttribute::TRANSIENT) + { + tvConvertToOutputNode(ld, tvInputWrap); + // updateConflictMap + tvUpdateConfictMap(graphIndex, ld, timVxInfo.graphConflictMap); + } + } + graphIndex = -1; + } + + if (graphIndex == -1) + { + graphIndex = timVxInfo.createGraph(); + } + timVxInfo.setTmpGraphIndex(graphIndex); + + ld.backendNodes[DNN_BACKEND_TIMVX] = + layer->initTimVX(&timVxInfo, ld.inputBlobsWrappers, ld.outputBlobsWrappers, isLast); + + // post process, create last node correctly. + if (isLast && ld.backendNodes[DNN_BACKEND_TIMVX]) + { + auto tmpNode = ld.backendNodes[DNN_BACKEND_TIMVX].dynamicCast(); + tmpNode->isLast = true; + // update graphConflictMap + tvUpdateConfictMap(graphIndex, ld, timVxInfo.graphConflictMap); + } + + // post process for failing to create timvx Node. 
+ if (!ld.backendNodes[DNN_BACKEND_TIMVX]) + { + for (int i = 0; i < ld.inputBlobsWrappers.size(); i++) + { + auto inputWrap = ld.inputBlobsWrappers[i]; + auto tvInputWrap = inputWrap.dynamicCast(); + if (!tvInputWrap->isTensor()) + continue; + + auto attr = tvInputWrap->getTensorAttr(); + if (attr == tim::vx::TensorAttribute::TRANSIENT) + { + tvConvertToOutputNode(ld, tvInputWrap); + } + } + } + } + + // Op Binding + it = layers.begin(); + Ptr node; + std::vector > tmpGrapList; + for (; it != layers.end(); it++) + { + LayerData &ld = it->second; + + if (ld.backendNodes[DNN_BACKEND_TIMVX]) + node = ld.backendNodes[DNN_BACKEND_TIMVX].dynamicCast(); + else + continue; + + // Binding tvTensor and tvOp + if (node->opIndex >= 0) + node->opBinding(); + } +} + +CV__DNN_INLINE_NS_END + +// from CPU to NPU +bool copyToTensor(std::shared_ptr &dst, const Mat &src) +{ + CV_Assert(src.isContinuous() && (src.type() == CV_8S || src.type() == CV_32F)); + if (dst->CopyDataToTensor(src.data, src.total())) + { + return true; + } + else + return false; +} + +// from NPU to CPU +bool copyToMat(const Mat &dst, std::shared_ptr &src) +{ + CV_Assert(dst.isContinuous() && (dst.type() == CV_8S || dst.type() == CV_32F)); + if (src->CopyDataFromTensor(dst.data)) + { + return true; + } + else + return false; +} + +tvActivationType getTimVXActType(String & actString) +{ + if (actString == "ReLUInt8") return tvActReLU; + if (actString == "ReLU6Int8") return tvActReLU6; + if (actString == "TanHInt8") return tvActTanH; + if (actString == "SwishInt8") return tvActSwish; + if (actString == "MishInt8") return tvActMish; + if (actString == "SigmoidInt8") return tvActSigmoid; + if (actString == "ELUInt8") return tvActELU; + + return tvActNotSupported; +} + +tim::vx::ShapeType getShapeTypeFromMat(const Mat& mat, bool ifConst) +{ + /* Convert Mat shape to TimVX Tensor shape. + DataLayout in TimVX is WHCN, while NCHW in OpenCV. + So we do vector reverse. 
+ */ + CV_Assert(!mat.empty()); + tim::vx::ShapeType tvInputShape; + auto matShape = shape(mat); + tvInputShape.assign(matShape.begin(), matShape.end()); + + if ( matShape.size() > 1 ) // TODO: check when we need reverse the shape vector. + { + if (ifConst && tvInputShape.size() == 2 && tvInputShape[1] == 1) + { // if bias vector, shape [n, 1] to [n]. + tvInputShape.resize(1); + } + else + std::reverse(tvInputShape.begin(), tvInputShape.end()); + } + return tvInputShape; +} + +bool getQuantType(const std::vector& scales, int numOutput) +{ + CV_Assert(!scales.empty()); + if (numOutput == -1) + { + numOutput = scales.size(); + } + bool tvSymmetric = false; + + for (int i =1; i < numOutput; i++) + { + if (std::abs(scales[0] - scales[i]) > std::numeric_limits::epsilon()) + { + tvSymmetric = true; + break; + } + } + + return tvSymmetric; +} + +// convert mat Depth to tensorDataType +tim::vx::DataType dataTypeConvert(int matDepth) +{ + tim::vx::DataType tensorDataType; + switch(matDepth) + { + case CV_8U: + { + tensorDataType = tim::vx::DataType::UINT8; + break; + } + case CV_8S: + { + tensorDataType = tim::vx::DataType::INT8; + break; + } + case CV_16U: + { + tensorDataType = tim::vx::DataType::UINT16; + break; + } + case CV_16S: + { + tensorDataType = tim::vx::DataType::INT16; + break; + } + case CV_32S: + { + tensorDataType = tim::vx::DataType::INT32; + break; + } + case CV_32F: + { + tensorDataType = tim::vx::DataType::FLOAT32; + break; + } + case CV_16F: + { + tensorDataType = tim::vx::DataType::FLOAT16; + break; + } + default: + { + tensorDataType = tim::vx::DataType::UNKNOWN; + break; + } + } + return tensorDataType; +} + +std::vector > getWrappers(const std::vector wrappersIndex, + Ptr tvGraph) +{ + std::vector > wrappers; + for (int i = 0; igetWrapper(wrappersIndex[i]); + if (wrapper) + wrappers.push_back(wrapper); + } + + return wrappers; +} + +// *********************** TimVXGraph ******************** +TimVXGraph::TimVXGraph() +{ + // new TimVX Graph + context 
= tim::vx::Context::Create(); + graph = context->CreateGraph(); + isCompiled = false; +} + +TimVXGraph::~TimVXGraph() +{ + + // release opList + for (auto& tensor: tensorList) + tensor.reset(); + + // release tensorList + for (auto& op: opList) + op.reset(); + + // release graph + graph.reset(); + + // release context + context.reset(); +} + +std::shared_ptr TimVXGraph::getOp(const int opIndex) +{ + CV_Assert(0 <= opIndex && !opList.empty() && opIndex < opList.size()); + return opList[opIndex]; +} + +int TimVXGraph::addWrapper(Ptr& tensorWrapper) +{ + CV_Assert(tensorWrapper->isTensor()); + tim::vx::TensorAttribute tensorAttr = tensorWrapper->getTensorAttr(); + + wrapperList.push_back(tensorWrapper); + tensorList.push_back(tensorWrapper->getTensor()); + int wrapperIndex = wrapperList.size() -1; + + if (tensorAttr == tim::vx::TensorAttribute::INPUT) + { + inputWrappersIndex.push_back(wrapperIndex); + } + + if (tensorAttr == tim::vx::TensorAttribute::OUTPUT) + { + outputWrappersIndex.push_back(wrapperIndex); + } + + return wrapperIndex; +} + +Ptr TimVXGraph::getWrapper(int wrapperIndex) +{ + CV_Assert(wrapperIndex>=0 && wrapperIndex < wrapperList.size()); + return wrapperList[wrapperIndex]; +} + +int TimVXGraph::addOp(const std::shared_ptr& op) +{ + CV_Assert(op); + opList.emplace_back(op); + return opList.size()-1; +} + +int TimVXGraph::getTensorIndex(const std::shared_ptr& tensor) +{ + auto it = find(tensorList.begin(), tensorList.end(), tensor); + if (it != tensorList.end()) + return it - tensorList.begin(); + else + return -1; +} + +void TimVXGraph::forward() +{ + CV_Assert(!inputWrappersIndex.empty() && !outputWrappersIndex.empty()); + + // Every TimVXGraph Instance only compiles once. 
+ if (!this->isCompiled) + { + if (!graph->Compile()) + CV_Error(cv::Error::StsBadArg, " Fail to compile TimVX graph!"); + this->isCompiled = true; + } + + if (!graph->Run()) + CV_Error(cv::Error::StsBadArg, " Fail to run TimVX graph!"); +} + +// *********************** TimVXBackendNode ******************** +TimVXBackendNode::TimVXBackendNode(const Ptr& tvGraph_): BackendNode(DNN_BACKEND_TIMVX) +{ + opIndex = -1; + tvGraph = tvGraph_; + isLast = false; +} + +TimVXBackendNode::TimVXBackendNode(const Ptr& tvGraph_, + const std::shared_ptr& op_): BackendNode(DNN_BACKEND_TIMVX) +{ + tvGraph = tvGraph_; + opIndex = tvGraph->addOp(op_); + isLast = false; +} + +TimVXBackendNode::TimVXBackendNode(const Ptr& tvGraph_, std::shared_ptr& op_, + std::vector& inputsIndex, std::vector& outpusIndex) + :BackendNode(DNN_BACKEND_TIMVX) +{ + tvGraph = tvGraph_; + opIndex = tvGraph->addOp(op_); + isLast = false; + + if (!inputsIndex.empty()) + inputIndexList.assign(inputsIndex.begin(), inputsIndex.end()); + + if (!outpusIndex.empty()) + outputIndexList.assign(outpusIndex.begin(), outpusIndex.end()); +} + +bool TimVXBackendNode::opBinding() +{ + if (!tvGraph || tvGraph->isCompiled || opIndex == -1) + return false; + + std::shared_ptr op = tvGraph->getOp(opIndex); + + if (!inputIndexList.empty()) + { + std::vector > inputsWrapper = getWrappers(inputIndexList, tvGraph); + // Binding input Tensor. + for (auto& warpper: inputsWrapper) + { + op->BindInput(warpper->getTensor()); + } + } + + if (!outputIndexList.empty()) + { + std::vector > outputsWrapper = getWrappers(outputIndexList, tvGraph); + for (auto& warpper: outputsWrapper) + { + op->BindOutput(warpper->getTensor()); + } + } + return true; +} + +void TimVXBackendNode::setInputTensor() +{ + if (!tvGraph || opIndex == -1) + return; + + if (!inputIndexList.empty()) + { + std::vector > inputsWrapper = getWrappers(inputIndexList, tvGraph); + + // Binding input Tensor. 
+ for (auto& warpper: inputsWrapper) + { + if (warpper->getTensorAttr() == tim::vx::TensorAttribute::INPUT) + { + warpper->setHostDirty(); + warpper->copyToDevice(); + } + } + } +} + +// *********************** TimVXBackendWrapper ******************** +// Default Constructor +TimVXBackendWrapper::TimVXBackendWrapper() : BackendWrapper(DNN_BACKEND_TIMVX, DNN_TARGET_NPU) +{ + isTensor_ = false; + deviceDirty = false; + hostDirty = false; + tensorType = tim::vx::DataType::UNKNOWN; + tensorShape = {}; + tensorIndex = -1; + tensorAttr = tim::vx::TensorAttribute::CONSTANT; +} + +TimVXBackendWrapper::TimVXBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_TIMVX, + DNN_TARGET_NPU) +{ + host = m; + isTensor_ = false; + deviceDirty = false; + hostDirty = true; + tensorType = dataTypeConvert(m.type()); + tensorShape = {}; + tensorIndex = -1; + tensorAttr = tim::vx::TensorAttribute::CONSTANT; + + // TODO: unsupported data by TimVX should run convert function first. + CV_Assert(tensorType != tim::vx::DataType::UNKNOWN); +} + +TimVXBackendWrapper::TimVXBackendWrapper(const Ptr& baseBuffer, Mat& m) + :BackendWrapper(DNN_BACKEND_TIMVX, DNN_TARGET_NPU) +{ + Ptr base = baseBuffer.dynamicCast(); + CV_Assert(!base.empty()); + tensor = base->tensor; + isTensor_ = base->isTensor_; + tensorIndex = base->tensorIndex; + tensorType = base->tensorType; + tensorAttr = base->tensorAttr; + tensorShape = base->tensorShape; + deviceDirty = base->deviceDirty; + hostDirty = base->hostDirty; + host = m; +} + +TimVXBackendWrapper::TimVXBackendWrapper(std::shared_ptr& tensor_) + :BackendWrapper(DNN_BACKEND_TIMVX, DNN_TARGET_NPU) +{ + tensor = tensor_; + isTensor_ = true; + deviceDirty = true; + hostDirty = false; + tensorType = tensor_->GetDataType(); // getTensor DataType. + tensorAttr = tensor_->GetSpec().attr_; // getTensor Attribution. 
+ tensorShape = tensor_->GetShape(); + tensorIndex = -1; +} + +void TimVXBackendWrapper::setTensorShape(const tim::vx::ShapeType & matShape) +{ + CV_Assert(!matShape.empty()); + tensorShape.assign(matShape.begin(), matShape.end()); +} + +int TimVXBackendWrapper::getTensorIndex() +{ + CV_Assert(isTensor_); + return tensorIndex; +} + +tim::vx::TensorAttribute TimVXBackendWrapper::getTensorAttr() +{ + CV_Assert(isTensor_); + return tensorAttr; +} + +// Create tensor +void TimVXBackendWrapper::createTensor(std::shared_ptr& graph, + tim::vx::TensorAttribute tensorAttribute) +{ + Ptr epmtyQuant = nullptr; + return this->createTensor(graph, tensorAttribute, epmtyQuant); +} + +// Create tensor +void TimVXBackendWrapper::createTensor(std::shared_ptr& graph, + tim::vx::TensorAttribute tensorAttribute, Ptr& tvQuant) +{ + CV_Assert(graph); + tim::vx::TensorSpec tensorSpec; + + if (tensorAttribute == tim::vx::INPUT) + { + CV_Assert(!host.empty()); + tensorShape = getShapeTypeFromMat(host); + } + else if (tensorAttribute == tim::vx::OUTPUT) + { + CV_Assert(!tensorShape.empty() && !host.empty()); + tensorShape = getShapeTypeFromMat(host); + } + else if (tensorAttribute == tim::vx::CONSTANT) + { + if (!host.empty()) + tensorShape = getShapeTypeFromMat(host, true); + } + else + { + if (!host.empty()) + tensorShape = getShapeTypeFromMat(host); + } + + // Tensor shape + if (tvQuant) + { + tensorSpec = tim::vx::TensorSpec(tensorType, tensorShape, tensorAttribute, *tvQuant); + } + else + { + tensorSpec = tim::vx::TensorSpec(tensorType, tensorShape, tensorAttribute); + } + + if (!host.empty() && tensorAttribute != tim::vx::INPUT && tensorAttribute != tim::vx::OUTPUT && tensorAttribute != tim::vx::TRANSIENT) + { + tensor = graph->CreateTensor(tensorSpec, (void *)(host.data)); + } + else + { + tensor = graph->CreateTensor(tensorSpec); + } + isTensor_ = true; + + // set Attribution + tensorAttr = tensorAttribute; +} + +Ptr TimVXBackendWrapper::getTensorQuantization() +{ + 
CV_Assert(isTensor_ && tensor); + auto quantize = tensor->GetQuantization(); + return makePtr(quantize); +} + +std::shared_ptr TimVXBackendWrapper::getTensor() +{ + CV_Assert(isTensor_); + return tensor; +} + +Mat TimVXBackendWrapper::getMat() +{ + if (host.empty()) + return {}; + return host; +} + + +bool TimVXBackendWrapper::isTensor() +{ + return isTensor_; +} + +void TimVXBackendWrapper::copyToHost() +{ + if (deviceDirty && !host.empty()) + { + copyToMat(host, tensor); + deviceDirty = false; + } +} + +void TimVXBackendWrapper::setHostDirty() +{ + hostDirty = true; +} + +void TimVXBackendWrapper::setDeviceDirty() +{ + deviceDirty = true; +} + +void TimVXBackendWrapper::copyToDevice() +{ + if (isTensor_ && hostDirty && !host.empty()) + { + copyToTensor(tensor, host); + hostDirty = false; + } +} + +// *********************** TimVXInfo ******************** +TimVXInfo::TimVXInfo() +{ + graphIndex = -1; +} + +TimVXInfo::~TimVXInfo() +{} + +int TimVXInfo::createGraph() +{ + Ptr tmpGraph = Ptr(new TimVXGraph()); + this->tvGraphList.push_back(tmpGraph); + return this->tvGraphList.size() - 1; +} + +bool TimVXInfo::findGraphIndex(const std::vector > &inputsWrapper, int& graphIndex) +{ + graphIndex = -1; + int wrapperSize = inputsWrapper.size(); + int graphSize = tvGraphList.size(); + + if (wrapperSize != 0 && graphSize == 0) + { + return true; + } + + int tensorIndex = -1; + Ptr wrapper; + Ptr tvGraph; + + for (int i = 0; i < graphSize; i++) + { + tvGraph = tvGraphList[i]; + for (int j = 0; j < wrapperSize; j++ ) + { + wrapper = inputsWrapper[j].dynamicCast(); + + if (!wrapper->isTensor()) // Skip wrapper without Tensor. + continue; + + tensorIndex = tvGraph->getTensorIndex(wrapper->getTensor()); + if (tensorIndex != -1 && wrapper->getTensorAttr() == tim::vx::TensorAttribute::TRANSIENT) + { + if (graphIndex == -1) + graphIndex = i; + else if (graphIndex != i) // if inputs of the same inputWrapper are from differen tvGraph. 
+ { + graphIndex = -1; + return false; + } + } + } + } + return true; +} + +void TimVXInfo::setTmpGraphIndex(int graphIndex) +{ + this->graphIndex = graphIndex; +} + +int TimVXInfo::getTmpGraphIndex() +{ + int res = -1; + if (graphIndex != -1) + { + res = graphIndex; + graphIndex = -1; + } + return res; +} + +bool TimVXInfo::isConflict(int layerId, int graphIndex) +{ + if (graphConflictMap[layerId].empty()) + return false; + + std::vector::iterator it = std::find(graphConflictMap[layerId].begin(), + graphConflictMap[layerId].end(), graphIndex); + if (it != graphConflictMap[layerId].end()) + return true; + else + return false; +} + +Ptr TimVXInfo::getGraph() +{ + int index = getTmpGraphIndex(); + if (0 <= index && index < tvGraphList.size()) + return tvGraphList[index]; + else + return {}; +} + +#endif + +void forwardTimVX(std::vector >& outputs, const Ptr& node_) +{ +#ifdef HAVE_TIMVX + CV_Assert(!node_.empty()); + Ptr node = node_.dynamicCast(); + + if (node) + { + // set input + node->setInputTensor(); + + // graph Forward + if (node->isLast) + { + node->tvGraph->forward(); + } + } + else + return; + + // set ouput + Ptr outWarpper; + for (int i = 0; i < outputs.size(); i++) + { + outWarpper = outputs[i].dynamicCast(); + if (outWarpper->isTensor() && outWarpper->getTensorAttr() == tim::vx::TensorAttribute::OUTPUT) + { + outWarpper->setDeviceDirty(); + outWarpper->copyToHost(); + } + } +#endif +} + +bool haveTimVX() +{ +#ifdef HAVE_TIMVX + return true; +#else + return false; +#endif +} +} // namespace dnn +} // namespace cv \ No newline at end of file diff --git a/modules/dnn/src/op_timvx.hpp b/modules/dnn/src/op_timvx.hpp new file mode 100644 index 0000000000..d08cf64bb5 --- /dev/null +++ b/modules/dnn/src/op_timvx.hpp @@ -0,0 +1,187 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2019-2021, Shenzhen Institute of Artificial Intelligence and +// Robotics for Society, all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_DNN_OP_TIMVX_HPP +#define OPENCV_DNN_OP_TIMVX_HPP + +#include + +// TimVX head file. +#ifdef HAVE_TIMVX +#include "tim/vx/context.h" +#include "tim/vx/graph.h" +#include "tim/vx/operation.h" +#include "tim/vx/ops.h" +#include "tim/vx/tensor.h" +#endif // HAVE_TIMVX + +namespace cv +{ +namespace dnn +{ +#ifdef HAVE_TIMVX + +enum tvActivationType{ + tvActNotSupported = -1, + tvActReLU, + tvActReLU6, + tvActTanH, + tvActSwish, + tvActMish, + tvActSigmoid, + tvActELU +}; + +// Data copied from/to Mat to/from Tensor. Change the shape of dst if +// needed to make it the same shape as src. +bool copyToTensor(Ptr &dst, const Mat &src); +bool copyToMat(const Mat &dst, Ptr &src); +tvActivationType getTimVXActType(String & actString); + +// Convert Mat shape to TimVX TensorShape +tim::vx::ShapeType getShapeTypeFromMat(const Mat& mat, bool ifConst = false); + +// if all value in weight +bool getQuantType(const std::vector& scales, int numOutput = -1); + +class TimVXInfo; +class TimVXGraph; +class TimVXBackendNode; +class TimVXBackendWrapper; + +// Maintain the tvGraph and tvTensor List. For now, every tvGraph only have one output node, and each node +// in tvGraph has only one output too. It could be optimized in future. +// TODO: tvGraph supports multiple output node. +class TimVXGraph +{ +public: + TimVXGraph(); + ~TimVXGraph(); + std::shared_ptr getOp(const int opIndex); + + // It will add tensorWrapper to wrapperList, and return index. + // And add tensor Ptr to tensorList. + int addWrapper(Ptr& tensorWrapper); + + void forward(); + + // Add new op to opList, and return the index. + int addOp(const std::shared_ptr& op); + + // If tensor existed in tensorList, return the tensorIndex, otherwise return -1. 
+ int getTensorIndex(const std::shared_ptr& tensor); + + Ptr getWrapper(int wrapperIndex); + + std::shared_ptr graph; + bool isCompiled; // Every tvGraph can only be compiled once. + +private: + std::shared_ptr context; + std::vector inputWrappersIndex; + std::vector outputWrappersIndex; + std::vector > wrapperList; + std::vector > tensorList; + std::vector > opList; +}; + +class TimVXBackendNode : public BackendNode +{ +public: + TimVXBackendNode(const Ptr& tvGraph); + TimVXBackendNode(const Ptr& tvGraph, const std::shared_ptr& op); + TimVXBackendNode(const Ptr& tvGraph, std::shared_ptr& op, + std::vector& inputsIndex, std::vector& outpusIndex); + + void setInputTensor(); + bool opBinding(); + + // flag for marking OutputNode of tvGraph this node is the last node in this TimVX Graph. + bool isLast; + int opIndex; + + // index of tensor and wrapper. + std::vector inputIndexList; + std::vector outputIndexList; + Ptr tvGraph; +}; + +class TimVXBackendWrapper : public BackendWrapper +{ +public: + TimVXBackendWrapper(); + TimVXBackendWrapper(Mat& m); + TimVXBackendWrapper(const Ptr& baseBuffer, Mat& m); + TimVXBackendWrapper(std::shared_ptr& tensor); + + // Create Output Tensor + void createTensor(std::shared_ptr& graph, tim::vx::TensorAttribute tensorAttribute); + void createTensor(std::shared_ptr& graph, tim::vx::TensorAttribute tensorAttribute, + Ptr& tvQuant); + std::shared_ptr getTensor(); + Mat getMat(); + + // The Output tensor in TimVX doesn't have HostMat, The shape can only be given. 
+ void setTensorShape(const tim::vx::ShapeType & matShape); + int getTensorIndex(); + Ptr getTensorQuantization(); + tim::vx::TensorAttribute getTensorAttr(); + bool isTensor(); + + // Data Copy, CPU <==> NPU + virtual void copyToHost() CV_OVERRIDE; + virtual void setHostDirty() CV_OVERRIDE; + void setDeviceDirty(); + void copyToDevice(); + +private: + tim::vx::DataType tensorType; + bool deviceDirty; + bool hostDirty; + int tensorIndex; // index of tensorList in specific TimVXGraph. + bool isTensor_; + Mat host; + + tim::vx::ShapeType tensorShape; + std::shared_ptr tensor; + tim::vx::TensorAttribute tensorAttr; +}; + +// Contain all created tvGraphList, used in every +class TimVXInfo{ +public: + TimVXInfo(); + ~TimVXInfo(); + + // Find the right graph Index set as graphIndex, if cannot find, return empty ptr. + Ptr getGraph(); + bool findGraphIndex(const std::vector > &inputsWrapper, int& graphIndex); + void setTmpGraphIndex(int graphIndex); + bool isConflict(int layerId, int graphIndex); + + // create a TimVXGraph, add it to tvGraphList, and return the index in tvGraphList. 
+ int createGraph(); + + // graphConflictIndex[layerIndex] saves conflict graph index, which should be excluded + std::vector > graphConflictMap; + +private: + int getTmpGraphIndex(); + std::vector > tvGraphList; + int graphIndex; + +}; + +#endif + +void forwardTimVX(std::vector > &outputs, const Ptr& node); +bool haveTimVX(); +} // namespace dnn +} // namespace cv + +#endif // OPENCV_DNN_OP_TIMVX_HPP \ No newline at end of file diff --git a/modules/dnn/src/registry.cpp b/modules/dnn/src/registry.cpp index a802e1602b..697fca6015 100644 --- a/modules/dnn/src/registry.cpp +++ b/modules/dnn/src/registry.cpp @@ -10,6 +10,7 @@ #include "op_vkcom.hpp" #include "op_cuda.hpp" #include "op_webnn.hpp" +#include "op_timvx.hpp" #include "halide_scheduler.hpp" @@ -109,6 +110,13 @@ private: backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); } #endif + +#ifdef HAVE_TIMVX + if (haveTimVX()) + { + backends.push_back(std::make_pair(DNN_BACKEND_TIMVX, DNN_TARGET_NPU)); + } +#endif } BackendsList backends; diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index fb64697574..3d616e17da 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -48,6 +48,7 @@ #define CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE "dnn_skip_onnx_conformance" #define CV_TEST_TAG_DNN_SKIP_PARSER "dnn_skip_parser" +#define CV_TEST_TAG_DNN_SKIP_TIMVX "dnn_skip_timvx" #ifdef HAVE_INF_ENGINE #if INF_ENGINE_VER_MAJOR_EQ(2018050000) diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index 747dc02ce6..35f658cc90 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -30,6 +30,7 @@ void PrintTo(const cv::dnn::Backend& v, std::ostream* os) case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: *os << "DLIE"; return; case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: *os << "NGRAPH"; return; case DNN_BACKEND_WEBNN: *os << "WEBNN"; return; + case DNN_BACKEND_TIMVX: *os << 
"TIMVX"; return; } // don't use "default:" to emit compiler warnings *os << "DNN_BACKEND_UNKNOWN(" << (int)v << ")"; } @@ -46,6 +47,7 @@ void PrintTo(const cv::dnn::Target& v, std::ostream* os) case DNN_TARGET_FPGA: *os << "FPGA"; return; case DNN_TARGET_CUDA: *os << "CUDA"; return; case DNN_TARGET_CUDA_FP16: *os << "CUDA_FP16"; return; + case DNN_TARGET_NPU: *os << "NPU"; return; } // don't use "default:" to emit compiler warnings *os << "DNN_TARGET_UNKNOWN(" << (int)v << ")"; } @@ -478,6 +480,11 @@ void initDNNTests() registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_CUDA, CV_TEST_TAG_DNN_SKIP_CUDA_FP32, CV_TEST_TAG_DNN_SKIP_CUDA_FP16 ); +#endif +#ifdef HAVE_TIMVX + registerGlobalSkipTag( + CV_TEST_TAG_DNN_SKIP_TIMVX + ); #endif registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE, diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 6c41a8dbbb..1f9ae6c2a6 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -12,6 +12,9 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget { std::vector< tuple > targets; targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); +#ifdef HAVE_TIMVX + targets.push_back(make_tuple(DNN_BACKEND_TIMVX, DNN_TARGET_NPU)); +#endif return testing::ValuesIn(targets); } @@ -104,14 +107,29 @@ TEST_P(Test_Int8_layers, Convolution1D) TEST_P(Test_Int8_layers, Convolution2D) { - testLayer("layer_convolution", "Caffe", 0.0174, 0.0758, 1, 1, true); - testLayer("single_conv", "TensorFlow", 0.00413, 0.02201); - testLayer("depthwise_conv2d", "TensorFlow", 0.0388, 0.169); + if(backend == DNN_BACKEND_TIMVX) + testLayer("single_conv", "TensorFlow", 0.00424, 0.02201); + else + testLayer("single_conv", "TensorFlow", 0.00413, 0.02201); + testLayer("atrous_conv2d_valid", "TensorFlow", 0.0193, 0.0633); testLayer("atrous_conv2d_same", "TensorFlow", 0.0185, 0.1322); testLayer("keras_atrous_conv2d_same", "TensorFlow", 0.0056, 0.0244); - 
testLayer("convolution", "ONNX", 0.0052, 0.01516); - testLayer("two_convolution", "ONNX", 0.00295, 0.00840); + + if(backend == DNN_BACKEND_TIMVX) + testLayer("convolution", "ONNX", 0.00534, 0.01516); + else + testLayer("convolution", "ONNX", 0.0052, 0.01516); + + if(backend == DNN_BACKEND_TIMVX) + testLayer("two_convolution", "ONNX", 0.0033, 0.01); + else + testLayer("two_convolution", "ONNX", 0.00295, 0.00840); + + if(backend == DNN_BACKEND_TIMVX) + applyTestTag(CV_TEST_TAG_DNN_SKIP_TIMVX); + testLayer("layer_convolution", "Caffe", 0.0174, 0.0758, 1, 1, true); + testLayer("depthwise_conv2d", "TensorFlow", 0.0388, 0.169); } TEST_P(Test_Int8_layers, Convolution3D) @@ -130,9 +148,21 @@ TEST_P(Test_Int8_layers, Flatten) TEST_P(Test_Int8_layers, Padding) { - testLayer("padding_valid", "TensorFlow", 0.0026, 0.0064); - testLayer("padding_same", "TensorFlow", 0.0081, 0.032); - testLayer("spatial_padding", "TensorFlow", 0.0078, 0.028); + if (backend == DNN_BACKEND_TIMVX) + testLayer("padding_valid", "TensorFlow", 0.0292, 0.0105); + else + testLayer("padding_valid", "TensorFlow", 0.0026, 0.0064); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("padding_same", "TensorFlow", 0.0085, 0.032); + else + testLayer("padding_same", "TensorFlow", 0.0081, 0.032); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("spatial_padding", "TensorFlow", 0.0079, 0.028); + else + testLayer("spatial_padding", "TensorFlow", 0.0078, 0.028); + testLayer("mirror_pad", "TensorFlow", 0.0064, 0.013); testLayer("pad_and_concat", "TensorFlow", 0.0021, 0.0098); testLayer("padding", "ONNX", 0.0005, 0.0069); @@ -283,20 +313,35 @@ TEST_P(Test_Int8_layers, InnerProduct) { testLayer("layer_inner_product", "Caffe", 0.005, 0.02, 1, 1, true); testLayer("matmul", "TensorFlow", 0.0061, 0.019); - testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0009, 0.0091); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0018, 0.0175); + else + 
testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0009, 0.0091); + testLayer("nhwc_reshape_matmul", "TensorFlow", 0.03, 0.071); testLayer("matmul_layout", "TensorFlow", 0.035, 0.06); testLayer("tf2_dense", "TensorFlow", 0, 0); testLayer("matmul_add", "ONNX", 0.041, 0.082); testLayer("linear", "ONNX", 0.0018, 0.0029); - testLayer("constant", "ONNX", 0.00021, 0.0006); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("constant", "ONNX", 0.00048, 0.0013); + else + testLayer("constant", "ONNX", 0.00021, 0.0006); + testLayer("lin_with_constant", "ONNX", 0.0011, 0.0016); } TEST_P(Test_Int8_layers, Reshape) { testLayer("reshape_layer", "TensorFlow", 0.0032, 0.0082); - testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("reshape_nchw", "TensorFlow", 0.0092, 0.0495); + else + testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029); + testLayer("reshape_conv", "TensorFlow", 0.035, 0.054); testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078); testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028); @@ -307,7 +352,12 @@ TEST_P(Test_Int8_layers, Reshape) testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081); testLayer("squeeze", "ONNX", 0.0048, 0.0081); testLayer("unsqueeze", "ONNX", 0.0033, 0.0053); - testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.0054, 0.0154); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.006, 0.0212); + else + testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.0054, 0.0154); + testLayer("unsqueeze_and_conv_dynamic_axes", "ONNX", 0.0037, 0.0151); } @@ -378,6 +428,10 @@ TEST_P(Test_Int8_layers, Dropout) TEST_P(Test_Int8_layers, Eltwise) { testLayer("layer_eltwise", "Caffe", 0.062, 0.15); + + if (backend == DNN_BACKEND_TIMVX) + applyTestTag(CV_TEST_TAG_DNN_SKIP_TIMVX); + testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false); testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047); testLayer("eltwise_add_vec", "TensorFlow", 
0.037, 0.21); // tflite 0.0095, 0.0365 @@ -862,6 +916,8 @@ TEST_P(Test_Int8_nets, EfficientDet) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel()) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + if (backend == DNN_BACKEND_TIMVX) + applyTestTag(CV_TEST_TAG_DNN_SKIP_TIMVX); if (target != DNN_TARGET_CPU) { From 8b44ee2ce1a8d6b4f1a8291b66915de61c2ee732 Mon Sep 17 00:00:00 2001 From: shengwenxue Date: Wed, 30 Mar 2022 11:29:01 +0800 Subject: [PATCH 57/84] fix MSA sum overflow issue --- modules/core/include/opencv2/core/hal/intrin_msa.hpp | 12 ++++++------ modules/core/include/opencv2/core/hal/msa_macros.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index a1fbb093a8..c035fdad60 100644 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -1037,12 +1037,12 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \ return (scalartype)msa_sum_##suffix(a.val); \ } -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32) OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32) inline uint64 v_reduce_sum(const v_uint64x2& a) diff --git a/modules/core/include/opencv2/core/hal/msa_macros.h 
b/modules/core/include/opencv2/core/hal/msa_macros.h index bd6ddb127a..fad8c5adda 100644 --- a/modules/core/include/opencv2/core/hal/msa_macros.h +++ b/modules/core/include/opencv2/core/hal/msa_macros.h @@ -719,7 +719,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); v2i64 _c; \ _b = __builtin_msa_hadd_s_w(__a, __a); \ _c = __builtin_msa_hadd_s_d(_b, _b); \ - (int16_t)(_c[0] + _c[1]); \ + (int32_t)(_c[0] + _c[1]); \ }) @@ -736,7 +736,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); ({ \ v2i64 _b; \ _b = __builtin_msa_hadd_s_d(__a, __a); \ - (int32_t)(_b[0] + _b[1]); \ + (int64_t)(_b[0] + _b[1]); \ }) /* uint8_t msa_sum_u8(v16u8 __a)*/ @@ -756,7 +756,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); v4i32 _c32; \ _b16 = __builtin_msa_hadd_s_h(__a, __a); \ _c32 = __builtin_msa_hadd_s_w(_b16, _b16); \ - (int8_t)msa_sum_s32(_c32); \ + (int16_t)msa_sum_s32(_c32); \ }) /* float msa_sum_f32(v4f32 __a)*/ From f3945fbddb4202f1de423c5404ce69aa00b4abf9 Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Fri, 1 Apr 2022 13:06:47 +0300 Subject: [PATCH 58/84] Merge pull request #21688 from sivanov-work:vpp_ie_integration G-API: VPP preprocessing GIEBackend integration * Add ROI in VPP prepro * Apply comments * Integration to IE * Removed extra invocations * Fix no-vpl compilation * Fix compilations * Apply comments * Use thin wrapper for device & ctx * Implement Device/Context constructor functions * Apply samples comment * Fix compilation * Fix compilation * Fix build * Separate Device&Context from selector, apply other comments * Fix typo * Intercept OV exception with pretty print out --- modules/gapi/CMakeLists.txt | 1 + .../gapi/include/opencv2/gapi/infer/ie.hpp | 19 +- .../gapi/streaming/onevpl/accel_types.hpp | 72 +++++ .../onevpl/device_selector_interface.hpp | 43 +-- .../opencv2/gapi/streaming/onevpl/source.hpp | 4 + .../gapi/samples/onevpl_infer_single_roi.cpp | 203 +++++++++----- 
.../gapi/samples/pipeline_modeling_tool.cpp | 2 + modules/gapi/src/backends/ie/giebackend.cpp | 254 +++++++++++++++++- .../onevpl/cfg_param_device_selector.cpp | 60 +++++ .../onevpl/cfg_param_device_selector.hpp | 7 +- .../onevpl/device_selector_interface.cpp | 36 +++ .../engine/preproc/preproc_dispatcher.cpp | 32 ++- .../engine/preproc/preproc_dispatcher.hpp | 5 - .../onevpl/engine/preproc/preproc_engine.cpp | 4 +- .../onevpl/engine/preproc_defines.hpp | 4 +- .../engine/preproc_engine_interface.cpp | 83 ++++++ .../engine/preproc_engine_interface.hpp | 10 + modules/gapi/src/streaming/onevpl/source.cpp | 11 + 18 files changed, 715 insertions(+), 135 deletions(-) create mode 100644 modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp create mode 100644 modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 04e1906c75..29036c4e26 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -204,6 +204,7 @@ set(gapi_srcs src/streaming/onevpl/engine/preproc/preproc_engine.cpp src/streaming/onevpl/engine/preproc/preproc_session.cpp src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp + src/streaming/onevpl/engine/preproc_engine_interface.cpp src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp src/streaming/onevpl/data_provider_dispatcher.cpp diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index e6b7be58ad..204bd8f266 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -20,6 +20,7 @@ #include // GAPI_EXPORTS #include // GKernelPackage #include // Generic +#include // Preproc Dev & Ctx namespace cv { namespace gapi { @@ -84,6 +85,9 @@ struct ParamDesc { // have 2D (Layout::NC) input and if the first dimension not equal to 1 // net.setBatchSize(1) will overwrite it. 
cv::optional batch_size; + + cv::optional vpl_preproc_device; + cv::optional vpl_preproc_ctx; }; } // namespace detail @@ -126,6 +130,8 @@ public: , {} , 1u , {} + , {} + , {} , {}} { }; @@ -148,6 +154,8 @@ public: , {} , 1u , {} + , {} + , {} , {}} { }; @@ -336,6 +344,13 @@ public: return *this; } + Params& cfgPreprocessingParams(const cv::gapi::wip::onevpl::Device &device, + const cv::gapi::wip::onevpl::Context &ctx) { + desc.vpl_preproc_device = cv::util::make_optional(device); + desc.vpl_preproc_ctx = cv::util::make_optional(ctx); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::ie::backend(); } std::string tag() const { return Net::tag(); } @@ -370,7 +385,7 @@ public: const std::string &device) : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u, - {}, {}}, + {}, {}, {}, {}}, m_tag(tag) { }; @@ -388,7 +403,7 @@ public: const std::string &device) : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u, - {}, {}}, + {}, {}, {}, {}}, m_tag(tag) { }; diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp new file mode 100644 index 0000000000..421b592aae --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCEL_TYPES_HPP +#define GAPI_STREAMING_ONEVPL_ACCEL_TYPES_HPP + +#include +#include + +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +enum class AccelType: uint8_t { + HOST, + DX11, + + LAST_VALUE = std::numeric_limits::max() +}; + +GAPI_EXPORTS const char* to_cstring(AccelType type); + +struct IDeviceSelector; +struct GAPI_EXPORTS Device { + friend struct IDeviceSelector; + using Ptr = void*; + + ~Device(); + const std::string& get_name() const; + Ptr get_ptr() const; + AccelType get_type() const; +private: + Device(Ptr device_ptr, const std::string& device_name, + AccelType device_type); + + std::string name; + Ptr ptr; + AccelType type; +}; + +struct GAPI_EXPORTS Context { + friend struct IDeviceSelector; + using Ptr = void*; + + ~Context(); + Ptr get_ptr() const; + AccelType get_type() const; +private: + Context(Ptr ctx_ptr, AccelType ctx_type); + Ptr ptr; + AccelType type; +}; + +GAPI_EXPORTS Device create_host_device(); +GAPI_EXPORTS Context create_host_context(); + +GAPI_EXPORTS Device create_dx11_device(Device::Ptr device_ptr, + const std::string& device_name); +GAPI_EXPORTS Context create_dx11_context(Context::Ptr ctx_ptr); + +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // GAPI_STREAMING_ONEVPL_ACCEL_TYPES_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp index 04f8cae02a..2e2d879fba 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp @@ -12,53 +12,12 @@ #include #include -#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include namespace cv { namespace gapi { namespace wip { 
namespace onevpl { - -enum class AccelType: uint8_t { - HOST, - DX11, - - LAST_VALUE = std::numeric_limits::max() -}; - -GAPI_EXPORTS const char* to_cstring(AccelType type); - -struct IDeviceSelector; -struct GAPI_EXPORTS Device { - friend struct IDeviceSelector; - using Ptr = void*; - - ~Device(); - const std::string& get_name() const; - Ptr get_ptr() const; - AccelType get_type() const; -private: - Device(Ptr device_ptr, const std::string& device_name, - AccelType device_type); - - std::string name; - Ptr ptr; - AccelType type; -}; - -struct GAPI_EXPORTS Context { - friend struct IDeviceSelector; - using Ptr = void*; - - ~Context(); - Ptr get_ptr() const; - AccelType get_type() const; -private: - Context(Ptr ctx_ptr, AccelType ctx_type); - Ptr ptr; - AccelType type; -}; - struct GAPI_EXPORTS IDeviceSelector { using Ptr = std::shared_ptr; diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp index 6334480c1b..04dc2e246d 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp @@ -46,6 +46,10 @@ public: void* accel_device_ptr, void* accel_ctx_ptr); + GSource(const std::string& filePath, + const CfgParams& cfg_params, + const Device &device, const Context &ctx); + GSource(const std::string& filePath, const CfgParams& cfg_params, std::shared_ptr selector); diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp index 6935cbb709..7f0da6070c 100644 --- a/modules/gapi/samples/onevpl_infer_single_roi.cpp +++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include // CommandLineParser #include @@ -46,26 +45,45 @@ const std::string keys = "{ cfg_params | :;: | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by 
https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automaticaly if 0 }" "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" - "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}"; + "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" + "{ roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. Identified automatically when not set }"; namespace { -bool is_gpu(const std::string &device_name) { - return device_name.find("GPU") != std::string::npos; -} - std::string get_weights_path(const std::string &model_path) { const auto EXT_LEN = 4u; const auto sz = model_path.size(); - CV_Assert(sz > EXT_LEN); + GAPI_Assert(sz > EXT_LEN); auto ext = model_path.substr(sz - EXT_LEN); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ return static_cast(std::tolower(c)); }); - CV_Assert(ext == ".xml"); + GAPI_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } +// TODO: It duplicates infer_single_roi sample +cv::util::optional parse_roi(const std::string &rc) { + cv::Rect rv; + char delim[3]; + + std::stringstream is(rc); + is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height; + if (is.bad()) { + return cv::util::optional(); // empty value + } + const auto is_delim = [](char c) { + return c == ','; + }; + if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) { + return cv::util::optional(); // empty value + } + if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) { + return cv::util::optional(); // empty value + } + return cv::util::make_optional(std::move(rv)); +} + #ifdef HAVE_INF_ENGINE #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 @@ -127,9 +145,15 @@ using GRect = cv::GOpaque; 
using GSize = cv::GOpaque; using GPrims = cv::GArray; -G_API_OP(LocateROI, )>, "sample.custom.locate-roi") { - static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &, - std::reference_wrapper) { +G_API_OP(ParseSSD, , "sample.custom.parse-ssd") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; + +// TODO: It duplicates infer_single_roi sample +G_API_OP(LocateROI, , "sample.custom.locate-roi") { + static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &) { return cv::empty_gopaque_desc(); } }; @@ -151,29 +175,18 @@ GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) { // the most convenient aspect ratio for detectors to use) static void run(const cv::Size& in_size, - std::reference_wrapper device_id_ref, cv::Rect &out_rect) { // Identify the central point & square size (- some padding) - // NB: GPU plugin in InferenceEngine doesn't support ROI at now - if (!is_gpu(device_id_ref.get())) { - const auto center = cv::Point{in_size.width/2, in_size.height/2}; - auto sqside = std::min(in_size.width, in_size.height); + const auto center = cv::Point{in_size.width/2, in_size.height/2}; + auto sqside = std::min(in_size.width, in_size.height); - // Now build the central square ROI - out_rect = cv::Rect{ center.x - sqside/2 - , center.y - sqside/2 - , sqside - , sqside - }; - } else { - // use whole frame for GPU device - out_rect = cv::Rect{ 0 - , 0 - , in_size.width - , in_size.height - }; - } + // Now build the central square ROI + out_rect = cv::Rect{ center.x - sqside/2 + , center.y - sqside/2 + , sqside + , sqside + }; } }; @@ -194,6 +207,55 @@ GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { } }; +GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { + static void run(const cv::Mat &in_ssd_result, + const cv::Rect &in_roi, + const cv::Size &in_parent_size, + std::vector &out_objects) { + const auto &in_ssd_dims = in_ssd_result.size; + GAPI_Assert(in_ssd_dims.dims() == 4u); + + const int MAX_PROPOSALS = 
in_ssd_dims[2]; + const int OBJECT_SIZE = in_ssd_dims[3]; + GAPI_Assert(OBJECT_SIZE == 7); // fixed SSD object size + + const cv::Size up_roi = in_roi.size(); + const cv::Rect surface({0,0}, in_parent_size); + + out_objects.clear(); + + const float *data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i * OBJECT_SIZE + 0]; + const float label = data[i * OBJECT_SIZE + 1]; + const float confidence = data[i * OBJECT_SIZE + 2]; + const float rc_left = data[i * OBJECT_SIZE + 3]; + const float rc_top = data[i * OBJECT_SIZE + 4]; + const float rc_right = data[i * OBJECT_SIZE + 5]; + const float rc_bottom = data[i * OBJECT_SIZE + 6]; + (void) label; // unused + + if (image_id < 0.f) { + break; // marks end-of-detections + } + if (confidence < 0.5f) { + continue; // skip objects with low confidence + } + + // map relative coordinates to the original image scale + // taking the ROI into account + cv::Rect rc; + rc.x = static_cast(rc_left * up_roi.width); + rc.y = static_cast(rc_top * up_roi.height); + rc.width = static_cast(rc_right * up_roi.width) - rc.x; + rc.height = static_cast(rc_bottom * up_roi.height) - rc.y; + rc.x += in_roi.x; + rc.y += in_roi.y; + out_objects.emplace_back(rc & surface); + } + } +}; + } // namespace custom namespace cfg { @@ -212,6 +274,7 @@ int main(int argc, char *argv[]) { // get file name const auto file_path = cmd.get("input"); const auto output = cmd.get("output"); + const auto opt_roi = parse_roi(cmd.get("roi")); const auto face_model_path = cmd.get("facem"); const auto streaming_queue_capacity = cmd.get("streaming_queue_capacity"); const auto source_decode_queue_capacity = cmd.get("frames_pool_size"); @@ -259,8 +322,8 @@ int main(int argc, char *argv[]) { // GAPI InferenceEngine backend to provide interoperability with onevpl::GSource // So GAPI InferenceEngine backend and onevpl::GSource MUST share the same // device and context - void* accel_device_ptr = nullptr; - void* accel_ctx_ptr = 
nullptr; + cv::util::optional accel_device; + cv::util::optional accel_ctx; #ifdef HAVE_INF_ENGINE #ifdef HAVE_DIRECTX @@ -268,7 +331,7 @@ int main(int argc, char *argv[]) { auto dx11_dev = createCOMPtrGuard(); auto dx11_ctx = createCOMPtrGuard(); - if (is_gpu(device_id)) { + if (device_id.find("GPU") != std::string::npos) { auto adapter_factory = createCOMPtrGuard(); { IDXGIFactory* out_factory = nullptr; @@ -302,8 +365,13 @@ int main(int argc, char *argv[]) { } std::tie(dx11_dev, dx11_ctx) = create_device_with_ctx(intel_adapter.get()); - accel_device_ptr = reinterpret_cast(dx11_dev.get()); - accel_ctx_ptr = reinterpret_cast(dx11_ctx.get()); + accel_device = cv::util::make_optional( + cv::gapi::wip::onevpl::create_dx11_device( + reinterpret_cast(dx11_dev.get()), + device_id)); + accel_ctx = cv::util::make_optional( + cv::gapi::wip::onevpl::create_dx11_context( + reinterpret_cast(dx11_ctx.get()))); // put accel type description for VPL source source_cfgs.push_back(cfg::create_from_string( @@ -315,9 +383,10 @@ int main(int argc, char *argv[]) { #endif // HAVE_D3D11 #endif // HAVE_DIRECTX // set ctx_config for GPU device only - no need in case of CPU device type - if (is_gpu(device_id)) { + if (accel_device.has_value() && + accel_device.value().get_name().find("GPU") != std::string::npos) { InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"}, - {"VA_DEVICE", accel_device_ptr} }); + {"VA_DEVICE", accel_device.value().get_ptr()} }); face_net.cfgContextParams(ctx_config); // NB: consider NV12 surface because it's one of native GPU image format @@ -325,8 +394,16 @@ int main(int argc, char *argv[]) { } #endif // HAVE_INF_ENGINE + // turn on preproc + if (accel_device.has_value() && accel_ctx.has_value()) { + face_net.cfgPreprocessingParams(accel_device.value(), + accel_ctx.value()); + std::cout << "enforce VPP preprocessing on " << device_id << std::endl; + } + auto kernels = cv::gapi::kernels < custom::OCVLocateROI + , custom::OCVParseSSD , 
custom::OCVBBoxes>(); auto networks = cv::gapi::networks(face_net); auto face_detection_args = cv::compile_args(networks, kernels); @@ -335,13 +412,12 @@ int main(int argc, char *argv[]) { } // Create source - cv::Ptr cap; + cv::gapi::wip::IStreamSource::Ptr cap; try { - if (is_gpu(device_id)) { + if (accel_device.has_value() && accel_ctx.has_value()) { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs, - device_id, - accel_device_ptr, - accel_ctx_ptr); + accel_device.value(), + accel_ctx.value()); } else { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs); } @@ -353,29 +429,35 @@ int main(int argc, char *argv[]) { cv::GMetaArg descr = cap->descr_of(); auto frame_descr = cv::util::get(descr); + cv::GOpaque in_roi; + auto inputs = cv::gin(cap); // Now build the graph cv::GFrame in; auto size = cv::gapi::streaming::size(in); - auto roi = custom::LocateROI::on(size, std::cref(device_id)); - auto blob = cv::gapi::infer(in); - cv::GArray rcs = cv::gapi::parseSSD(blob, size, 0.5f, true, true); - auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, roi)); - auto out = cv::gapi::streaming::BGR(out_frame); - - cv::GStreamingCompiled pipeline; - try { - pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) - .compileStreaming(std::move(face_detection_args)); - } catch (const std::exception& ex) { - std::cerr << "Exception occured during pipeline construction: " << ex.what() << std::endl; - return -1; + auto graph_inputs = cv::GIn(in); + if (!opt_roi.has_value()) { + // Automatically detect ROI to infer. Make it output parameter + std::cout << "ROI is not set or invalid. 
Locating it automatically" + << std::endl; + in_roi = custom::LocateROI::on(size); + } else { + // Use the value provided by user + std::cout << "Will run inference for static region " + << opt_roi.value() + << " only" + << std::endl; + graph_inputs += cv::GIn(in_roi); + inputs += cv::gin(opt_roi.value()); } + auto blob = cv::gapi::infer(in_roi, in); + cv::GArray rcs = custom::ParseSSD::on(blob, in_roi, size); + auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, in_roi)); + auto out = cv::gapi::streaming::BGR(out_frame); + cv::GStreamingCompiled pipeline = cv::GComputation(std::move(graph_inputs), cv::GOut(out)) // and move here + .compileStreaming(std::move(face_detection_args)); // The execution part - - // TODO USE may set pool size from outside and set queue_capacity size, - // compile arg: cv::gapi::streaming::queue_capacity - pipeline.setSource(std::move(cap)); + pipeline.setSource(std::move(inputs)); pipeline.start(); size_t frames = 0u; @@ -384,7 +466,7 @@ int main(int argc, char *argv[]) { if (!output.empty() && !writer.isOpened()) { const auto sz = cv::Size{frame_descr.size.width, frame_descr.size.height}; writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); - CV_Assert(writer.isOpened()); + GAPI_Assert(writer.isOpened()); } cv::Mat outMat; @@ -399,6 +481,7 @@ int main(int argc, char *argv[]) { } tm.stop(); std::cout << "Processed " << frames << " frames" << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl; + return 0; } diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp index 2ed9642256..7a0f94655c 100644 --- a/modules/gapi/samples/pipeline_modeling_tool.cpp +++ b/modules/gapi/samples/pipeline_modeling_tool.cpp @@ -10,7 +10,9 @@ #include #if defined(_WIN32) +#define NOMINMAX #include +#undef NOMINMAX #endif #include "pipeline_modeling_tool/dummy_source.hpp" diff --git a/modules/gapi/src/backends/ie/giebackend.cpp 
b/modules/gapi/src/backends/ie/giebackend.cpp index 52c60c1f0b..4969b79860 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -64,6 +64,9 @@ template using QueueClass = cv::gapi::own::concurrent_bounded_queue< #include "utils/itt.hpp" +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + namespace IE = InferenceEngine; namespace { @@ -261,12 +264,36 @@ struct IEUnit { InferenceEngine::RemoteContext::Ptr rctx = nullptr; + std::shared_ptr preproc_engine_impl; + // FIXME: Unlike loadNetwork case, importNetwork requires that preprocessing // should be passed as ExecutableNetwork::SetBlob method, so need to collect // and store this information at the graph compilation stage (outMeta) and use in runtime. using PreProcMap = std::unordered_map; PreProcMap preproc_map; + // NEW FIXME: Need to aggregate getInputInfo & GetInputInfo from network + // into generic wrapper and invoke it at once in single place instead of + // analyzing ParamDesc::Kind::Load/Import every time when we need to get access + // for network info. + // In term of introducing custom VPP/VPL preprocessing functionality + // It was decided to use GFrameDesc as such aggregated network info with limitation + // that VPP/VPL produces cv::MediaFrame only. 
But it should be not considered as + // final solution + class InputFramesDesc { + using input_name_type = std::string; + using description_type = cv::GFrameDesc; + std::map map; + public: + static bool is_applicable(const cv::GMetaArg &mm); + const description_type &get_param(const input_name_type &input) const; + + void set_param(const input_name_type &input, + const IE::TensorDesc& desc); + }; + + InputFramesDesc net_input_params; + explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) : params(pp) { InferenceEngine::ParamMap* ctx_params = @@ -336,6 +363,17 @@ struct IEUnit { } else { cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind")); } + + using namespace cv::gapi::wip::onevpl; + if (params.vpl_preproc_device.has_value() && params.vpl_preproc_ctx.has_value()) { + using namespace cv::gapi::wip; + GAPI_LOG_INFO(nullptr, "VPP preproc creation requested"); + preproc_engine_impl = + IPreprocEngine::create_preproc_engine( + params.vpl_preproc_device.value(), + params.vpl_preproc_ctx.value()); + GAPI_LOG_INFO(nullptr, "VPP preproc created successfuly"); + } } // This method is [supposed to be] called at Island compilation stage @@ -354,6 +392,39 @@ struct IEUnit { } }; +bool IEUnit::InputFramesDesc::is_applicable(const cv::GMetaArg &mm) { + return cv::util::holds_alternative(mm); +} + +const IEUnit::InputFramesDesc::description_type & +IEUnit::InputFramesDesc::get_param(const input_name_type &input) const { + auto it = map.find(input); + GAPI_Assert(it != map.end() && "No appropriate input is found in InputFramesDesc"); + return it->second; +} + +void IEUnit::InputFramesDesc::set_param(const input_name_type &input, + const IE::TensorDesc& desc) { + description_type ret; + ret.fmt = cv::MediaFormat::NV12; + const InferenceEngine::SizeVector& inDims = desc.getDims(); + auto layout = desc.getLayout(); + GAPI_LOG_DEBUG(nullptr, "network input: " << input << + ", tensor dims: " << inDims[0] << ", " << inDims[1] << + ", " << inDims[2] << ", " << 
inDims[3]); + if (layout != InferenceEngine::NHWC && layout != InferenceEngine::NCHW) { + GAPI_LOG_WARNING(nullptr, "Unsupported layout for VPP preproc: " << layout << + ", input name: " << input); + GAPI_Assert(false && "Unsupported layout for VPP preproc"); + } + GAPI_Assert(inDims.size() == 4u); + ret.size.width = static_cast(inDims[3]); + ret.size.height = static_cast(inDims[2]); + + auto res = map.emplace(input, ret); + GAPI_Assert(res.second && "Duplicated input info in InputFramesDesc are not allowable"); +} + class IECallContext { public: @@ -396,6 +467,9 @@ public: // To store exception appeared in callback. std::exception_ptr eptr; + using req_key_t = void*; + cv::MediaFrame* prepareKeepAliveFrameSlot(req_key_t key); + size_t releaseKeepAliveFrame(req_key_t key); private: cv::detail::VectorRef& outVecRef(std::size_t idx); @@ -417,6 +491,10 @@ private: // Input parameters passed to an inference operation. cv::GArgs m_args; cv::GShapes m_in_shapes; + + // keep alive preprocessed frames + std::mutex keep_alive_frames_mutex; + std::unordered_map keep_alive_pp_frames; }; IECallContext::IECallContext(const IEUnit & unit, @@ -516,6 +594,35 @@ cv::GArg IECallContext::packArg(const cv::GArg &arg) { } } +cv::MediaFrame* IECallContext::prepareKeepAliveFrameSlot(req_key_t key) { + std::lock_guard lock(keep_alive_frames_mutex); + return &keep_alive_pp_frames[key]; +} + +size_t IECallContext::releaseKeepAliveFrame(req_key_t key) { + size_t elapsed_count = 0; + void *prev_slot = nullptr; + // NB: release MediaFrame previously captured by prepareKeepAliveFrameSlot + // We must capture it to keep a reference counter on inner media adapter + // to ensure that frame resource would be locked until inference done. 
+ // Otherwise decoder could seized this frame resource as free/unlocked resource + // from resource pool + // Current function just take a unique frame `key` and overwrite stored + // actual frame by empty frame + { + std::lock_guard lock(keep_alive_frames_mutex); + auto ka_frame_it = keep_alive_pp_frames.find(key); + if (ka_frame_it != keep_alive_pp_frames.end()) { + prev_slot = &ka_frame_it->second; + ka_frame_it->second = cv::MediaFrame(); + } + elapsed_count = keep_alive_pp_frames.size(); + } + GAPI_LOG_DEBUG(nullptr, "Release keep alive frame, slot: " << prev_slot << + ", reserved frames count: " << elapsed_count); + return elapsed_count; +} + struct IECallable { static const char *name() { return "IERequestCallable"; } using Run = std::function, cv::gimpl::ie::RequestPool&)>; @@ -552,11 +659,65 @@ using GConstGIEModel = ade::ConstTypedGraph , IECallable >; -inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) { +cv::MediaFrame preprocess_frame_impl(cv::MediaFrame &&in_frame, const std::string &layer_name, + IECallContext& ctx, + const cv::util::optional &opt_roi, + cv::MediaFrame* out_keep_alive_frame, + bool* out_is_preprocessed) { + cv::util::optional param = + ctx.uu.preproc_engine_impl->is_applicable(in_frame); + if (param.has_value()) { + GAPI_LOG_DEBUG(nullptr, "VPP preprocessing for decoded remote frame will be used"); + cv::GFrameDesc expected_net_input_descr = + ctx.uu.net_input_params.get_param(layer_name); + + // TODO: Find a better place to configure media format for GPU + // adjust color conversion to NV12 according to OV GPU limitation + if(ctx.uu.params.device_id.find("GPU") != std::string::npos && + ctx.uu.rctx) { + auto it = ctx.uu.params.config.find(std::string("GPU_NV12_TWO_INPUTS")); + if (it != ctx.uu.params.config.end()) { + if (it->second == "YES") { + GAPI_LOG_DEBUG(nullptr, "Adjust preprocessing GPU media format to NV12"); + expected_net_input_descr.fmt = cv::MediaFormat::NV12; + } + } + } + + 
cv::gapi::wip::pp_session pp_sess = + ctx.uu.preproc_engine_impl->initialize_preproc(param.value(), + expected_net_input_descr); + + in_frame = ctx.uu.preproc_engine_impl->run_sync(pp_sess, in_frame, opt_roi); + + if (out_keep_alive_frame != nullptr) { + GAPI_LOG_DEBUG(nullptr, "remember preprocessed remote frame to keep it busy from reuse, slot: " << + out_keep_alive_frame); + *out_keep_alive_frame = in_frame; + } + if (out_is_preprocessed) { + *out_is_preprocessed = true; + } + } // otherwise it is not suitable frame, then check on other preproc backend or rely on IE plugin + return std::move(in_frame); +} + +inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i, + const std::string &layer_name, + const cv::util::optional &opt_roi, + cv::MediaFrame* out_keep_alive_frame, + bool* out_is_preprocessed) { GAPI_Assert(ctx.inShape(i) == cv::GShape::GFRAME && "Remote blob is supported for MediaFrame only"); + cv::MediaFrame frame = ctx.inFrame(i); + if (ctx.uu.preproc_engine_impl) { + GAPI_LOG_DEBUG(nullptr, "Try to use preprocessing for decoded remote frame in remote ctx"); + frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi, + out_keep_alive_frame, out_is_preprocessed); + } - cv::util::any any_blob_params = ctx.inFrame(i).blobParams(); + // Request params for result frame whatever it got preprocessed or not + cv::util::any any_blob_params = frame.blobParams(); using ParamType = std::pair; using NV12ParamType = std::pair; @@ -582,14 +743,24 @@ inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) { inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, - cv::gapi::ie::TraitAs hint) { + cv::gapi::ie::TraitAs hint, + const std::string& layer_name, + const cv::util::optional &opt_roi, + cv::MediaFrame* out_keep_alive_frame = nullptr, + bool* out_is_preprocessed = nullptr) { if (ctx.uu.rctx != nullptr) { - return extractRemoteBlob(ctx, i); + return extractRemoteBlob(ctx, i, layer_name, opt_roi, + 
out_keep_alive_frame, out_is_preprocessed); } switch (ctx.inShape(i)) { case cv::GShape::GFRAME: { - const auto& frame = ctx.inFrame(i); + auto frame = ctx.inFrame(i); + if (ctx.uu.preproc_engine_impl) { + GAPI_LOG_DEBUG(nullptr, "Try to use preprocessing for decoded frame in local ctx"); + frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi, + out_keep_alive_frame, out_is_preprocessed); + } ctx.views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R))); return wrapIE(*(ctx.views.back()), frame.desc()); } @@ -626,10 +797,20 @@ static void setROIBlob(InferenceEngine::InferRequest& req, const IECallContext& ctx) { if (ctx.uu.params.device_id.find("GPU") != std::string::npos && ctx.uu.rctx) { - GAPI_LOG_WARNING(nullptr, "ROI blob creation for device_id: " << - ctx.uu.params.device_id << ", layer: " << layer_name << - "is not supported yet"); - GAPI_Assert(false && "Unsupported ROI blob creation for GPU remote context"); + try { + // NB: make_shared_blob() cannot work with GPU NV12 & ROI at the moment. + // OpenVINO produces exception with unsupported status. + // To do not encounter with silent crash situation we should catch OV exception + // and suggest to avoid this problem by using inner preprocessing feature. 
+ // VPP/VPL proprocessing are supported at the moment + setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx); + } catch (const std::exception &ex) { + GAPI_LOG_WARNING(nullptr, "cannot set ROI blob for layer: " << layer_name << + ", reason:\n" << ex.what() << + "\nTry using self GAPI preprocessing feature: " + " Check method `cfgPreprocessingParams` in `cv::gapi::ie::Params`"); + throw; + } } else { setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx); } @@ -975,6 +1156,8 @@ static void PostOutputs(InferenceEngine::InferRequest &request, ctx->out.meta(output, ctx->input(0).meta); ctx->out.post(std::move(output), ctx->eptr); } + + ctx->releaseKeepAliveFrame(&request); } class PostOutputsList { @@ -1088,6 +1271,12 @@ struct Infer: public cv::detail::KernelTag { if (isApplicableForResize(ii->getTensorDesc())) { ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); } + + // NB: configure input param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } // FIXME: This isn't the best place to call reshape function. @@ -1107,6 +1296,12 @@ struct Infer: public cv::detail::KernelTag { auto ii = inputs.at(input_name); const auto & mm = std::get<1>(it); non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + + // NB: configure input param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } } @@ -1145,7 +1340,9 @@ struct Infer: public cv::detail::KernelTag { (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) ? cv::gapi::ie::TraitAs::IMAGE : cv::gapi::ie::TraitAs::TENSOR; - IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint); + IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint, + layer_name, + cv::util::optional{}); setBlob(req, layer_name, this_blob, *ctx); } // FIXME: Should it be done by kernel ? 
@@ -1200,6 +1397,12 @@ struct InferROI: public cv::detail::KernelTag { if (!input_reshape_table.empty()) { const_cast(&uu.net)->reshape(input_reshape_table); } + + // NB: configure input param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); auto inputs = uu.this_network.GetInputsInfo(); @@ -1207,6 +1410,12 @@ struct InferROI: public cv::detail::KernelTag { auto* non_const_prepm = const_cast(&uu.preproc_map); auto ii = inputs.at(input_name); non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + + // NB: configure intput param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } // FIXME: It would be nice here to have an exact number of network's @@ -1236,13 +1445,26 @@ struct InferROI: public cv::detail::KernelTag { GAPI_Assert(ctx->uu.params.num_in == 1); auto&& this_roi = ctx->inArg(0).rref(); + // reserve unique slot for keep alive preprocessed frame + cv::MediaFrame* slot_ptr = ctx->prepareKeepAliveFrameSlot(&req); + // NB: This blob will be used to make roi from its, so // it should be treated as image + bool preprocessed = false; IE::Blob::Ptr this_blob = - extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE); - setROIBlob(req, + extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + *(ctx->uu.params.input_names.begin()), + cv::util::make_optional(this_roi), + slot_ptr, &preprocessed); + if (!preprocessed) { + setROIBlob(req, *(ctx->uu.params.input_names.begin()), this_blob, this_roi, *ctx); + } else { + setBlob(req, + *(ctx->uu.params.input_names.begin()), + this_blob, *ctx); + } // FIXME: Should it be done by kernel ? // What about to do that in RequestPool ? 
req.StartAsync(); @@ -1336,7 +1558,9 @@ struct InferList: public cv::detail::KernelTag { // NB: This blob will be used to make roi from its, so // it should be treated as image - IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE); + IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + ctx->uu.params.input_names[0u], + cv::util::optional{}); std::vector> cached_dims(ctx->uu.params.num_out); for (auto i : ade::util::iota(ctx->uu.params.num_out)) { @@ -1483,7 +1707,9 @@ struct InferList2: public cv::detail::KernelTag { && "This operation must have at least two arguments"); // NB: This blob will be used to make roi from its, so // it should be treated as image - IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE); + IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE, + ctx->uu.params.input_names[0u], + cv::util::optional{}); const auto list_size = ctx->inArg(1u).size(); if (list_size == 0u) { for (auto i : ade::util::iota(ctx->uu.params.num_out)) { diff --git a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp index a4d85f2598..b3beb71fb1 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp @@ -38,6 +38,20 @@ namespace gapi { namespace wip { namespace onevpl { +std::vector insertCfgparam(std::vector &¶m_array, AccelType type) { + switch (type) { + case AccelType::HOST: + break; + case AccelType::DX11: + param_array.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + break; + default: + GAPI_DbgAssert(false && "Unexpected AccelType"); + break; + } + return std::move(param_array); +} + CfgParamDeviceSelector::CfgParamDeviceSelector(const CfgParams& cfg_params) : suggested_device(IDeviceSelector::create(nullptr, "CPU", AccelType::HOST)), 
suggested_context(IDeviceSelector::create(nullptr, AccelType::HOST)) { @@ -231,6 +245,52 @@ CfgParamDeviceSelector::CfgParamDeviceSelector(Device::Ptr device_ptr, } } +CfgParamDeviceSelector::CfgParamDeviceSelector(const Device &device, + const Context &ctx, + CfgParams) : + suggested_device(device), + suggested_context(ctx) { + + switch(device.get_type()) { + case AccelType::DX11: { +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 + ID3D11Device* dx_device_ptr = + reinterpret_cast(suggested_device.get_ptr()); + dx_device_ptr->AddRef(); + + ID3D11DeviceContext* dx_ctx_ptr = + reinterpret_cast(suggested_context.get_ptr()); + + // oneVPL recommendation + { + ID3D11Multithread *pD11Multithread = nullptr; + dx_ctx_ptr->QueryInterface(IID_PPV_ARGS(&pD11Multithread)); + pD11Multithread->SetMultithreadProtected(true); + pD11Multithread->Release(); + } + + dx_ctx_ptr->AddRef(); + break; +#else + GAPI_LOG_WARNING(nullptr, "Unavailable \"" << CfgParam::acceleration_mode_name() << + ": MFX_ACCEL_MODE_VIA_D3D11\"" + "was chosen for current project configuration"); + throw std::logic_error(std::string("Unsupported \"") + + CfgParam::acceleration_mode_name() + ": MFX_ACCEL_MODE_VIA_D3D11\""); +#endif // HAVE_DIRECTX +#endif // HAVE_D3D11 + } + case AccelType::HOST: + break; + default: + throw std::logic_error(std::string("Unsupported \"") + CfgParam::acceleration_mode_name() + + "\" requested: " + + to_cstring(device.get_type())); + break; + } +} + CfgParamDeviceSelector::~CfgParamDeviceSelector() { GAPI_LOG_INFO(nullptr, "release context: " << suggested_context.get_ptr()); AccelType ctype = suggested_context.get_type(); diff --git a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp index 2a55fb09cf..5dae1c508d 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp +++ b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp @@ -7,14 +7,14 @@ #ifndef 
GAPI_STREAMING_ONEVPL_CFG_PARAM_DEVICE_SELECTOR_HPP #define GAPI_STREAMING_ONEVPL_CFG_PARAM_DEVICE_SELECTOR_HPP -#ifdef HAVE_ONEVPL - #include #include #include #include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#ifdef HAVE_ONEVPL + namespace cv { namespace gapi { namespace wip { @@ -26,6 +26,9 @@ struct GAPI_EXPORTS CfgParamDeviceSelector final: public IDeviceSelector { const std::string& device_id, Context::Ptr ctx_ptr, const CfgParams& params); + CfgParamDeviceSelector(const Device &device_ptr, + const Context &ctx_ptr, + CfgParams params); ~CfgParamDeviceSelector(); DeviceScoreTable select_devices() const override; diff --git a/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp b/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp index 1ac88bd807..404b2f3872 100644 --- a/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp +++ b/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp @@ -81,6 +81,42 @@ IDeviceSelector::Score::Type IDeviceSelector::Score::get() const { IDeviceSelector::~IDeviceSelector() { } +namespace detail +{ +struct DeviceContextCreator : public IDeviceSelector { + DeviceScoreTable select_devices() const override { return {};} + DeviceContexts select_context() override { return {};} + + template + static Entity create_entity(Args &&...args) { + return IDeviceSelector::create(std::forward(args)...); + } +}; +} + +Device create_host_device() { + return detail::DeviceContextCreator::create_entity(nullptr, + "CPU", + AccelType::HOST); +} + +Context create_host_context() { + return detail::DeviceContextCreator::create_entity(nullptr, + AccelType::HOST); +} + +Device create_dx11_device(Device::Ptr device_ptr, + const std::string& device_name) { + return detail::DeviceContextCreator::create_entity(device_ptr, + device_name, + AccelType::DX11); +} + +Context create_dx11_context(Context::Ptr ctx_ptr) { + return detail::DeviceContextCreator::create_entity(ctx_ptr, + AccelType::DX11); +} + } // 
namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp index 23ad385b51..5a08f2bd09 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp @@ -4,30 +4,33 @@ // // Copyright (C) 2022 Intel Corporation -#ifdef HAVE_ONEVPL - #include #include #include +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" #include "streaming/onevpl/engine/preproc/preproc_engine.hpp" #include "streaming/onevpl/engine/preproc/preproc_session.hpp" -#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" #include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #include "streaming/onevpl/accelerators/surface/surface.hpp" #include "streaming/onevpl/cfg_params_parser.hpp" -#include "logger.hpp" +#endif // HAVE_ONEVPL +#include "logger.hpp" namespace cv { namespace gapi { namespace wip { namespace onevpl { +#ifdef HAVE_ONEVPL cv::util::optional VPPPreprocDispatcher::is_applicable(const cv::MediaFrame& in_frame) { cv::util::optional param; GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + bool worker_found = false; for (const auto &w : workers) { param = w->is_applicable(in_frame); if (param.has_value()) { @@ -42,11 +45,12 @@ cv::util::optional VPPPreprocDispatcher::is_applicable(const cv::Medi if (worker_accel_type == adapter->accel_type()){ vpp_param.reserved = reinterpret_cast(w.get()); GAPI_LOG_DEBUG(nullptr, "selected worker: " << vpp_param.reserved); + worker_found = true; break; } } } - return param; + return worker_found ? 
param : cv::util::optional{}; } pp_session VPPPreprocDispatcher::initialize_preproc(const pp_params& initial_frame_param, @@ -78,8 +82,24 @@ cv::MediaFrame VPPPreprocDispatcher::run_sync(const pp_session &session_handle, } GAPI_Assert(false && "Cannot invoke VPP preproc in dispatcher, no suitable worker"); } + +#else // HAVE_ONEVPL +cv::util::optional VPPPreprocDispatcher::is_applicable(const cv::MediaFrame&) { + return cv::util::optional{}; +} + +pp_session VPPPreprocDispatcher::initialize_preproc(const pp_params&, + const GFrameDesc&) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} + +cv::MediaFrame VPPPreprocDispatcher::run_sync(const pp_session &, + const cv::MediaFrame&, + const cv::util::optional &) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} +#endif // HAVE_ONEVPL } // namespace onevpl } // namespace wip } // namespace gapi } // namespace cv -#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp index 6e2ebc81f9..ea808bd542 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp @@ -11,10 +11,6 @@ #include #include "streaming/onevpl/engine/preproc_engine_interface.hpp" -#include "streaming/onevpl/engine/preproc_defines.hpp" - -#ifdef HAVE_ONEVPL -#include "streaming/onevpl/onevpl_export.hpp" namespace cv { namespace gapi { @@ -49,5 +45,4 @@ private: } // namespace wip } // namespace gapi } // namespace cv -#endif // HAVE_ONEVPL #endif // GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp index d205211903..ec27a6422d 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp +++ 
b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp @@ -51,7 +51,7 @@ void apply_roi(mfxFrameSurface1* surface_handle, VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& accel) : ProcessingEngineBase(std::move(accel)) { - GAPI_LOG_INFO(nullptr, "Create VPP preprocessing engine"); + GAPI_LOG_DEBUG(nullptr, "Create VPP preprocessing engine"); preprocessed_frames_count = 0; create_pipeline( // 0) preproc decoded surface with VPP params @@ -455,7 +455,7 @@ ProcessingEngineBase::ExecutionStatus VPPPreprocEngine::process_error(mfxStatus "MFX_ERR_REALLOC_SURFACE is not processed"); break; case MFX_WRN_IN_EXECUTION: - GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] got MFX_WRN_IN_EXECUTION"); + GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "] got MFX_WRN_IN_EXECUTION"); return ExecutionStatus::Continue; default: GAPI_LOG_WARNING(nullptr, "Unknown status code: " << mfxstatus_to_string(status) << diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp index 5f68d9c4f7..be215fec74 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp @@ -23,8 +23,8 @@ namespace wip { #else // VPP_PREPROC_ENGINE struct empty_pp_params {}; struct empty_pp_session {}; -#define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::empty_pp_params; -#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::empty_pp_session; +#define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::empty_pp_params +#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::empty_pp_session #endif // VPP_PREPROC_ENGINE struct pp_params { diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp new file mode 100644 index 0000000000..ff9f103b5a --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp @@ -0,0 +1,83 @@ +// This file is part of 
OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#include +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" + +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_param_device_selector.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" + +#endif //HAVE_ONEVPL + +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +template +std::unique_ptr +IPreprocEngine::create_preproc_engine_impl(const PreprocEngineArgs& ...) { + GAPI_Assert(false && "Unsupported "); +} + +template <> +std::unique_ptr +IPreprocEngine::create_preproc_engine_impl(const onevpl::Device &device, + const onevpl::Context &context) { + using namespace onevpl; + cv::util::suppress_unused_warning(device); + cv::util::suppress_unused_warning(context); + std::unique_ptr dispatcher(new VPPPreprocDispatcher); +#ifdef HAVE_ONEVPL + if (device.get_type() == onevpl::AccelType::DX11) { + bool gpu_pp_is_created = false; +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 + GAPI_LOG_INFO(nullptr, "Creating DX11 VPP preprocessing engine"); + // create GPU VPP preproc engine + dispatcher->insert_worker( + std::unique_ptr{ + new VPLDX11AccelerationPolicy( + std::make_shared( + device, context, CfgParams{})) + }); + GAPI_LOG_INFO(nullptr, "DX11 VPP preprocessing engine created"); + gpu_pp_is_created = true; +#endif +#endif + GAPI_Assert(gpu_pp_is_created && "VPP preproc for GPU is requested, but it is avaiable only for DX11 at now"); + } else { + 
GAPI_LOG_INFO(nullptr, "Creating CPU VPP preprocessing engine"); + dispatcher->insert_worker( + std::unique_ptr{ + new VPLCPUAccelerationPolicy( + std::make_shared(CfgParams{}))}); + GAPI_LOG_INFO(nullptr, "CPU VPP preprocessing engine created"); + } +#endif // HAVE_ONEVPL + return dispatcher; +} + + +// Force instantiation +template +std::unique_ptr +IPreprocEngine::create_preproc_engine_impl + (const onevpl::Device &device, + const onevpl::Context &ctx); +} // namespace wip +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp index be347a258f..72c1dbd0a7 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp @@ -29,6 +29,16 @@ struct IPreprocEngine { virtual cv::MediaFrame run_sync(const pp_session &sess, const cv::MediaFrame& in_frame, const cv::util::optional &opt_roi = {}) = 0; + + template + static std::unique_ptr create_preproc_engine(const PreprocEngineArgs& ...args) { + static_assert(std::is_base_of::value, + "SpecificPreprocEngine must have reachable ancessor IPreprocEngine"); + return create_preproc_engine_impl(args...); + } +private: + template + static std::unique_ptr create_preproc_engine_impl(const PreprocEngineArgs &...args); }; } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/source.cpp b/modules/gapi/src/streaming/onevpl/source.cpp index e5b045188d..3bad463e41 100644 --- a/modules/gapi/src/streaming/onevpl/source.cpp +++ b/modules/gapi/src/streaming/onevpl/source.cpp @@ -33,6 +33,13 @@ GSource::GSource(const std::string& filePath, accel_ctx_ptr, cfg_params)) { } +GSource::GSource(const std::string& filePath, + const CfgParams& cfg_params, + const Device &device, const Context &ctx) : + GSource(filePath, cfg_params, + std::make_shared(device, ctx, cfg_params)) { +} + 
GSource::GSource(const std::string& filePath, const CfgParams& cfg_params, std::shared_ptr selector) : @@ -74,6 +81,10 @@ GSource::GSource(const std::string&, const CfgParams&, const std::string&, GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); } +GSource::GSource(const std::string&, const CfgParams&, const Device &, const Context &) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} + GSource::GSource(const std::string&, const CfgParams&, std::shared_ptr) { GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); } From b379b67a3255ceaa58cb96baff59508b57962e25 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Fri, 1 Apr 2022 20:35:12 +0800 Subject: [PATCH 59/84] Merge pull request #21752 from fengyuentau:add_github_actions_for_timvx_backend CI for TIM-VX backend * github actions for TIM-VX backend https://github.com/opencv/opencv/pull/21036 * add reference to yuentau/ocv_ubuntu:20.04; remove extra quotes; enable BUILD_TESTS * rename to timvx_backend_tests.yml * add image source prefix * remove if condition for x86_64 simulator --- .github/workflows/timvx_backend_tests.yml | 104 ++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 .github/workflows/timvx_backend_tests.yml diff --git a/.github/workflows/timvx_backend_tests.yml b/.github/workflows/timvx_backend_tests.yml new file mode 100644 index 0000000000..c45a86f8b2 --- /dev/null +++ b/.github/workflows/timvx_backend_tests.yml @@ -0,0 +1,104 @@ +name: TIM-VX Backend + +on: + pull_request: + branches: [ 4.x ] + types: [ labeled, opened, synchronize, reopened ] + + +jobs: + x86-simulator-build-test: + runs-on: ubuntu-20.04 + # Docker image from https://hub.docker.com/r/yuentau/ocv_ubuntu + container: docker.io/yuentau/ocv_ubuntu:20.04 + env: + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + steps: 
+ - name: info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: clean + shell: bash + run: find . -mindepth 1 -delete + - name: fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + path: opencv + - name: merge opencv with test branch + shell: bash + run: | + cd opencv + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories + - name: configure + run: | + cmake -B build -DWITH_TIMVX=ON -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON -DBUILD_PERF_TESTS=ON -DBUILD_TESTS=ON -DBUILD_EXAMPLES=OFF -DBUILD_DOCS=OFF -DWITH_OPENCL=OFF opencv + - name: build + run: cmake --build build --target install -j $(nproc) + + khadas-vim3-tests: + if: contains(github.event.pull_request.labels.*.name, 'category:dnn_timvx') + concurrency: + group: khadas-vim3 + cancel-in-progress: false + runs-on: [self-hosted, Linux, ARM64, khadas-vim3] + env: + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + steps: + - name: info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: clean + shell: bash + run: find . 
-mindepth 1 -delete + - name: fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + path: opencv + - name: merge opencv with test branch + shell: bash + run: | + cd opencv + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories + - name: fetch opencv_extra + uses: actions/checkout@v3 + with: + repository: opencv/opencv_extra + path: opencv_extra + - name: merge opencv_extra with test branch + shell: bash + run: | + RET=$(git ls-remote --heads "https://github.com/${{ env.PR_AUTHOR }}/opencv_extra" "${{ env.SOURCE_BRANCH_NAME }}") + if [[ ! -z "$RET" ]]; then + cd opencv_extra + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv_extra" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories + else + echo "no merge since ${{ env.PR_AUTHOR }}/opencv_extra does not have branch ${{ env.SOURCE_BRANCH_NAME }}" + fi + - name: configure + run: | + cmake -B build -D CMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=./install -DWITH_TIMVX=ON -DWITH_OPENCL=OFF -DWITH_EIGEN=OFF opencv + - name: build + run: cmake --build build --target opencv_test_dnn -j 4 + - name: unit tests for int8 layers + run: | + OPENCV_TEST_DATA_PATH=./opencv_extra/testdata ./build/bin/opencv_test_dnn --gtest_filter="Test_Int8_layers.*/1" From 38788a31619b742f3af86db4e0e5e9fcf8343de9 Mon Sep 17 00:00:00 2001 From: Oguzhan Guclu Date: Sat, 2 Apr 2022 01:13:14 +0300 Subject: [PATCH 60/84] Merge pull request #21803 from oguzhanguclu:matches_info_pybinding python binding for matches and inliers_mask attributes of cv2.detail_MatchesInfo class * making matches and inliers_mask attributes of cv2.detail_MatchesInfo class accessible from python interface * binding test for cv2.detail_MatchesInfo class --- 
.../opencv2/stitching/detail/matchers.hpp | 4 ++-- .../misc/python/test/test_stitching.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp index cd9749ca8b..1b7d7d6897 100644 --- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp @@ -104,8 +104,8 @@ struct CV_EXPORTS_W_SIMPLE MatchesInfo CV_PROP_RW int src_img_idx; CV_PROP_RW int dst_img_idx; //!< Images indices (optional) - std::vector matches; - std::vector inliers_mask; //!< Geometrically consistent matches mask + CV_PROP_RW std::vector matches; + CV_PROP_RW std::vector inliers_mask; //!< Geometrically consistent matches mask CV_PROP_RW int num_inliers; //!< Number of geometrically consistent matches CV_PROP_RW Mat H; //!< Estimated transformation CV_PROP_RW double confidence; //!< Confidence two images are from the same panorama diff --git a/modules/stitching/misc/python/test/test_stitching.py b/modules/stitching/misc/python/test/test_stitching.py index 0d66182fb8..2e7b2b5818 100644 --- a/modules/stitching/misc/python/test/test_stitching.py +++ b/modules/stitching/misc/python/test/test_stitching.py @@ -118,5 +118,22 @@ class stitching_compose_panorama_args(NewOpenCVTests): assert result == 0 +class stitching_matches_info_test(NewOpenCVTests): + + def test_simple(self): + finder = cv.ORB.create() + img1 = self.get_sample('stitching/a1.png') + img2 = self.get_sample('stitching/a2.png') + + img_feat1 = cv.detail.computeImageFeatures2(finder, img1) + img_feat2 = cv.detail.computeImageFeatures2(finder, img2) + + matcher = cv.detail.BestOf2NearestMatcher_create() + matches_info = matcher.apply(img_feat1, img_feat2) + + self.assertIsNotNone(matches_info.matches) + self.assertIsNotNone(matches_info.inliers_mask) + + if __name__ == '__main__': NewOpenCVTests.bootstrap() 
From 4d927e73f1aebba2ce34c14330ab39441636c8e6 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 30 Mar 2022 19:03:38 +0000 Subject: [PATCH 61/84] dnn(test): update OpenVINO tests 2022.1.0 --- modules/dnn/test/test_backends.cpp | 44 +++- modules/dnn/test/test_caffe_importer.cpp | 53 ++++- modules/dnn/test/test_darknet_importer.cpp | 144 ++++++++++++-- modules/dnn/test/test_halide_layers.cpp | 26 ++- modules/dnn/test/test_layers.cpp | 17 +- modules/dnn/test/test_onnx_importer.cpp | 221 ++++++++++++++++++--- modules/dnn/test/test_tf_importer.cpp | 215 +++++++++++++++----- modules/dnn/test/test_torch_importer.cpp | 15 ++ 8 files changed, 625 insertions(+), 110 deletions(-) diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index d1df4c35aa..1a8b747a65 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -206,17 +206,31 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) { if (backend == DNN_BACKEND_HALIDE) applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); -#if defined(INF_ENGINE_RELEASE) - if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && - target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // May hang on some configurations + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE exception: Ngraph operation Transpose with name conv15_2_mbox_conf_perm has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && + target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && + target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif Mat sample = imread(findDataFile("dnn/street.png")); @@ -410,8 +424,8 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); Mat sample = 
imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false); - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.015 : 0.0; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0; + float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.02 : 0.0; + float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.1 : 0.0; processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", inp, "detection_out", "", l1, lInf); expectNoFallbacksFromIE(net); @@ -462,6 +476,20 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) // Output image has values in range [-143.526, 148.539]. float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.4 : 4e-5; float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7.45 : 2e-3; + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + { + l1 = 5e-3; + lInf = 5e-3; + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + lInf = 25; + } +#endif + + processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf); #if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000) expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 7249fb4e9f..d1ea09a3bf 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -534,7 +534,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121) if (target == DNN_TARGET_OPENCL_FP16) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019020000) - l1 = 0.045; lInf = 0.21; + l1 = 0.05; lInf = 0.3; #else l1 = 0.017; lInf = 0.0795; #endif @@ -692,6 +692,18 @@ 
TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + + // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: + // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': + // Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84} + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + static Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); @@ -715,6 +727,13 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); #endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); @@ -734,19 +753,43 @@ TEST_P(Test_Caffe_nets, RFCN) CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_VERYLONG ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + + double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 4e-3 : default_l1; + double iouDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 8e-2 : default_lInf; + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Sporadic: "Cannot get memory!" + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + scoreDiff = 0.1f; + iouDiff = 0.2f; + } + + // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: + // While validating node 'v1::Reshape bbox_pred_reshape (ave_bbox_pred_rois[0]:f32{1,8,1,1}, Constant_388[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': + // Requested output shape {1,300,8,1} is incompatible with input shape {1, 8, 1, 1} + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif +#elif defined(INF_ENGINE_RELEASE) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); - double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 4e-3 : default_l1; - double iouDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 
8e-2 : default_lInf; +#endif static Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref, scoreDiff, iouDiff); diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index f069ad190d..c9f8c6271f 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -139,7 +139,14 @@ public: inp.copyTo(inp2(ranges1)); net2.setInput(inp2); Mat out2 = net2.forward(); - EXPECT_EQ(0, cv::norm(out2(ranges0), out2(ranges1), NORM_INF)) << "Batch result is not equal: " << name; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + EXPECT_LT(cv::norm(out2(ranges0), out2(ranges1), NORM_INF), 1e-4) << "Batch result is not similar: " << name; + } + else + { + EXPECT_EQ(0, cv::norm(out2(ranges0), out2(ranges1), NORM_INF)) << "Batch result is not equal: " << name; + } Mat ref2 = ref; if (ref.dims == 2 && out2.dims == 3) @@ -328,12 +335,10 @@ TEST_P(Test_Darknet_nets, YoloVoc) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); // need to update check function @@ -355,12 +360,28 @@ TEST_P(Test_Darknet_nets, YoloVoc) scoreDiff = 1e-2; iouDiff = 0.018; } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) { iouDiff = std::numeric_limits::quiet_NaN(); } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } #endif std::string config_file = "yolo-voc.cfg"; @@ -371,12 +392,25 @@ TEST_P(Test_Darknet_nets, YoloVoc) testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff); } +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Exception: input != output + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // [ GENERAL_ERROR ] AssertionFailed: input != output + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif { SCOPED_TRACE("batch size 2"); testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, nmsThreshold); } -#if defined(INF_ENGINE_RELEASE) && 
INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -444,14 +478,15 @@ TEST_P(Test_Darknet_nets_async, Accuracy) if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); -#if defined(INF_ENGINE_RELEASE) -#if INF_ENGINE_VER_MAJOR_GE(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov3") // NC_OUT_OF_MEMORY - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#else + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov3") // NC_OUT_OF_MEMORY + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") // NC_OUT_OF_MEMORY applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif #endif const int numInputs = 2; @@ -506,6 +541,11 @@ TEST_P(Test_Darknet_nets_async, Accuracy) l1 = 0.001; lInf = 0.005; } + if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") + { + l1 = 0.005; + lInf = 1.5f; // |ref| = 0.95431125164031982 + } } 
#endif @@ -610,7 +650,7 @@ TEST_P(Test_Darknet_nets, YOLOv4) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif -#if defined(INF_ENGINE_RELEASE) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -640,14 +680,24 @@ TEST_P(Test_Darknet_nets, YOLOv4) std::string weights_file = "yolov4.weights"; -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy (batch 1): no detections + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy (batch 1) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) { iouDiff = std::numeric_limits::quiet_NaN(); } -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) @@ -665,14 +715,24 @@ TEST_P(Test_Darknet_nets, YOLOv4) { SCOPED_TRACE("batch size 2"); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - // accuracy (batch 1) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy (batch 2) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == 
DNN_TARGET_OPENCL_FP16) + { + iouDiff = 0.05f; + } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy (batch 2) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) { iouDiff = 0.45f; } -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_OPENCL) @@ -688,7 +748,14 @@ TEST_P(Test_Darknet_nets, YOLOv4) testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -842,8 +909,27 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets()); TEST_P(Test_Darknet_layers, shortcut) { testDarknetLayer("shortcut"); +} +TEST_P(Test_Darknet_layers, shortcut_leaky) +{ testDarknetLayer("shortcut_leaky"); +} +TEST_P(Test_Darknet_layers, shortcut_unequal) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // 
IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testDarknetLayer("shortcut_unequal"); +} +TEST_P(Test_Darknet_layers, shortcut_unequal_2) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testDarknetLayer("shortcut_unequal_2"); } @@ -878,7 +964,19 @@ TEST_P(Test_Darknet_layers, region) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy on CPU, OpenCL + // Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05 + // |ref| = 1.207319974899292 + // Expected: (normInf) <= (lInf), actual: 0.763223 vs 0.0001 + // |ref| = 1.207319974899292 + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy on CPU, OpenCL // Expected: (normInf) <= (lInf), actual: 0.763223 vs 0.0001 // |ref| = 1.207319974899292 @@ -915,10 +1013,12 @@ TEST_P(Test_Darknet_layers, maxpool) TEST_P(Test_Darknet_layers, convolutional) { +#if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) { default_l1 = 0.01f; } +#endif testDarknetLayer("convolutional", true); } diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 405587eec7..c744d0feaa 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -170,6 +170,23 @@ TEST_P(Deconvolution, Accuracy) Backend backendId = get<0>(get<7>(GetParam())); Target targetId = get<1>(get<7>(GetParam())); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) + && inChannels == 6 && outChannels == 4 && group == 1 + && kernel == Size(3, 1) && pad == Size(0, 1) + && stride == Size(1, 1) && dilation == Size(1, 1)) + applyTestTag(targetId == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) + && inChannels == 6 && outChannels == 4 && group == 1 + && kernel == Size(1, 3) && pad == Size(1, 0) + && stride == Size(1, 1) && dilation == Size(1, 1)) + applyTestTag(targetId == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X @@ -618,9 +635,16 @@ TEST_P(NoParamActivation, Accuracy) { Backend backendId = get<0>(get<1>(GetParam())); Target targetId = get<1>(get<1>(GetParam())); + std::string layer_type = get<0>(GetParam()); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_CPU && layer_type == "BNLL") + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif LayerParams lp; - lp.type = get<0>(GetParam()); + lp.type = layer_type; lp.name = "testLayer"; testInPlaceActivation(lp, backendId, targetId); } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 29e8172793..b04a30fe64 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1431,6 +1431,12 @@ static void test_dldt_fused_output(Backend backend, Target target) TEST_P(Test_DLDT_layers, fused_output) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + CV_DNN_REGISTER_LAYER_CLASS(Unsupported, UnsupportedLayer); try { @@ -1590,7 +1596,16 @@ TEST_P(Test_Caffe_layers, Interp) TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (available in OpenCV source tree only) #endif { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception #endif diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index b3cbcebf21..afe80efaf1 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -103,6 +103,12 @@ TEST_P(Test_ONNX_layers, MaxPooling) } TEST_P(Test_ONNX_layers, MaxPooling_2) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + testONNXModels("two_maxpooling", npy, 0, 0, false, false); } @@ -141,6 +147,17 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight) TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // openvino/src/plugins/intel_myriad/common/src/ngraph/transformations/extract_dynamic_batch/slice_convolution.cpp:14 Expecting operation v1::GroupConvolution GroupConvolution_6904725 (Reshape_17[0]:f32{1,4,5,5}, Reshape_6904719[0]:f32{4,1,1,2,2}) -> (f32{1,4,4,4}) to have constant kernel, got Reshape_6904719[0]:f32{4,1,1,2,2} + // openvino\src\plugins\intel_myriad\common\src\ngraph\transformations\extract_dynamic_batch\slice_convolution.cpp:15 Expecting operation v1::GroupConvolution GroupConvolution_6904692 (Reshape_17[0]:f32{1,4,5,5}, Reshape_6904686[0]:f32{4,1,1,2,2}) -> (f32{1,4,4,4}) to have constant kernel, got Reshape_6904686[0]:f32{4,1,1,2,2} + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy (depends on OpenCL version / HW) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); @@ -148,6 +165,7 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU && getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif String basename = "conv_variable_wb"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); @@ -216,7 +234,15 @@ TEST_P(Test_ONNX_layers, Deconvolution) TEST_P(Test_ONNX_layers, Deconvolution3D) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "2": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 2@weights Const data got different desc and content byte sizes (162 and 486 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -234,7 +260,15 @@ TEST_P(Test_ONNX_layers, Deconvolution3D) TEST_P(Test_ONNX_layers, 
Deconvolution3D_bias) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "3": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 3@weights Const data got different desc and content byte sizes (270 and 810 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -252,7 +286,15 @@ TEST_P(Test_ONNX_layers, Deconvolution3D_bias) TEST_P(Test_ONNX_layers, Deconvolution3D_pad) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "3": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 3@weights Const data got different desc and content byte sizes (108 and 432 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] 
vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -270,7 +312,15 @@ TEST_P(Test_ONNX_layers, Deconvolution3D_pad) TEST_P(Test_ONNX_layers, Deconvolution3D_adjpad) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "3": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 3@weights Const data got different desc and content byte sizes (90 and 180 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -412,7 +462,19 @@ TEST_P(Test_ONNX_layers, AveragePooling) TEST_P(Test_ONNX_layers, MaxPooling3D) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // accuracy + if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: [ GENERAL_ERROR ] AssertionFailed: !expired() + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // accuracy @@ -536,8 +598,14 @@ TEST_P(Test_ONNX_layers, MatMul) TEST_P(Test_ONNX_layers, MatMulAdd) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); +#endif if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); testONNXModels("matmul_add"); @@ -723,6 +791,17 @@ TEST_P(Test_ONNX_layers, Slice_Steps_2DInput) TEST_P(Test_ONNX_layers, Slice_Steps_3DInput) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + testONNXModels("slice_opset_11_steps_3d"); } @@ -733,6 +812,20 @@ TEST_P(Test_ONNX_layers, Slice_Steps_4DInput) TEST_P(Test_ONNX_layers, Slice_Steps_5DInput) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + testONNXModels("slice_opset_11_steps_5d"); } @@ -754,7 +847,16 @@ TEST_P(Test_ONNX_layers, Split_EltwiseMax) TEST_P(Test_ONNX_layers, LSTM_Activations) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Node Block1326/lstm/reshape_0/permute was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE Exception: Ngraph operation Reshape with name Block1237_Output_0_before_reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, @@ -784,12 +886,27 @@ TEST_P(Test_ONNX_layers, LSTM_hidden) TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Node Transpose_45 was not assigned on any pointed device. + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false); } TEST_P(Test_ONNX_layers, LSTM_cell_forward) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // Ngraph operation Reshape with name LSTM_16/lstm_y/reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -800,7 +917,11 @@ TEST_P(Test_ONNX_layers, LSTM_cell_forward) } TEST_P(Test_ONNX_layers, LSTM_cell_bidirectional) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // Ngraph operation Reshape with name LSTM_16/lstm_y/reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -990,14 +1111,19 @@ TEST_P(Test_ONNX_layers, DynamicAxes_gather) TEST_P(Test_ONNX_layers, DynamicAxes_gather_scalar) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1139,7 +1265,10 @@ TEST_P(Test_ONNX_layers, MaxPool1d) TEST_P(Test_ONNX_layers, MaxPoolSigmoid1d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1154,7 +1283,16 @@ TEST_P(Test_ONNX_layers, MaxPoolSigmoid1d) TEST_P(Test_ONNX_layers, MaxPool1d_Twise) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1169,7 +1307,11 @@ TEST_P(Test_ONNX_layers, MaxPool1d_Twise) TEST_P(Test_ONNX_layers, AvePool1d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1184,7 +1326,19 @@ TEST_P(Test_ONNX_layers, AvePool1d) TEST_P(Test_ONNX_layers, PoolConv1d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1278,18 +1432,21 @@ TEST_P(Test_ONNX_nets, Squeezenet) TEST_P(Test_ONNX_nets, Googlenet) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - // accuracy - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif const String model = _tf("models/googlenet.onnx", false); @@ -1558,19 +1715,27 @@ TEST_P(Test_ONNX_nets, Shufflenet) TEST_P(Test_ONNX_nets, Resnet34_kinetics) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) - if (backend == 
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Only CPU on DLIE backend is supported -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Failed to allocate graph: MYRIAD device is not opened + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // IE exception: Function contains several inputs and outputs with one friendly name! 
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Only CPU on DLIE backend is supported #endif if (backend == DNN_BACKEND_OPENCV && target != DNN_TARGET_CPU) throw SkipTestException("Only CPU is supported"); // FIXIT use tags diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 073a2f3395..d3ee5d3300 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -206,7 +206,14 @@ TEST_P(Test_TensorFlow_layers, conv_keras_atrous_conv2d_same) } TEST_P(Test_TensorFlow_layers, conv_pool_nchw) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -266,13 +273,25 @@ TEST_P(Test_TensorFlow_layers, padding_asymmetric_3) TEST_P(Test_TensorFlow_layers, padding_asymmetric_4) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Unsupported pad value + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) // Exception: Unsupported pad value + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) // Exception: Unsupported pad value applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) // Exception: Unsupported pad value applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif #endif runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc"); } @@ -456,7 +475,14 @@ TEST_P(Test_TensorFlow_layers, slim_batch_norm) TEST_P(Test_TensorFlow_layers, pooling_max_pool_even) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -464,7 +490,14 @@ TEST_P(Test_TensorFlow_layers, pooling_max_pool_even) } TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_valid) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -472,7 +505,14 @@ TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_valid) } TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_same) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -591,7 +631,16 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same) TEST_P(Test_TensorFlow_layers, MaxPooling3D) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // accuracy @@ -603,8 +652,7 @@ TEST_P(Test_TensorFlow_layers, MaxPooling3D) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) @@ -740,14 +788,19 @@ TEST_P(Test_TensorFlow_layers, BiasAdd) // TODO: fix it and add to l2_normalize TEST_P(Test_TensorFlow_layers, l2_normalize_3d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 - && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - ) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) @@ -945,15 +998,28 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN_resnet50_coco_2018_01_28) CV_TEST_TAG_DEBUG_VERYLONG ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: subgraphTopoSortsStep < subgraphs.size() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: subgraphTopoSortsStep < subgraphs.size() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // [ GENERAL_ERROR ] AssertionFailed: subgraphTopoSortsStep++ < subgraphs.size() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); // IE exception: Ngraph operation Transpose with name FirstStageBoxPredictor/ClassPredictor/reshape_1/nhwc has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); -#endif - -#ifdef INF_ENGINE_RELEASE +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU)) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -961,12 +1027,12 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN_resnet50_coco_2018_01_28) if (INF_ENGINE_VER_MAJOR_GT(2019030000) && backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); -#endif #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) // segfault: inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp:111: // Assertion `prior_height > 0' failed. 
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif #endif checkBackend(); @@ -1143,7 +1209,14 @@ TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_single_conv) } TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_odd_same) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -1167,7 +1240,14 @@ TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_padding_valid) } TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_even) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -1187,7 +1267,14 @@ TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_deconvolution) } TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_odd_valid) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -1218,13 +1305,11 @@ TEST_P(Test_TensorFlow_layers, quantized) TEST_P(Test_TensorFlow_layers, lstm) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Xlink, Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // Exception: Ngraph operation Reshape with name Reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape @@ -1236,6 +1321,11 @@ TEST_P(Test_TensorFlow_layers, lstm) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } +#elif 
defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); @@ -1265,7 +1355,20 @@ TEST_P(Test_TensorFlow_layers, split_equals) TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif runTensorFlowNet("resize_nearest_neighbor"); +} +TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor_keras_upsampling2d) +{ runTensorFlowNet("keras_upsampling2d"); } @@ -1289,25 +1392,30 @@ TEST_P(Test_TensorFlow_layers, fused_resize_conv) runTensorFlowNet("fused_resize_conv"); } -TEST_P(Test_TensorFlow_layers, slice) +TEST_P(Test_TensorFlow_layers, slice_crop2d) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && - (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); double l1 = target == DNN_TARGET_MYRIAD ? 
4.9e-3 : default_l1; runTensorFlowNet("crop2d", false, l1); +} +TEST_P(Test_TensorFlow_layers, slice_4d) +{ runTensorFlowNet("slice_4d"); +} +TEST_P(Test_TensorFlow_layers, slice_strided) +{ runTensorFlowNet("strided_slice"); } -TEST_P(Test_TensorFlow_layers, softmax) +TEST_P(Test_TensorFlow_layers, softmax_keras) { runTensorFlowNet("keras_softmax"); +} +TEST_P(Test_TensorFlow_layers, softmax_slim) +{ runTensorFlowNet("slim_softmax"); } -TEST_P(Test_TensorFlow_layers, slim_softmax_v2) +TEST_P(Test_TensorFlow_layers, softmax_slim_v2) { #if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD && @@ -1326,10 +1434,19 @@ TEST_P(Test_TensorFlow_layers, relu6) TEST_P(Test_TensorFlow_layers, subpixel) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif runTensorFlowNet("subpixel"); } @@ -1418,13 +1535,16 @@ TEST_P(Test_TensorFlow_layers, clip_by_value) TEST_P(Test_TensorFlow_layers, tf2_prelu) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Eltwise executor got invalid input/output dims configuration + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Input prelu:StatefulPartitionedCall/StatefulPartitionedCall/sequential/p_re_lu/add hasn't been found in primitiveIDs map + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // IE exception: Input prelu:StatefulPartitionedCall/StatefulPartitionedCall/sequential/p_re_lu/add hasn't been found in primitiveIDs map @@ -1436,6 +1556,11 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu) if (target == DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif runTensorFlowNet("tf2_prelu"); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index fdd5a4b923..9205a3fbfa 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -255,6 +255,19 @@ TEST_P(Test_Torch_layers, net_inception_block) TEST_P(Test_Torch_layers, net_normalize) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif runTorchNet("net_normalize", "", false, true); } @@ -508,6 +521,8 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total(); if (target == DNN_TARGET_MYRIAD) EXPECT_LE(normL1, 4.0f); + else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + EXPECT_LE(normL1, 1.0f); else EXPECT_LE(normL1, 0.6f); } From 8ac88cf069371d6f51d62da9c49ee6fd15e1cf21 Mon Sep 17 00:00:00 2001 From: Christine Poerschke <6458642+cpoerschke@users.noreply.github.com> Date: Sun, 3 Apr 2022 20:35:55 +0100 Subject: [PATCH 62/84] add BackgroundSubtractor(KNN|MOG2).getDefaultName() implementation --- modules/video/src/bgfg_KNN.cpp | 2 ++ modules/video/src/bgfg_gaussmix2.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/modules/video/src/bgfg_KNN.cpp b/modules/video/src/bgfg_KNN.cpp index 1ddf1b7d51..5ec2266921 100644 --- a/modules/video/src/bgfg_KNN.cpp +++ b/modules/video/src/bgfg_KNN.cpp @@ -214,6 +214,8 @@ public: } } + virtual String getDefaultName() const CV_OVERRIDE { return "BackgroundSubtractor_KNN"; } + virtual int getHistory() const CV_OVERRIDE { return history; } virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; } diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp index 69e4baf657..f7b26ef06b 100644 --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@ -236,6 +236,8 @@ public: } } + virtual String getDefaultName() const CV_OVERRIDE { return "BackgroundSubtractor_MOG2"; } + virtual int getHistory() const CV_OVERRIDE { return history; } virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; } From 84b517f5a0cffea09c549c77027ea27540850029 Mon Sep 17 00:00:00 2001 From: sivanov-work Date: Mon, 4 Apr 2022 08:25:03 +0300 
Subject: [PATCH 63/84] Fix warning --- modules/gapi/src/backends/ie/giebackend.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 4969b79860..6d851d3547 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -618,6 +618,7 @@ size_t IECallContext::releaseKeepAliveFrame(req_key_t key) { } elapsed_count = keep_alive_pp_frames.size(); } + cv::util::suppress_unused_warning(prev_slot); GAPI_LOG_DEBUG(nullptr, "Release keep alive frame, slot: " << prev_slot << ", reserved frames count: " << elapsed_count); return elapsed_count; From 91a5e75151013f33a9d970d79956989e4b3b74f2 Mon Sep 17 00:00:00 2001 From: "Anastasiya(Asya) Pronina" Date: Mon, 4 Apr 2022 20:39:02 +0300 Subject: [PATCH 64/84] Merge pull request #21731 from AsyaPronina:asyadev/fix_new_stream_event Fixed handling of new stream, especially for stateful OCV kernels * Fixed handling of new stream, especially for stateful OCV kernels * Removed duplication from StateInitOnce tests * Addressed review comments for PR #21731 - Fixed explanation comments - Expanded test for stateful OCV kernels in Regular mode * Addressed review comments for PR #21731 - Moved notification about new stream to the constructor - Added test on state reset for Regular mode * Addresed review comments * Addressed review comments Co-authored-by: Ruslan Garnov --- modules/gapi/src/backends/cpu/gcpubackend.cpp | 32 ++- modules/gapi/src/backends/cpu/gcpubackend.hpp | 4 +- modules/gapi/src/executor/gexecutor.cpp | 18 +- .../gapi/src/executor/gstreamingexecutor.cpp | 14 +- .../cpu/gapi_ocv_stateful_kernel_tests.cpp | 184 +++++++++++++++++- 5 files changed, 213 insertions(+), 39 deletions(-) diff --git a/modules/gapi/src/backends/cpu/gcpubackend.cpp b/modules/gapi/src/backends/cpu/gcpubackend.cpp index b1e716f3ba..f50f8ecd28 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.cpp +++ 
b/modules/gapi/src/backends/cpu/gcpubackend.cpp @@ -27,6 +27,7 @@ #include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! #include "utils/itt.hpp" +#include "logger.hpp" // FIXME: Is there a way to take a typed graph (our GModel), // and create a new typed graph _ATOP_ of that (by extending with a couple of @@ -113,8 +114,6 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g, } } makeReshape(); - // For each stateful kernel call 'setup' user callback to initialize state. - setupKernelStates(); } // FIXME: Document what it does @@ -190,18 +189,23 @@ void cv::gimpl::GCPUExecutable::makeReshape() { void cv::gimpl::GCPUExecutable::reshape(ade::Graph&, const GCompileArgs& args) { m_compileArgs = args; makeReshape(); - // Signal to reset stateful kernels` state. - // There can be no handleNewStream() call to set this flag - // if user didn't call GCompiled`s prepareForNewStream() - m_newStreamStarted = true; + // TODO: Add an input meta sensitivity flag to stateful kernels. + // When reshape() happens, reset state for meta-sensitive kernels only + if (!m_nodesToStates.empty()) { + std::call_once(m_warnFlag, + [](){ + GAPI_LOG_WARNING(NULL, + "\nGCPUExecutable::reshape was called. Resetting states of stateful kernels."); + }); + setupKernelStates(); + } } void cv::gimpl::GCPUExecutable::handleNewStream() { - // Signal to reset stateful kernels` state. - // No need to call reshape() here since it'll - // be called automatically if input meta was changed - m_newStreamStarted = true; + // In case if new video-stream happens - for each stateful kernel + // call 'setup' user callback to re-initialize state. + setupKernelStates(); } void cv::gimpl::GCPUExecutable::run(std::vector &&input_objs, @@ -231,14 +235,6 @@ void cv::gimpl::GCPUExecutable::run(std::vector &&input_objs, } } - // In case if new video-stream happens - for each stateful kernel - // call 'setup' user callback to re-initialize state. 
- if (m_newStreamStarted) - { - setupKernelStates(); - m_newStreamStarted = false; - } - // OpenCV backend execution is not a rocket science at all. // Simply invoke our kernels in the proper order. GConstGCPUModel gcm(m_g); diff --git a/modules/gapi/src/backends/cpu/gcpubackend.hpp b/modules/gapi/src/backends/cpu/gcpubackend.hpp index 6a7b41e3d4..c8bad6c84f 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.hpp +++ b/modules/gapi/src/backends/cpu/gcpubackend.hpp @@ -56,8 +56,8 @@ class GCPUExecutable final: public GIslandExecutable // Actual data of all resources in graph (both internal and external) Mag m_res; - // Flag which identifies if new stream was started - bool m_newStreamStarted = false; + // A flag for call_once() (used for log warnings) + std::once_flag m_warnFlag; GArg packArg(const GArg &arg); void setupKernelStates(); diff --git a/modules/gapi/src/executor/gexecutor.cpp b/modules/gapi/src/executor/gexecutor.cpp index b7b0b5c2d0..a8abde27b1 100644 --- a/modules/gapi/src/executor/gexecutor.cpp +++ b/modules/gapi/src/executor/gexecutor.cpp @@ -30,10 +30,11 @@ cv::gimpl::GExecutor::GExecutor(std::unique_ptr &&g_model) // 1. Allocate all internal resources first (NB - CPU plugin doesn't do it) // 2. Put input/output GComputation arguments to the storage // 3. For every Island, prepare vectors of input/output parameter descs - // 4. Iterate over a list of operations (sorted in the topological order) - // 5. For every operation, form a list of input/output data objects - // 6. Run GIslandExecutable - // 7. writeBack + // 4. Ask every GIslandExecutable to prepare its internal states for a new stream + // 5. Iterate over a list of operations (sorted in the topological order) + // 6. For every operation, form a list of input/output data objects + // 7. Run GIslandExecutable + // 8. 
writeBack auto sorted = m_gim.metadata().get(); for (auto nh : sorted.nodes()) @@ -82,6 +83,9 @@ cv::gimpl::GExecutor::GExecutor(std::unique_ptr &&g_model) break; } // switch(kind) } // for(gim nodes) + + // (4) + prepareForNewStream(); } namespace cv { @@ -401,10 +405,10 @@ void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) magazine::resetInternalData(m_res, data); } - // Run the script + // Run the script (5) for (auto &op : m_ops) { - // (5), (6) + // (6), (7) Input i{m_res, op.in_objects}; Output o{m_res, op.out_objects}; op.isl_exec->run(i, o); @@ -412,7 +416,7 @@ void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) o.verify(); } - // (7) + // (8) for (auto it : ade::util::zip(ade::util::toRange(proto.outputs), ade::util::toRange(args.outObjs))) { diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 34424cb94b..6c8c56852c 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -1564,7 +1564,7 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) } } }; - bool islandsRecompiled = false; + const auto new_meta = cv::descr_of(ins); // 0 if (gm.metadata().contains()) // (1) { @@ -1586,8 +1586,6 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) } update_int_metas(); // (7) m_reshapable = util::make_optional(is_reshapable); - - islandsRecompiled = true; } else // (8) { @@ -1709,14 +1707,8 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) island_meta_info = GIslandModel::traceIslandName(op.nh, m_gim); #endif // OPENCV_WITH_ITT - // If Island Executable is recompiled, all its stuff including internal kernel states - // are recreated and re-initialized automatically. - // But if not, we should notify Island Executable about new started stream to let it update - // its internal variables. 
- if (!islandsRecompiled) - { - op.isl_exec->handleNewStream(); - } + // Notify island executable about a new stream to let it update its internal variables. + op.isl_exec->handleNewStream(); m_threads.emplace_back(islandActorThread, op.in_objects, diff --git a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp index cf03430d55..17c4e2f447 100644 --- a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp +++ b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp @@ -14,6 +14,7 @@ #include #endif +#include // required by std::shared_ptr namespace opencv_test { @@ -21,6 +22,11 @@ namespace opencv_test { std::string method; }; + + struct CountStateSetupsParams + { + std::shared_ptr pSetupsCount; + }; } // namespace opencv_test namespace cv @@ -34,6 +40,14 @@ namespace cv return "org.opencv.test.background_substractor_state_params"; } }; + + template<> struct CompileArgTag + { + static const char* tag() + { + return "org.opencv.test.count_state_setups_params"; + } + }; } // namespace detail } // namespace cv @@ -127,8 +141,101 @@ namespace } }; #endif + + G_TYPED_KERNEL(GCountStateSetups, (GMat)>, + "org.opencv.test.count_state_setups") + { + static GOpaqueDesc outMeta(GMatDesc /* in */) { return empty_gopaque_desc(); } + }; + + GAPI_OCV_KERNEL_ST(GOCVCountStateSetups, GCountStateSetups, int) + { + static void setup(const cv::GMatDesc &, std::shared_ptr &, + const cv::GCompileArgs &compileArgs) + { + auto params = cv::gapi::getCompileArg(compileArgs) + .value_or(CountStateSetupsParams { }); + if (params.pSetupsCount != nullptr) { + (*params.pSetupsCount)++; + } + } + + static void run(const cv::Mat & , bool &out, int &) + { + out = true; + } + }; }; +TEST(StatefulKernel, StateInitOnceInRegularMode) +{ + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // Input mat: + cv::Mat inputData(1080, 1920, CV_8UC1); + cv::randu(inputData, 
cv::Scalar::all(1), cv::Scalar::all(128)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + // Testing for 100 frames + bool result { }; + for (int i = 0; i < 100; ++i) { + c.apply(cv::gin(inputData), cv::gout(result), + cv::compile_args(cv::gapi::kernels(), params)); + EXPECT_TRUE(result); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + } +}; + +struct StateInitOnce : public ::testing::TestWithParam{}; +TEST_P(StateInitOnce, StreamingCompiledWithMeta) +{ + bool compileWithMeta = GetParam(); + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // Input mat: + cv::Mat inputData(1080, 1920, CV_8UC1); + cv::randu(inputData, cv::Scalar::all(1), cv::Scalar::all(128)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + // Compilation & testing + auto ccomp = (compileWithMeta) + ? 
c.compileStreaming(cv::descr_of(inputData), + cv::compile_args(cv::gapi::kernels(), + params)) + : c.compileStreaming( + cv::compile_args(cv::gapi::kernels(), + params)); + + ccomp.setSource(cv::gin(inputData)); + + ccomp.start(); + EXPECT_TRUE(ccomp.running()); + + int counter { }; + bool result; + // Process mat 100 times + while (ccomp.pull(cv::gout(result)) && (counter++ < 100)) { + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + } + + ccomp.stop(); + EXPECT_FALSE(ccomp.running()); +} + +INSTANTIATE_TEST_CASE_P(StatefulKernel, StateInitOnce, ::testing::Bool()); + TEST(StatefulKernel, StateIsMutableInRuntime) { constexpr int expectedCallsCount = 10; @@ -163,7 +270,43 @@ TEST(StatefulKernel, StateIsMutableInRuntime) } -TEST(StatefulKernel, StateIsAutoResetForNewStream) +TEST(StateIsResetOnNewStream, RegularMode) +{ + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // Input mat: + cv::Mat inputData(1080, 1920, CV_8UC1); + cv::randu(inputData, cv::Scalar::all(1), cv::Scalar::all(128)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + auto setupsCounter = c.compile(cv::descr_of(inputData), + cv::compile_args(cv::gapi::kernels(), + params)); + + bool result { }; + for (int i = 0; i < 2; ++i) { + setupsCounter(cv::gin(inputData), cv::gout(result)); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + } + + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + setupsCounter.prepareForNewStream(); + + for (int i = 0; i < 2; ++i) { + setupsCounter(cv::gin(inputData), cv::gout(result)); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(2, *params.pSetupsCount); + } +} + +TEST(StateIsResetOnNewStream, StreamingMode) { cv::GMat in; cv::GOpaque out = GIsStateUpToDate::on(in); @@ -387,6 +530,45 @@ 
TEST(StatefulKernel, StateIsChangedViaCompArgsOnReshape) run("cv/video/768x576.avi", "knn"); run("cv/video/1920x1080.avi", "mog2"); } + +TEST(StatefulKernel, StateIsResetOnceOnReshapeInStreaming) +{ + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + auto ccomp = c.compileStreaming( + cv::compile_args(cv::gapi::kernels(), params)); + + auto run = [&ccomp, ¶ms](const std::string& videoPath, int expectedSetupsCount) { + auto path = findDataFile(videoPath); + try { + ccomp.setSource(path); + } catch(...) { + throw SkipTestException("Video file can not be opened"); + } + ccomp.start(); + + int frames = 0; + bool result = false; + while (ccomp.pull(cv::gout(result)) && (frames++ < 10)) { + EXPECT_TRUE(result); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(expectedSetupsCount, *params.pSetupsCount); + } + ccomp.stop(); + }; + + run("cv/video/768x576.avi", 1); + // FIXME: it should be 2, not 3 for expectedSetupsCount here. 
+ // With current implemention both GCPUExecutable reshape() and + // handleNewStream() call setupKernelStates() + run("cv/video/1920x1080.avi", 3); +} #endif TEST(StatefulKernel, StateIsAutoResetOnReshape) From 1b3a06a02acedb8514a54b5d667d4b61630eba49 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 1 Apr 2022 18:02:14 +0000 Subject: [PATCH 65/84] videoio(ffmpeg): avoid memory leaks --- modules/videoio/src/cap_ffmpeg_impl.hpp | 28 ++++++++++++------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index a724d7f724..e69655dde9 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -2210,17 +2210,13 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int /// close video output stream and free associated memory void CvVideoWriter_FFMPEG::close() { - // nothing to do if already released - if ( !picture ) - return; - /* no more frame to compress. The codec has a latency of a few frames if using B frames, so we get the last frames by passing the same picture again */ // TODO -- do we need to account for latency here? 
/* write the trailer, if any */ - if(ok && oc) + if (picture && ok && oc) { #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0) if (!(oc->oformat->flags & AVFMT_RAWPICTURE)) @@ -2244,32 +2240,34 @@ void CvVideoWriter_FFMPEG::close() // free pictures #if LIBAVFORMAT_BUILD > 4628 - if( video_st->codec->pix_fmt != input_pix_fmt) + if (picture && video_st && video_st->codec->pix_fmt != input_pix_fmt) #else - if( video_st->codec.pix_fmt != input_pix_fmt) + if (picture && video_st && video_st->codec.pix_fmt != input_pix_fmt) #endif { if(picture->data[0]) free(picture->data[0]); picture->data[0] = 0; } - av_free(picture); + av_freep(&picture); - if (input_picture) - av_free(input_picture); + av_freep(&input_picture); - /* close codec */ + if (video_st && video_st->codec) + { + /* close codec */ #if LIBAVFORMAT_BUILD > 4628 - avcodec_close(video_st->codec); + avcodec_close(video_st->codec); #else - avcodec_close(&(video_st->codec)); + avcodec_close(&(video_st->codec)); #endif + } - av_free(outbuf); + av_freep(&outbuf); if (oc) { - if (!(fmt->flags & AVFMT_NOFILE)) + if (fmt && !(fmt->flags & AVFMT_NOFILE)) { /* close the output file */ From 84b4a5a495054457c6dad387fcc21171d317ec27 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 5 Apr 2022 14:29:36 +0300 Subject: [PATCH 66/84] Merge pull request #21821 from alalek:update_zlib * 3rdparty: zlib 1.2.11 => 1.2.12 https://github.com/madler/zlib/releases/tag/v1.2.12 * 3rdparty(zlib): re-apply patch 20190330-ununitialized-use-state-check.diff --- 3rdparty/readme.txt | 2 +- 3rdparty/zlib/CMakeLists.txt | 1 + 3rdparty/zlib/ChangeLog | 161 +- 3rdparty/zlib/README | 11 +- 3rdparty/zlib/crc32.c | 1294 +++-- 3rdparty/zlib/crc32.h | 9877 ++++++++++++++++++++++++++++++++-- 3rdparty/zlib/deflate.c | 108 +- 3rdparty/zlib/deflate.h | 27 +- 3rdparty/zlib/gzguts.h | 5 +- 3rdparty/zlib/gzlib.c | 8 +- 3rdparty/zlib/gzread.c | 12 +- 3rdparty/zlib/gzwrite.c | 38 +- 3rdparty/zlib/infback.c | 3 +- 3rdparty/zlib/inffast.c | 28 +- 
3rdparty/zlib/inflate.c | 47 +- 3rdparty/zlib/inflate.h | 5 +- 3rdparty/zlib/inftrees.c | 6 +- 3rdparty/zlib/trees.c | 75 +- 3rdparty/zlib/zlib.h | 223 +- 3rdparty/zlib/zutil.c | 2 +- 3rdparty/zlib/zutil.h | 25 +- 21 files changed, 10899 insertions(+), 1059 deletions(-) diff --git a/3rdparty/readme.txt b/3rdparty/readme.txt index 4e4a6ba0a6..e67304c5ef 100644 --- a/3rdparty/readme.txt +++ b/3rdparty/readme.txt @@ -37,7 +37,7 @@ libtiff Tag Image File Format (TIFF) Software WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs. ------------------------------------------------------------------------------------ zlib General purpose LZ77 compression library - Copyright (C) 1995-2012 Jean-loup Gailly and Mark Adler. + Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler. See zlib home page http://www.zlib.net for details and links to the source code ------------------------------------------------------------------------------------ diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 9758861a6b..709e293c28 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -83,6 +83,7 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wshorten-64-to-32 -Wattributes -Wstrict-prot -Wundef # _LFS64_LARGEFILE is not defined /wd4267 # MSVS 2015 (x64) + zlib 1.2.11 -Wimplicit-fallthrough + /wd4244 # MSVS + zlib 1.2.12: warning C4244: '=': conversion from 'ush' to 'uchf', possible loss of data ) set_target_properties(${ZLIB_LIBRARY} PROPERTIES diff --git a/3rdparty/zlib/ChangeLog b/3rdparty/zlib/ChangeLog index 30199a65a0..f0b0e61809 100644 --- a/3rdparty/zlib/ChangeLog +++ b/3rdparty/zlib/ChangeLog @@ -1,6 +1,69 @@ ChangeLog file for zlib +Changes in 1.2.12 (27 Mar 2022) +- Cygwin does not have _wopen(), so do not create gzopen_w() there +- Permit a deflateParams() parameter change as soon as possible +- Limit hash table inserts after switch from stored deflate +- Fix bug when window full in deflate_stored() +- Fix 
CLEAR_HASH macro to be usable as a single statement +- Avoid a conversion error in gzseek when off_t type too small +- Have Makefile return non-zero error code on test failure +- Avoid some conversion warnings in gzread.c and gzwrite.c +- Update use of errno for newer Windows CE versions +- Small speedup to inflate [psumbera] +- Return an error if the gzputs string length can't fit in an int +- Add address checking in clang to -w option of configure +- Don't compute check value for raw inflate if asked to validate +- Handle case where inflateSync used when header never processed +- Avoid the use of ptrdiff_t +- Avoid an undefined behavior of memcpy() in gzappend() +- Avoid undefined behaviors of memcpy() in gz*printf() +- Avoid an undefined behavior of memcpy() in _tr_stored_block() +- Make the names in functions declarations identical to definitions +- Remove old assembler code in which bugs have manifested +- Fix deflateEnd() to not report an error at start of raw deflate +- Add legal disclaimer to README +- Emphasize the need to continue decompressing gzip members +- Correct the initialization requirements for deflateInit2() +- Fix a bug that can crash deflate on some input when using Z_FIXED +- Assure that the number of bits for deflatePrime() is valid +- Use a structure to make globals in enough.c evident +- Use a macro for the printf format of big_t in enough.c +- Clean up code style in enough.c, update version +- Use inline function instead of macro for index in enough.c +- Clarify that prefix codes are counted in enough.c +- Show all the codes for the maximum tables size in enough.c +- Add gznorm.c example, which normalizes gzip files +- Fix the zran.c example to work on a multiple-member gzip file +- Add tables for crc32_combine(), to speed it up by a factor of 200 +- Add crc32_combine_gen() and crc32_combine_op() for fast combines +- Speed up software CRC-32 computation by a factor of 1.5 to 3 +- Use atomic test and set, if available, for dynamic CRC 
tables +- Don't bother computing check value after successful inflateSync() +- Correct comment in crc32.c +- Add use of the ARMv8 crc32 instructions when requested +- Use ARM crc32 instructions if the ARM architecture has them +- Explicitly note that the 32-bit check values are 32 bits +- Avoid adding empty gzip member after gzflush with Z_FINISH +- Fix memory leak on error in gzlog.c +- Fix error in comment on the polynomial representation of a byte +- Clarify gz* function interfaces, referring to parameter names +- Change macro name in inflate.c to avoid collision in VxWorks +- Correct typo in blast.c +- Improve portability of contrib/minizip +- Fix indentation in minizip's zip.c +- Replace black/white with allow/block. (theresa-m) +- minizip warning fix if MAXU32 already defined. (gvollant) +- Fix unztell64() in minizip to work past 4GB. (Daniël Hörchner) +- Clean up minizip to reduce warnings for testing +- Add fallthrough comments for gcc +- Eliminate use of ULL constants +- Separate out address sanitizing from warnings in configure +- Remove destructive aspects of make distclean +- Check for cc masquerading as gcc or clang in configure +- Fix crc32.c to compile local functions only if used + Changes in 1.2.11 (15 Jan 2017) - Fix deflate stored bug when pulling last block from window - Permit immediate deflateParams changes before any deflate input @@ -511,7 +574,7 @@ Changes in 1.2.3.5 (8 Jan 2010) - Don't use _vsnprintf on later versions of MSVC [Lowman] - Add CMake build script and input file [Lowman] - Update contrib/minizip to 1.1 [Svensson, Vollant] -- Moved nintendods directory from contrib to . 
+- Moved nintendods directory from contrib to root - Replace gzio.c with a new set of routines with the same functionality - Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above - Update contrib/minizip to 1.1b @@ -685,7 +748,7 @@ Changes in 1.2.2.4 (11 July 2005) - Be more strict on incomplete code sets in inflate_table() and increase ENOUGH and MAXD -- this repairs a possible security vulnerability for invalid inflate input. Thanks to Tavis Ormandy and Markus Oberhumer for - discovering the vulnerability and providing test cases. + discovering the vulnerability and providing test cases - Add ia64 support to configure for HP-UX [Smith] - Add error return to gzread() for format or i/o error [Levin] - Use malloc.h for OS/2 [Necasek] @@ -721,7 +784,7 @@ Changes in 1.2.2.2 (30 December 2004) - Add Z_FIXED strategy option to deflateInit2() to force fixed trees - Add updated make_vms.com [Coghlan], update README - Create a new "examples" directory, move gzappend.c there, add zpipe.c, - fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html. + fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html - Add FAQ entry and comments in deflate.c on uninitialized memory access - Add Solaris 9 make options in configure [Gilbert] - Allow strerror() usage in gzio.c for STDC @@ -792,7 +855,7 @@ Changes in 1.2.1.1 (9 January 2004) - Fix a big fat bug in inftrees.c that prevented decoding valid dynamic blocks with only literals and no distance codes -- Thanks to "Hot Emu" for the bug report and sample file -- Add a note to puff.c on no distance codes case. +- Add a note to puff.c on no distance codes case Changes in 1.2.1 (17 November 2003) - Remove a tab in contrib/gzappend/gzappend.c @@ -1036,14 +1099,14 @@ Changes in 1.2.0 (9 March 2003) - Add contrib/puff/ simple inflate for deflate format description Changes in 1.1.4 (11 March 2002) -- ZFREE was repeated on same allocation on some error conditions. 
+- ZFREE was repeated on same allocation on some error conditions This creates a security problem described in http://www.zlib.org/advisory-2002-03-11.txt - Returned incorrect error (Z_MEM_ERROR) on some invalid data - Avoid accesses before window for invalid distances with inflate window - less than 32K. + less than 32K - force windowBits > 8 to avoid a bug in the encoder for a window size - of 256 bytes. (A complete fix will be available in 1.1.5). + of 256 bytes. (A complete fix will be available in 1.1.5) Changes in 1.1.3 (9 July 1998) - fix "an inflate input buffer bug that shows up on rare but persistent @@ -1117,7 +1180,7 @@ Changes in 1.1.1 (27 Feb 98) - remove block truncation heuristic which had very marginal effect for zlib (smaller lit_bufsize than in gzip 1.2.4) and degraded a little the compression ratio on some files. This also allows inlining _tr_tally for - matches in deflate_slow. + matches in deflate_slow - added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier) Changes in 1.1.0 (24 Feb 98) @@ -1162,7 +1225,7 @@ Changes in 1.0.8 (27 Jan 1998) - include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong) - use constant arrays for the static trees in trees.c instead of computing them at run time (thanks to Ken Raeburn for this suggestion). To create - trees.h, compile with GEN_TREES_H and run "make test". + trees.h, compile with GEN_TREES_H and run "make test" - check return code of example in "make test" and display result - pass minigzip command line options to file_compress - simplifying code of inflateSync to avoid gcc 2.8 bug @@ -1201,12 +1264,12 @@ Changes in 1.0.6 (19 Jan 1998) - add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code) - Fix a deflate bug occurring only with compression level 0 (thanks to - Andy Buckler for finding this one). -- In minigzip, pass transparently also the first byte for .Z files. 
+ Andy Buckler for finding this one) +- In minigzip, pass transparently also the first byte for .Z files - return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress() - check Z_FINISH in inflate (thanks to Marc Schluper) - Implement deflateCopy (thanks to Adam Costello) -- make static libraries by default in configure, add --shared option. +- make static libraries by default in configure, add --shared option - move MSDOS or Windows specific files to directory msdos - suppress the notion of partial flush to simplify the interface (but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4) @@ -1218,7 +1281,7 @@ Changes in 1.0.6 (19 Jan 1998) - added Makefile.nt (thanks to Stephen Williams) - added the unsupported "contrib" directory: contrib/asm386/ by Gilles Vollant - 386 asm code replacing longest_match(). + 386 asm code replacing longest_match() contrib/iostream/ by Kevin Ruland A C++ I/O streams interface to the zlib gz* functions contrib/iostream2/ by Tyge Løvset @@ -1226,7 +1289,7 @@ Changes in 1.0.6 (19 Jan 1998) contrib/untgz/ by "Pedro A. Aranda Guti\irrez" A very simple tar.gz file extractor using zlib contrib/visual-basic.txt by Carlos Rios - How to use compress(), uncompress() and the gz* functions from VB. + How to use compress(), uncompress() and the gz* functions from VB - pass params -f (filtered data), -h (huffman only), -1 to -9 (compression level) in minigzip (thanks to Tom Lane) @@ -1235,8 +1298,8 @@ Changes in 1.0.6 (19 Jan 1998) - add undocumented function inflateSyncPoint() (hack for Paul Mackerras) - add undocumented function zError to convert error code to string (for Tim Smithers) -- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code. -- Use default memcpy for Symantec MSDOS compiler. 
+- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code +- Use default memcpy for Symantec MSDOS compiler - Add EXPORT keyword for check_func (needed for Windows DLL) - add current directory to LD_LIBRARY_PATH for "make test" - create also a link for libz.so.1 @@ -1249,7 +1312,7 @@ Changes in 1.0.6 (19 Jan 1998) - allow compilation with ANSI keywords only enabled for TurboC in large model - avoid "versionString"[0] (Borland bug) - add NEED_DUMMY_RETURN for Borland -- use variable z_verbose for tracing in debug mode (L. Peter Deutsch). +- use variable z_verbose for tracing in debug mode (L. Peter Deutsch) - allow compilation with CC - defined STDC for OS/2 (David Charlap) - limit external names to 8 chars for MVS (Thomas Lund) @@ -1259,7 +1322,7 @@ Changes in 1.0.6 (19 Jan 1998) - use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau) - added makelcc.bat for lcc-win32 (Tom St Denis) - in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe) -- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion. +- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion - check for unistd.h in configure (for off_t) - remove useless check parameter in inflate_blocks_free - avoid useless assignment of s->check to itself in inflate_blocks_new @@ -1280,7 +1343,7 @@ Changes in 1.0.5 (3 Jan 98) Changes in 1.0.4 (24 Jul 96) - In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF bit, so the decompressor could decompress all the correct data but went - on to attempt decompressing extra garbage data. This affected minigzip too. + on to attempt decompressing extra garbage data. 
This affected minigzip too - zlibVersion and gzerror return const char* (needed for DLL) - port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno) - use z_error only for DEBUG (avoid problem with DLLs) @@ -1310,7 +1373,7 @@ Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion] - fix array overlay in deflate.c which sometimes caused bad compressed data - fix inflate bug with empty stored block - fix MSDOS medium model which was broken in 0.99 -- fix deflateParams() which could generate bad compressed data. +- fix deflateParams() which could generate bad compressed data - Bytef is define'd instead of typedef'ed (work around Borland bug) - added an INDEX file - new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32), @@ -1331,7 +1394,7 @@ Changes in 0.99 (27 Jan 96) - allow preset dictionary shared between compressor and decompressor - allow compression level 0 (no compression) - add deflateParams in zlib.h: allow dynamic change of compression level - and compression strategy. + and compression strategy - test large buffers and deflateParams in example.c - add optional "configure" to build zlib as a shared library - suppress Makefile.qnx, use configure instead @@ -1373,30 +1436,30 @@ Changes in 0.99 (27 Jan 96) - use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc... - use Z_BINARY instead of BINARY - document that gzclose after gzdopen will close the file -- allow "a" as mode in gzopen. +- allow "a" as mode in gzopen - fix error checking in gzread - allow skipping .gz extra-field on pipes - added reference to Perl interface in README - put the crc table in FAR data (I dislike more and more the medium model :) - added get_crc_table -- added a dimension to all arrays (Borland C can't count). 
+- added a dimension to all arrays (Borland C can't count) - workaround Borland C bug in declaration of inflate_codes_new & inflate_fast - guard against multiple inclusion of *.h (for precompiled header on Mac) -- Watcom C pretends to be Microsoft C small model even in 32 bit mode. +- Watcom C pretends to be Microsoft C small model even in 32 bit mode - don't use unsized arrays to avoid silly warnings by Visual C++: warning C4746: 'inflate_mask' : unsized array treated as '__far' - (what's wrong with far data in far model?). + (what's wrong with far data in far model?) - define enum out of inflate_blocks_state to allow compilation with C++ Changes in 0.95 (16 Aug 95) - fix MSDOS small and medium model (now easier to adapt to any compiler) - inlined send_bits - fix the final (:-) bug for deflate with flush (output was correct but - not completely flushed in rare occasions). + not completely flushed in rare occasions) - default window size is same for compression and decompression - (it's now sufficient to set MAX_WBITS in zconf.h). + (it's now sufficient to set MAX_WBITS in zconf.h) - voidp -> voidpf and voidnp -> voidp (for consistency with other - typedefs and because voidnp was not near in large model). + typedefs and because voidnp was not near in large model) Changes in 0.94 (13 Aug 95) - support MSDOS medium model @@ -1405,12 +1468,12 @@ Changes in 0.94 (13 Aug 95) - added support for VMS - allow a compression level in gzopen() - gzflush now calls fflush -- For deflate with flush, flush even if no more input is provided. +- For deflate with flush, flush even if no more input is provided - rename libgz.a as libz.a - avoid complex expression in infcodes.c triggering Turbo C bug - work around a problem with gcc on Alpha (in INSERT_STRING) - don't use inline functions (problem with some gcc versions) -- allow renaming of Byte, uInt, etc... with #define. +- allow renaming of Byte, uInt, etc... 
with #define - avoid warning about (unused) pointer before start of array in deflate.c - avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c - avoid reserved word 'new' in trees.c @@ -1429,7 +1492,7 @@ Changes in 0.92 (3 May 95) - no memcpy on Pyramid - suppressed inftest.c - optimized fill_window, put longest_match inline for gcc -- optimized inflate on stored blocks. +- optimized inflate on stored blocks - untabify all sources to simplify patches Changes in 0.91 (2 May 95) @@ -1447,7 +1510,7 @@ Changes in 0.9 (1 May 95) - let again gzread copy uncompressed data unchanged (was working in 0.71) - deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented - added a test of inflateSync in example.c -- moved MAX_WBITS to zconf.h because users might want to change that. +- moved MAX_WBITS to zconf.h because users might want to change that - document explicitly that zalloc(64K) on MSDOS must return a normalized pointer (zero offset) - added Makefiles for Microsoft C, Turbo C, Borland C++ @@ -1456,7 +1519,7 @@ Changes in 0.9 (1 May 95) Changes in 0.8 (29 April 95) - added fast inflate (inffast.c) - deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this - is incompatible with previous versions of zlib which returned Z_OK. + is incompatible with previous versions of zlib which returned Z_OK - work around a TurboC compiler bug (bad code for b << 0, see infutil.h) (actually that was not a compiler bug, see 0.81 above) - gzread no longer reads one extra byte in certain cases @@ -1466,50 +1529,50 @@ Changes in 0.8 (29 April 95) Changes in 0.71 (14 April 95) - Fixed more MSDOS compilation problems :( There is still a bug with - TurboC large model. + TurboC large model Changes in 0.7 (14 April 95) -- Added full inflate support. +- Added full inflate support - Simplified the crc32() interface. The pre- and post-conditioning (one's complement) is now done inside crc32(). 
WARNING: this is - incompatible with previous versions; see zlib.h for the new usage. + incompatible with previous versions; see zlib.h for the new usage Changes in 0.61 (12 April 95) -- workaround for a bug in TurboC. example and minigzip now work on MSDOS. +- workaround for a bug in TurboC. example and minigzip now work on MSDOS Changes in 0.6 (11 April 95) - added minigzip.c - added gzdopen to reopen a file descriptor as gzFile -- added transparent reading of non-gziped files in gzread. +- added transparent reading of non-gziped files in gzread - fixed bug in gzread (don't read crc as data) -- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose). +- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose) - don't allocate big arrays in the stack (for MSDOS) - fix some MSDOS compilation problems Changes in 0.5: - do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but - not yet Z_FULL_FLUSH. + not yet Z_FULL_FLUSH - support decompression but only in a single step (forced Z_FINISH) -- added opaque object for zalloc and zfree. +- added opaque object for zalloc and zfree - added deflateReset and inflateReset -- added a variable zlib_version for consistency checking. -- renamed the 'filter' parameter of deflateInit2 as 'strategy'. - Added Z_FILTERED and Z_HUFFMAN_ONLY constants. +- added a variable zlib_version for consistency checking +- renamed the 'filter' parameter of deflateInit2 as 'strategy' + Added Z_FILTERED and Z_HUFFMAN_ONLY constants Changes in 0.4: -- avoid "zip" everywhere, use zlib instead of ziplib. +- avoid "zip" everywhere, use zlib instead of ziplib - suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush - if compression method == 8. + if compression method == 8 - added adler32 and crc32 - renamed deflateOptions as deflateInit2, call one or the other but not both -- added the method parameter for deflateInit2. 
+- added the method parameter for deflateInit2 - added inflateInit2 - simplied considerably deflateInit and inflateInit by not supporting user-provided history buffer. This is supported only in deflateInit2 - and inflateInit2. + and inflateInit2 Changes in 0.3: - prefix all macro names with Z_ -- use Z_FINISH instead of deflateEnd to finish compression. +- use Z_FINISH instead of deflateEnd to finish compression - added Z_HUFFMAN_ONLY - added gzerror() diff --git a/3rdparty/zlib/README b/3rdparty/zlib/README index 51106de475..024b79d3d8 100644 --- a/3rdparty/zlib/README +++ b/3rdparty/zlib/README @@ -1,6 +1,6 @@ ZLIB DATA COMPRESSION LIBRARY -zlib 1.2.11 is a general purpose data compression library. All the code is +zlib 1.2.12 is a general purpose data compression library. All the code is thread safe. The data format used by the zlib library is described by RFCs (Request for Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and @@ -31,7 +31,7 @@ Mark Nelson wrote an article about zlib for the Jan. 1997 issue of Dr. Dobb's Journal; a copy of the article is available at http://marknelson.us/1997/01/01/zlib-engine/ . -The changes made in version 1.2.11 are documented in the file ChangeLog. +The changes made in version 1.2.12 are documented in the file ChangeLog. Unsupported third party contributions are provided in directory contrib/ . @@ -84,7 +84,7 @@ Acknowledgments: Copyright notice: - (C) 1995-2017 Jean-loup Gailly and Mark Adler + (C) 1995-2022 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -108,7 +108,10 @@ Copyright notice: If you use the zlib library in a product, we would appreciate *not* receiving lengthy legal documents to sign. The sources are provided for free but without warranty of any kind. 
The library has been entirely written by Jean-loup -Gailly and Mark Adler; it does not include third-party code. +Gailly and Mark Adler; it does not include third-party code. We make all +contributions to and distributions of this project solely in our personal +capacity, and are not conveying any rights to any intellectual property of +any third parties. If you redistribute modified sources, we would appreciate that you include in the file ChangeLog history information documenting your changes. Please read diff --git a/3rdparty/zlib/crc32.c b/3rdparty/zlib/crc32.c index 9580440c0e..a1bdce5c23 100644 --- a/3rdparty/zlib/crc32.c +++ b/3rdparty/zlib/crc32.c @@ -1,12 +1,10 @@ /* crc32.c -- compute the CRC-32 of a data stream - * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * - * Thanks to Rodney Brown for his contribution of faster - * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing - * tables for updating the shift register in one step with three exclusive-ors - * instead of four steps with four exclusive-ors. This results in about a - * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. */ /* @(#) $Id$ */ @@ -14,11 +12,12 @@ /* Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore protection on the static variables used to control the first-use generation - of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should first call get_crc_table() to initialize the tables before allowing more than one thread to use crc32(). 
- DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. + MAKECRCH can be #defined to write out crc32.h. A main() routine is also + produced, so that this one source file can be compiled to an executable. */ #ifdef MAKECRCH @@ -28,175 +27,713 @@ # endif /* !DYNAMIC_CRC_TABLE */ #endif /* MAKECRCH */ -#include "zutil.h" /* for STDC and FAR definitions */ +#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ -/* Definitions for doing the crc four data bytes at a time. */ -#if !defined(NOBYFOUR) && defined(Z_U4) -# define BYFOUR -#endif -#ifdef BYFOUR - local unsigned long crc32_little OF((unsigned long, - const unsigned char FAR *, z_size_t)); - local unsigned long crc32_big OF((unsigned long, - const unsigned char FAR *, z_size_t)); -# define TBLS 8 + /* + A CRC of a message is computed on N braids of words in the message, where + each word consists of W bytes (4 or 8). If N is 3, for example, then three + running sparse CRCs are calculated respectively on each braid, at these + indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... + This is done starting at a word boundary, and continues until as many blocks + of N * W bytes as are available have been processed. The results are combined + into a single CRC at the end. For this code, N must be in the range 1..6 and + W must be 4 or 8. The upper limit on N can be increased if desired by adding + more #if blocks, extending the patterns apparent in the code. In addition, + crc32.h would need to be regenerated, if the maximum N value is increased. + + N and W are chosen empirically by benchmarking the execution time on a given + processor. The choices for N and W below were based on testing on Intel Kaby + Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 + Octeon II processors. The Intel, AMD, and ARM processors were all fastest + with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 
+ They were all tested with either gcc or clang, all using the -O3 optimization + level. Your mileage may vary. + */ + +/* Define N */ +#ifdef Z_TESTN +# define N Z_TESTN #else -# define TBLS 1 -#endif /* BYFOUR */ +# define N 5 +#endif +#if N < 1 || N > 6 +# error N must be in 1..6 +#endif -/* Local functions for crc concatenation */ -local unsigned long gf2_matrix_times OF((unsigned long *mat, - unsigned long vec)); -local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); -local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); +/* + z_crc_t must be at least 32 bits. z_word_t must be at least as long as + z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and + that bytes are eight bits. + */ +/* + Define W and the associated z_word_t type. If W is not defined, then a + braided calculation is not used, and the associated tables and code are not + compiled. + */ +#ifdef Z_TESTW +# if Z_TESTW-1 != -1 +# define W Z_TESTW +# endif +#else +# ifdef MAKECRCH +# define W 8 /* required for MAKECRCH */ +# else +# if defined(__x86_64__) || defined(__aarch64__) +# define W 8 +# else +# define W 4 +# endif +# endif +#endif +#ifdef W +# if W == 8 && defined(Z_U8) + typedef Z_U8 z_word_t; +# elif defined(Z_U4) +# undef W +# define W 4 + typedef Z_U4 z_word_t; +# else +# undef W +# endif +#endif + +/* Local functions. */ +local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); +local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); + +/* If available, use the ARM processor CRC32 instruction. */ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 +# define ARMCRC32 +#endif + +#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) +/* + Swap the bytes in a z_word_t to convert between little and big endian. Any + self-respecting compiler will optimize this to a single machine byte-swap + instruction, if one is available. This assumes that word_t is either 32 bits + or 64 bits. 
+ */ +local z_word_t byte_swap(word) + z_word_t word; +{ +# if W == 8 + return + (word & 0xff00000000000000) >> 56 | + (word & 0xff000000000000) >> 40 | + (word & 0xff0000000000) >> 24 | + (word & 0xff00000000) >> 8 | + (word & 0xff000000) << 8 | + (word & 0xff0000) << 24 | + (word & 0xff00) << 40 | + (word & 0xff) << 56; +# else /* W == 4 */ + return + (word & 0xff000000) >> 24 | + (word & 0xff0000) >> 8 | + (word & 0xff00) << 8 | + (word & 0xff) << 24; +# endif +} +#endif + +/* CRC polynomial. */ +#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ #ifdef DYNAMIC_CRC_TABLE -local volatile int crc_table_empty = 1; -local z_crc_t FAR crc_table[TBLS][256]; +local z_crc_t FAR crc_table[256]; +local z_crc_t FAR x2n_table[32]; local void make_crc_table OF((void)); +#ifdef W + local z_word_t FAR crc_big_table[256]; + local z_crc_t FAR crc_braid_table[W][256]; + local z_word_t FAR crc_braid_big_table[W][256]; + local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); +#endif #ifdef MAKECRCH - local void write_table OF((FILE *, const z_crc_t FAR *)); + local void write_table OF((FILE *, const z_crc_t FAR *, int)); + local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); + local void write_table64 OF((FILE *, const z_word_t FAR *, int)); #endif /* MAKECRCH */ + +/* + Define a once() function depending on the availability of atomics. If this is + compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in + multiple threads, and if atomics are not available, then get_crc_table() must + be called to initialize the tables and must return before any threads are + allowed to compute or combine CRCs. + */ + +/* Definition of once functionality. */ +typedef struct once_s once_t; +local void once OF((once_t *, void (*)(void))); + +/* Check for the availability of atomics. 
*/ +#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ + !defined(__STDC_NO_ATOMICS__) + +#include + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + atomic_flag begun; + atomic_int done; +}; +#define ONCE_INIT {ATOMIC_FLAG_INIT, 0} + +/* + Run the provided init() function exactly once, even if multiple threads + invoke once() at the same time. The state must be a once_t initialized with + ONCE_INIT. + */ +local void once(state, init) + once_t *state; + void (*init)(void); +{ + if (!atomic_load(&state->done)) { + if (atomic_flag_test_and_set(&state->begun)) + while (!atomic_load(&state->done)) + ; + else { + init(); + atomic_store(&state->done, 1); + } + } +} + +#else /* no atomics */ + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + volatile int begun; + volatile int done; +}; +#define ONCE_INIT {0, 0} + +/* Test and set. Alas, not atomic, but tries to minimize the period of + vulnerability. */ +local int test_and_set OF((int volatile *)); +local int test_and_set(flag) + int volatile *flag; +{ + int was; + + was = *flag; + *flag = 1; + return was; +} + +/* Run the provided init() function once. This is not thread-safe. */ +local void once(state, init) + once_t *state; + void (*init)(void); +{ + if (!state->done) { + if (test_and_set(&state->begun)) + while (!state->done) + ; + else { + init(); + state->done = 1; + } + } +} + +#endif + +/* State for once(). */ +local once_t made = ONCE_INIT; + /* Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. Polynomials over GF(2) are represented in binary, one bit per coefficient, - with the lowest powers in the most significant bit. Then adding polynomials + with the lowest powers in the most significant bit. Then adding polynomials is just exclusive-or, and multiplying a polynomial by x is a right shift by - one. 
If we call the above polynomial p, and represent a byte as the + one. If we call the above polynomial p, and represent a byte as the polynomial q, also with the lowest power in the most significant bit (so the - byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, where a mod b means the remainder after dividing a by b. This calculation is done using the shift-register method of multiplying and - taking the remainder. The register is initialized to zero, and for each + taking the remainder. The register is initialized to zero, and for each incoming bit, x^32 is added mod p to the register if the bit is a one (where - x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by - x (which is shifting right by one and adding x^32 mod p if the bit shifted - out is a one). We start with the highest power (least significant bit) of - q and repeat for all eight bits of q. + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x + (which is shifting right by one and adding x^32 mod p if the bit shifted out + is a one). We start with the highest power (least significant bit) of q and + repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all the + information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. + */ - The first table is simply the CRC of all possible eight bit values. This is - all the information needed to generate CRCs on data a byte at a time for all - combinations of CRC register values and incoming bytes. The remaining tables - allow for word-at-a-time CRC calculation for both big-endian and little- - endian machines, where a word is four bytes. 
-*/ local void make_crc_table() { - z_crc_t c; - int n, k; - z_crc_t poly; /* polynomial exclusive-or pattern */ - /* terms of polynomial defining this crc (except x^32): */ - static volatile int first = 1; /* flag to limit concurrent making */ - static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + unsigned i, j, n; + z_crc_t p; - /* See if another task is already doing this (not thread-safe, but better - than nothing -- significantly reduces duration of vulnerability in - case the advice about DYNAMIC_CRC_TABLE is ignored) */ - if (first) { - first = 0; - - /* make exclusive-or pattern from polynomial (0xedb88320UL) */ - poly = 0; - for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) - poly |= (z_crc_t)1 << (31 - p[n]); - - /* generate a crc for every 8-bit value */ - for (n = 0; n < 256; n++) { - c = (z_crc_t)n; - for (k = 0; k < 8; k++) - c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[0][n] = c; - } - -#ifdef BYFOUR - /* generate crc for each value followed by one, two, and three zeros, - and then the byte reversal of those as well as the first table */ - for (n = 0; n < 256; n++) { - c = crc_table[0][n]; - crc_table[4][n] = ZSWAP32(c); - for (k = 1; k < 4; k++) { - c = crc_table[0][c & 0xff] ^ (c >> 8); - crc_table[k][n] = c; - crc_table[k + 4][n] = ZSWAP32(c); - } - } -#endif /* BYFOUR */ - - crc_table_empty = 0; - } - else { /* not first */ - /* wait for the other guy to finish (not efficient, but rare) */ - while (crc_table_empty) - ; + /* initialize the CRC of bytes tables */ + for (i = 0; i < 256; i++) { + p = i; + for (j = 0; j < 8; j++) + p = p & 1 ? 
(p >> 1) ^ POLY : p >> 1; + crc_table[i] = p; +#ifdef W + crc_big_table[i] = byte_swap(p); +#endif } + /* initialize the x^2^n mod p(x) table */ + p = (z_crc_t)1 << 30; /* x^1 */ + x2n_table[0] = p; + for (n = 1; n < 32; n++) + x2n_table[n] = p = multmodp(p, p); + +#ifdef W + /* initialize the braiding tables -- needs x2n_table[] */ + braid(crc_braid_table, crc_braid_big_table, N, W); +#endif + #ifdef MAKECRCH - /* write out CRC tables to crc32.h */ { + /* + The crc32.h header file contains tables for both 32-bit and 64-bit + z_word_t's, and so requires a 64-bit type be available. In that case, + z_word_t must be defined to be 64-bits. This code then also generates + and writes out the tables for the case that z_word_t is 32 bits. + */ +#if !defined(W) || W != 8 +# error Need a 64-bit integer type in order to generate crc32.h. +#endif FILE *out; + int k, n; + z_crc_t ltl[8][256]; + z_word_t big[8][256]; out = fopen("crc32.h", "w"); if (out == NULL) return; - fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); - fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); - fprintf(out, "local const z_crc_t FAR "); - fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); - write_table(out, crc_table[0]); -# ifdef BYFOUR - fprintf(out, "#ifdef BYFOUR\n"); - for (k = 1; k < 8; k++) { - fprintf(out, " },\n {\n"); - write_table(out, crc_table[k]); + + /* write out little-endian CRC table to crc32.h */ + fprintf(out, + "/* crc32.h -- tables for rapid CRC calculation\n" + " * Generated automatically by crc32.c\n */\n" + "\n" + "local const z_crc_t FAR crc_table[] = {\n" + " "); + write_table(out, crc_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#ifdef W\n" + "\n" + "#if W == 8\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table64(out, crc_big_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 32-bit z_word_t to 
crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table32hi(out, crc_big_table, 256); + fprintf(out, + "};\n" + "\n" + "#endif\n"); + + /* write out braid tables for each value of N */ + for (n = 1; n <= 6; n++) { + fprintf(out, + "\n" + "#if N == %d\n", n); + + /* compute braid tables for this N and 64-bit word_t */ + braid(ltl, big, n, 8); + + /* write out braid tables for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#if W == 8\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table64(out, big[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n"); + + /* compute braid tables for this N and 32-bit word_t */ + braid(ltl, big, n, 4); + + /* write out braid tables for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table32hi(out, big[k], 256); + fprintf(out, "}%s", k < 3 ? 
",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "#endif\n" + "\n" + "#endif\n"); } - fprintf(out, "#endif\n"); -# endif /* BYFOUR */ - fprintf(out, " }\n};\n"); + fprintf(out, + "\n" + "#endif\n"); + + /* write out zeros operator table to crc32.h */ + fprintf(out, + "\n" + "local const z_crc_t FAR x2n_table[] = {\n" + " "); + write_table(out, x2n_table, 32); + fprintf(out, + "};\n"); fclose(out); } #endif /* MAKECRCH */ } #ifdef MAKECRCH -local void write_table(out, table) + +/* + Write the 32-bit values in table[0..k-1] to out, five per line in + hexadecimal separated by commas. + */ +local void write_table(out, table, k) FILE *out; const z_crc_t FAR *table; + int k; { int n; - for (n = 0; n < 256; n++) - fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", (unsigned long)(table[n]), - n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); } + +/* + Write the high 32-bits of each value in table[0..k-1] to out, five per line + in hexadecimal separated by commas. + */ +local void write_table32hi(out, table, k) +FILE *out; +const z_word_t FAR *table; +int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n] >> 32), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} + +/* + Write the 64-bit values in table[0..k-1] to out, three per line in + hexadecimal separated by commas. This assumes that if there is a 64-bit + type, then there is also a long long integer type, and it is at least 64 + bits. If not, then the type cast and format string can be adjusted + accordingly. + */ +local void write_table64(out, table, k) + FILE *out; + const z_word_t FAR *table; + int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", + (unsigned long long)(table[n]), + n == k - 1 ? "" : (n % 3 == 2 ? 
",\n" : ", ")); +} + +/* Actually do the deed. */ +int main() +{ + make_crc_table(); + return 0; +} + #endif /* MAKECRCH */ +#ifdef W +/* + Generate the little and big-endian braid tables for the given n and z_word_t + size w. Each array must have room for w blocks of 256 elements. + */ +local void braid(ltl, big, n, w) + z_crc_t ltl[][256]; + z_word_t big[][256]; + int n; + int w; +{ + int k; + z_crc_t i, p, q; + for (k = 0; k < w; k++) { + p = x2nmodp((n * w + 3 - k) << 3, 0); + ltl[k][0] = 0; + big[w - 1 - k][0] = 0; + for (i = 1; i < 256; i++) { + ltl[k][i] = q = multmodp(i << 24, p); + big[w - 1 - k][i] = byte_swap(q); + } + } +} +#endif + #else /* !DYNAMIC_CRC_TABLE */ /* ======================================================================== - * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + * Tables for byte-wise and braided CRC-32 calculations, and a table of powers + * of x for combining CRC-32s, all made by make_crc_table(). */ #include "crc32.h" #endif /* DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Routines used for CRC calculation. Some are also required for the table + * generation above. + */ + +/* + Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, + reflected. For speed, this requires that a not be zero. + */ +local z_crc_t multmodp(a, b) + z_crc_t a; + z_crc_t b; +{ + z_crc_t m, p; + + m = (z_crc_t)1 << 31; + p = 0; + for (;;) { + if (a & m) { + p ^= b; + if ((a & (m - 1)) == 0) + break; + } + m >>= 1; + b = b & 1 ? (b >> 1) ^ POLY : b >> 1; + } + return p; +} + +/* + Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been + initialized. 
+ */ +local z_crc_t x2nmodp(n, k) + z_off64_t n; + unsigned k; +{ + z_crc_t p; + + p = (z_crc_t)1 << 31; /* x^0 == 1 */ + while (n) { + if (n & 1) + p = multmodp(x2n_table[k & 31], p); + n >>= 1; + k++; + } + return p; +} + /* ========================================================================= - * This function can be used by asm versions of crc32() + * This function can be used by asm versions of crc32(), and to force the + * generation of the CRC tables in a threaded application. */ const z_crc_t FAR * ZEXPORT get_crc_table() { #ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); + once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ return (const z_crc_t FAR *)crc_table; } -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 +/* ========================================================================= + * Use ARM machine instructions if available. This will compute the CRC about + * ten times faster than the braided calculation. This code does not check for + * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will + * only be defined if the compilation specifies an ARM processor architecture + * that has the instructions. For example, compiling with -march=armv8.1-a or + * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 + * instructions. + */ +#ifdef ARMCRC32 + +/* + Constants empirically determined to maximize speed. These values are from + measurements on a Cortex-A57. Your mileage may vary. 
+ */ +#define Z_BATCH 3990 /* number of words in a batch */ +#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ +#define Z_BATCH_MIN 800 /* fewest words in a final batch */ + +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + z_crc_t val; + z_word_t crc1, crc2; + const z_word_t *word; + z_word_t val0, val1, val2; + z_size_t last, last2, i; + z_size_t num; + + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + + /* Pre-condition the CRC */ + crc ^= 0xffffffff; + + /* Compute the CRC up to a word boundary. */ + while (len && ((z_size_t)buf & 7) != 0) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ + word = (z_word_t const *)buf; + num = len >> 3; + len &= 7; + + /* Do three interleaved CRCs to realize the throughput of one crc32x + instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three + CRCs are combined into a single CRC after each set of batches. */ + while (num >= 3 * Z_BATCH) { + crc1 = 0; + crc2 = 0; + for (i = 0; i < Z_BATCH; i++) { + val0 = word[i]; + val1 = word[i + Z_BATCH]; + val2 = word[i + 2 * Z_BATCH]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * Z_BATCH; + num -= 3 * Z_BATCH; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; + } + + /* Do one last smaller batch with the remaining words, if there are enough + to pay for the combination of CRCs. 
*/ + last = num / 3; + if (last >= Z_BATCH_MIN) { + last2 = last << 1; + crc1 = 0; + crc2 = 0; + for (i = 0; i < last; i++) { + val0 = word[i]; + val1 = word[i + last]; + val2 = word[i + last2]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * last; + num -= 3 * last; + val = x2nmodp(last, 6); + crc = multmodp(val, crc) ^ crc1; + crc = multmodp(val, crc) ^ crc2; + } + + /* Compute the CRC on any remaining words. */ + for (i = 0; i < num; i++) { + val0 = word[i]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + } + word += num; + + /* Complete the CRC on any remaining bytes. */ + buf = (const unsigned char FAR *)word; + while (len) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; +} + +#else + +#ifdef W + +/* + Return the CRC of the W bytes in the word_t data, taking the + least-significant byte of the word as the first byte of data, without any pre + or post conditioning. This is used to combine the CRCs of each braid. + */ +local z_crc_t crc_word(data) + z_word_t data; +{ + int k; + for (k = 0; k < W; k++) + data = (data >> 8) ^ crc_table[data & 0xff]; + return (z_crc_t)data; +} + +local z_word_t crc_word_big(data) + z_word_t data; +{ + int k; + for (k = 0; k < W; k++) + data = (data << 8) ^ + crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; + return data; +} + +#endif /* ========================================================================= */ unsigned long ZEXPORT crc32_z(crc, buf, len) @@ -204,35 +741,324 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) const unsigned char FAR *buf; z_size_t len; { - if (buf == Z_NULL) return 0UL; + /* Return initial CRC, if requested. 
*/ + if (buf == Z_NULL) return 0; #ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); + once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ -#ifdef BYFOUR - if (sizeof(void *) == sizeof(ptrdiff_t)) { - z_crc_t endian; + /* Pre-condition the CRC */ + crc ^= 0xffffffff; +#ifdef W + + /* If provided enough bytes, do a braided CRC calculation. */ + if (len >= N * W + W - 1) { + z_size_t blks; + z_word_t const *words; + unsigned endian; + int k; + + /* Compute the CRC up to a z_word_t boundary. */ + while (len && ((z_size_t)buf & (W - 1)) != 0) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Compute the CRC on as many N z_word_t blocks as are available. */ + blks = len / (N * W); + len -= blks * N * W; + words = (z_word_t const *)buf; + + /* Do endian check at execution time instead of compile time, since ARM + processors can change the endianess at execution time. If the + compiler knows what the endianess will be, it can optimize out the + check and the unused branch. */ endian = 1; - if (*((unsigned char *)(&endian))) - return crc32_little(crc, buf, len); - else - return crc32_big(crc, buf, len); + if (*(unsigned char *)&endian) { + /* Little endian. */ + + z_crc_t crc0; + z_word_t word0; +#if N > 1 + z_crc_t crc1; + z_word_t word1; +#if N > 2 + z_crc_t crc2; + z_word_t word2; +#if N > 3 + z_crc_t crc3; + z_word_t word3; +#if N > 4 + z_crc_t crc4; + z_word_t word4; +#if N > 5 + z_crc_t crc5; + z_word_t word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = crc; +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. 
*/ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + crc = crc_word(crc0 ^ words[0]); +#if N > 1 + crc = crc_word(crc1 ^ words[1] ^ crc); +#if N > 2 + crc = crc_word(crc2 ^ words[2] ^ crc); +#if N > 3 + crc = crc_word(crc3 ^ words[3] ^ crc); +#if N > 4 + crc = crc_word(crc4 ^ words[4] ^ crc); +#if N > 5 + crc = crc_word(crc5 ^ words[5] ^ crc); +#endif +#endif +#endif +#endif +#endif + words += N; + } + else { + /* Big endian. */ + + z_word_t crc0, word0, comb; +#if N > 1 + z_word_t crc1, word1; +#if N > 2 + z_word_t crc2, word2; +#if N > 3 + z_word_t crc3, word3; +#if N > 4 + z_word_t crc4, word4; +#if N > 5 + z_word_t crc5, word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. 
*/ + crc0 = byte_swap(crc); +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_big_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_big_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_big_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_big_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_big_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_big_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. 
+ */ + comb = crc_word_big(crc0 ^ words[0]); +#if N > 1 + comb = crc_word_big(crc1 ^ words[1] ^ comb); +#if N > 2 + comb = crc_word_big(crc2 ^ words[2] ^ comb); +#if N > 3 + comb = crc_word_big(crc3 ^ words[3] ^ comb); +#if N > 4 + comb = crc_word_big(crc4 ^ words[4] ^ comb); +#if N > 5 + comb = crc_word_big(crc5 ^ words[5] ^ comb); +#endif +#endif +#endif +#endif +#endif + words += N; + crc = byte_swap(comb); + } + + /* + Update the pointer to the remaining bytes to process. + */ + buf = (unsigned char const *)words; } -#endif /* BYFOUR */ - crc = crc ^ 0xffffffffUL; + +#endif /* W */ + + /* Complete the computation of the CRC on any remaining bytes. */ while (len >= 8) { - DO8; len -= 8; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffUL; + while (len) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; } +#endif + /* ========================================================================= */ unsigned long ZEXPORT crc32(crc, buf, len) unsigned long crc; @@ -242,186 +1068,16 @@ unsigned long ZEXPORT crc32(crc, buf, len) return crc32_z(crc, buf, len); } -#ifdef BYFOUR - -/* - This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit - integer pointer type. This violates the strict aliasing rule, where a - compiler can assume, for optimization purposes, that two pointers to - fundamentally different types won't ever point to the same memory. 
This can - manifest as a problem only if one of the pointers is written to. This code - only reads from those pointers. So long as this code remains isolated in - this compilation unit, there won't be a problem. For this reason, this code - should not be copied and pasted into a compilation unit in which other code - writes to the buffer that is passed to these routines. - */ - /* ========================================================================= */ -#define DOLIT4 c ^= *buf4++; \ - c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ - crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] -#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 - -/* ========================================================================= */ -local unsigned long crc32_little(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = (z_crc_t)crc; - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; - } - while (len >= 4) { - DOLIT4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - } while (--len); - c = ~c; - return (unsigned long)c; -} - -/* ========================================================================= */ -#define DOBIG4 c ^= *buf4++; \ - c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ - crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] -#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 - -/* ========================================================================= */ -local unsigned long crc32_big(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = 
ZSWAP32((z_crc_t)crc); - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOBIG32; - len -= 32; - } - while (len >= 4) { - DOBIG4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - } while (--len); - c = ~c; - return (unsigned long)(ZSWAP32(c)); -} - -#endif /* BYFOUR */ - -#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ - -/* ========================================================================= */ -local unsigned long gf2_matrix_times(mat, vec) - unsigned long *mat; - unsigned long vec; -{ - unsigned long sum; - - sum = 0; - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - return sum; -} - -/* ========================================================================= */ -local void gf2_matrix_square(square, mat) - unsigned long *square; - unsigned long *mat; -{ - int n; - - for (n = 0; n < GF2_DIM; n++) - square[n] = gf2_matrix_times(mat, mat[n]); -} - -/* ========================================================================= */ -local uLong crc32_combine_(crc1, crc2, len2) +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) uLong crc1; uLong crc2; z_off64_t len2; { - int n; - unsigned long row; - unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ - unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ - - /* degenerate case (also disallow negative lengths) */ - if (len2 <= 0) - return crc1; - - /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ - row = 1; - for (n = 1; n < GF2_DIM; n++) { - odd[n] = row; - row <<= 1; - } - - /* put operator for two zero bits in even */ - gf2_matrix_square(even, odd); - - /* put operator for four zero bits in odd */ - gf2_matrix_square(odd, even); - - /* apply len2 zeros to crc1 (first square will put 
the operator for one - zero byte, eight zero bits, in even) */ - do { - /* apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - if (len2 == 0) - break; - - /* another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - } while (len2 != 0); - - /* return combined crc */ - crc1 ^= crc2; - return crc1; +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return multmodp(x2nmodp(len2, 3), crc1) ^ crc2; } /* ========================================================================= */ @@ -430,13 +1086,31 @@ uLong ZEXPORT crc32_combine(crc1, crc2, len2) uLong crc2; z_off_t len2; { - return crc32_combine_(crc1, crc2, len2); + return crc32_combine64(crc1, crc2, len2); } -uLong ZEXPORT crc32_combine64(crc1, crc2, len2) - uLong crc1; - uLong crc2; +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen64(len2) z_off64_t len2; { - return crc32_combine_(crc1, crc2, len2); +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return x2nmodp(len2, 3); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen(len2) + z_off_t len2; +{ + return crc32_combine_gen64(len2); +} + +/* ========================================================================= */ +uLong crc32_combine_op(crc1, crc2, op) + uLong crc1; + uLong crc2; + uLong op; +{ + return multmodp(op, crc1) ^ crc2; } diff --git a/3rdparty/zlib/crc32.h b/3rdparty/zlib/crc32.h index 9e0c778102..137df68d61 100644 --- a/3rdparty/zlib/crc32.h +++ b/3rdparty/zlib/crc32.h @@ -2,440 +2,9445 @@ * Generated automatically by crc32.c */ -local const z_crc_t FAR 
crc_table[TBLS][256] = -{ - { - 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, - 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, - 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, - 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, - 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, - 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, - 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, - 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, - 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, - 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, - 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, - 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, - 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, - 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, - 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, - 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, - 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, - 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, - 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, - 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, - 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, - 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, - 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, - 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, - 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, - 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, - 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, - 0x73dc1683UL, 
0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, - 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, - 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, - 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, - 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, - 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, - 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, - 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, - 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, - 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, - 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, - 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, - 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, - 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, - 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, - 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, - 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, - 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, - 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, - 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, - 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, - 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, - 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, - 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, - 0x2d02ef8dUL -#ifdef BYFOUR - }, - { - 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, - 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, - 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, - 0x87981ccfUL, 0x4ac21251UL, 
0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, - 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, - 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, - 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, - 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, - 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, - 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, - 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, - 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, - 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, - 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, - 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, - 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, - 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, - 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, - 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, - 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, - 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, - 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, - 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, - 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, - 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, - 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, - 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, - 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, - 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, - 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, - 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, - 
0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, - 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, - 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, - 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, - 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, - 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, - 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, - 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, - 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, - 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, - 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, - 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, - 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, - 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, - 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, - 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, - 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, - 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, - 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, - 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, - 0x9324fd72UL - }, - { - 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, - 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, - 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, - 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, - 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, - 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, - 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, - 0x3a0bf8b9UL, 0x3f44ee3cUL, 
0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, - 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, - 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, - 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, - 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, - 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, - 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, - 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, - 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, - 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, - 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, - 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, - 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, - 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, - 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, - 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, - 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, - 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, - 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, - 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, - 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, - 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, - 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, - 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, - 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, - 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, - 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, - 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, - 
0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, - 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, - 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, - 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, - 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, - 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, - 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, - 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, - 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, - 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, - 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, - 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, - 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, - 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, - 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, - 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, - 0xbe9834edUL - }, - { - 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, - 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, - 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, - 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, - 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, - 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, - 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, - 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, - 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, - 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, - 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, - 0x6dfcc018UL, 0x359fd04eUL, 
0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, - 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, - 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, - 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, - 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, - 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, - 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, - 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, - 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, - 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, - 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, - 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, - 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, - 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, - 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, - 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, - 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, - 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, - 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, - 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, - 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, - 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, - 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, - 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, - 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, - 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, - 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, - 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, - 
0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, - 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, - 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, - 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, - 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, - 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, - 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, - 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, - 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, - 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, - 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, - 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, - 0xde0506f1UL - }, - { - 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, - 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, - 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, - 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, - 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, - 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, - 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, - 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, - 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, - 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, - 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, - 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, - 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, - 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, - 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, - 0x18980ee1UL, 0xbb0d6a7fUL, 
0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, - 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, - 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, - 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, - 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, - 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, - 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, - 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, - 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, - 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, - 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, - 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, - 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, - 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, - 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, - 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, - 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, - 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, - 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, - 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, - 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, - 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, - 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, - 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, - 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, - 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, - 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, - 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, - 
0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, - 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, - 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, - 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, - 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, - 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, - 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, - 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, - 0x8def022dUL - }, - { - 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, - 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, - 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, - 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, - 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, - 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, - 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, - 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, - 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, - 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, - 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, - 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, - 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, - 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, - 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, - 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, - 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, - 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, - 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, - 0x9b41233dUL, 0xa76bfd65UL, 
0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, - 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, - 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, - 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, - 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, - 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, - 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, - 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, - 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, - 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, - 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, - 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, - 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, - 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, - 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, - 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, - 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, - 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, - 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, - 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, - 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, - 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, - 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, - 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, - 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, - 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, - 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, - 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, - 
0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, - 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, - 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, - 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, - 0x72fd2493UL - }, - { - 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, - 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, - 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, - 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, - 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, - 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, - 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, - 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, - 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, - 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, - 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, - 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, - 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, - 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, - 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, - 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, - 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, - 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, - 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, - 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, - 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, - 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, - 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, - 0x09d6b756UL, 0x8cc0f853UL, 
0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, - 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, - 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, - 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, - 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, - 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, - 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, - 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, - 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, - 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, - 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, - 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, - 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, - 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, - 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, - 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, - 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, - 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, - 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, - 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, - 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, - 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, - 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, - 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, - 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, - 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, - 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, - 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, - 
0xed3498beUL - }, - { - 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, - 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, - 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, - 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, - 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, - 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, - 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, - 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, - 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, - 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, - 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, - 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, - 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, - 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, - 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, - 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, - 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, - 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, - 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, - 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, - 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, - 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, - 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, - 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, - 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, - 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, - 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, - 0x828e8e70UL, 0xd49eed28UL, 
0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, - 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, - 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, - 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, - 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, - 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, - 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, - 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, - 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, - 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, - 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, - 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, - 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, - 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, - 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, - 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, - 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, - 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, - 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, - 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, - 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, - 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, - 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, - 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, - 0xf10605deUL +local const z_crc_t FAR crc_table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 
0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}; + +#ifdef W + +#if W == 8 + +local const z_word_t FAR crc_big_table[] = { + 0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 
0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 
0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 
0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}; + +#else /* W == 4 */ + +local const z_word_t FAR crc_big_table[] = { + 0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 
0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}; + #endif - } -}; + +#if N == 1 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 
0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 
0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 
0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 
0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 
0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 
0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 
0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}, + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 
0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 
0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 
0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 
0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 
0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 
0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 
0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 
0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 
0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}, + {0x0000000000000000, 0x41311b1900000000, 0x8262363200000000, + 0xc3532d2b00000000, 0x04c56c6400000000, 0x45f4777d00000000, + 0x86a75a5600000000, 0xc796414f00000000, 0x088ad9c800000000, + 0x49bbc2d100000000, 0x8ae8effa00000000, 0xcbd9f4e300000000, + 0x0c4fb5ac00000000, 0x4d7eaeb500000000, 0x8e2d839e00000000, + 0xcf1c988700000000, 0x5112c24a00000000, 0x1023d95300000000, + 0xd370f47800000000, 0x9241ef6100000000, 0x55d7ae2e00000000, + 0x14e6b53700000000, 0xd7b5981c00000000, 0x9684830500000000, + 0x59981b8200000000, 0x18a9009b00000000, 0xdbfa2db000000000, + 0x9acb36a900000000, 0x5d5d77e600000000, 0x1c6c6cff00000000, + 0xdf3f41d400000000, 0x9e0e5acd00000000, 0xa224849500000000, + 0xe3159f8c00000000, 0x2046b2a700000000, 0x6177a9be00000000, + 0xa6e1e8f100000000, 0xe7d0f3e800000000, 0x2483dec300000000, + 0x65b2c5da00000000, 0xaaae5d5d00000000, 0xeb9f464400000000, + 0x28cc6b6f00000000, 0x69fd707600000000, 0xae6b313900000000, + 0xef5a2a2000000000, 0x2c09070b00000000, 0x6d381c1200000000, + 0xf33646df00000000, 0xb2075dc600000000, 0x715470ed00000000, + 0x30656bf400000000, 0xf7f32abb00000000, 0xb6c231a200000000, + 0x75911c8900000000, 0x34a0079000000000, 0xfbbc9f1700000000, + 0xba8d840e00000000, 0x79dea92500000000, 0x38efb23c00000000, + 0xff79f37300000000, 0xbe48e86a00000000, 0x7d1bc54100000000, + 0x3c2ade5800000000, 0x054f79f000000000, 0x447e62e900000000, + 0x872d4fc200000000, 0xc61c54db00000000, 0x018a159400000000, + 0x40bb0e8d00000000, 
0x83e823a600000000, 0xc2d938bf00000000, + 0x0dc5a03800000000, 0x4cf4bb2100000000, 0x8fa7960a00000000, + 0xce968d1300000000, 0x0900cc5c00000000, 0x4831d74500000000, + 0x8b62fa6e00000000, 0xca53e17700000000, 0x545dbbba00000000, + 0x156ca0a300000000, 0xd63f8d8800000000, 0x970e969100000000, + 0x5098d7de00000000, 0x11a9ccc700000000, 0xd2fae1ec00000000, + 0x93cbfaf500000000, 0x5cd7627200000000, 0x1de6796b00000000, + 0xdeb5544000000000, 0x9f844f5900000000, 0x58120e1600000000, + 0x1923150f00000000, 0xda70382400000000, 0x9b41233d00000000, + 0xa76bfd6500000000, 0xe65ae67c00000000, 0x2509cb5700000000, + 0x6438d04e00000000, 0xa3ae910100000000, 0xe29f8a1800000000, + 0x21cca73300000000, 0x60fdbc2a00000000, 0xafe124ad00000000, + 0xeed03fb400000000, 0x2d83129f00000000, 0x6cb2098600000000, + 0xab2448c900000000, 0xea1553d000000000, 0x29467efb00000000, + 0x687765e200000000, 0xf6793f2f00000000, 0xb748243600000000, + 0x741b091d00000000, 0x352a120400000000, 0xf2bc534b00000000, + 0xb38d485200000000, 0x70de657900000000, 0x31ef7e6000000000, + 0xfef3e6e700000000, 0xbfc2fdfe00000000, 0x7c91d0d500000000, + 0x3da0cbcc00000000, 0xfa368a8300000000, 0xbb07919a00000000, + 0x7854bcb100000000, 0x3965a7a800000000, 0x4b98833b00000000, + 0x0aa9982200000000, 0xc9fab50900000000, 0x88cbae1000000000, + 0x4f5def5f00000000, 0x0e6cf44600000000, 0xcd3fd96d00000000, + 0x8c0ec27400000000, 0x43125af300000000, 0x022341ea00000000, + 0xc1706cc100000000, 0x804177d800000000, 0x47d7369700000000, + 0x06e62d8e00000000, 0xc5b500a500000000, 0x84841bbc00000000, + 0x1a8a417100000000, 0x5bbb5a6800000000, 0x98e8774300000000, + 0xd9d96c5a00000000, 0x1e4f2d1500000000, 0x5f7e360c00000000, + 0x9c2d1b2700000000, 0xdd1c003e00000000, 0x120098b900000000, + 0x533183a000000000, 0x9062ae8b00000000, 0xd153b59200000000, + 0x16c5f4dd00000000, 0x57f4efc400000000, 0x94a7c2ef00000000, + 0xd596d9f600000000, 0xe9bc07ae00000000, 0xa88d1cb700000000, + 0x6bde319c00000000, 0x2aef2a8500000000, 0xed796bca00000000, + 0xac4870d300000000, 
0x6f1b5df800000000, 0x2e2a46e100000000, + 0xe136de6600000000, 0xa007c57f00000000, 0x6354e85400000000, + 0x2265f34d00000000, 0xe5f3b20200000000, 0xa4c2a91b00000000, + 0x6791843000000000, 0x26a09f2900000000, 0xb8aec5e400000000, + 0xf99fdefd00000000, 0x3accf3d600000000, 0x7bfde8cf00000000, + 0xbc6ba98000000000, 0xfd5ab29900000000, 0x3e099fb200000000, + 0x7f3884ab00000000, 0xb0241c2c00000000, 0xf115073500000000, + 0x32462a1e00000000, 0x7377310700000000, 0xb4e1704800000000, + 0xf5d06b5100000000, 0x3683467a00000000, 0x77b25d6300000000, + 0x4ed7facb00000000, 0x0fe6e1d200000000, 0xccb5ccf900000000, + 0x8d84d7e000000000, 0x4a1296af00000000, 0x0b238db600000000, + 0xc870a09d00000000, 0x8941bb8400000000, 0x465d230300000000, + 0x076c381a00000000, 0xc43f153100000000, 0x850e0e2800000000, + 0x42984f6700000000, 0x03a9547e00000000, 0xc0fa795500000000, + 0x81cb624c00000000, 0x1fc5388100000000, 0x5ef4239800000000, + 0x9da70eb300000000, 0xdc9615aa00000000, 0x1b0054e500000000, + 0x5a314ffc00000000, 0x996262d700000000, 0xd85379ce00000000, + 0x174fe14900000000, 0x567efa5000000000, 0x952dd77b00000000, + 0xd41ccc6200000000, 0x138a8d2d00000000, 0x52bb963400000000, + 0x91e8bb1f00000000, 0xd0d9a00600000000, 0xecf37e5e00000000, + 0xadc2654700000000, 0x6e91486c00000000, 0x2fa0537500000000, + 0xe836123a00000000, 0xa907092300000000, 0x6a54240800000000, + 0x2b653f1100000000, 0xe479a79600000000, 0xa548bc8f00000000, + 0x661b91a400000000, 0x272a8abd00000000, 0xe0bccbf200000000, + 0xa18dd0eb00000000, 0x62defdc000000000, 0x23efe6d900000000, + 0xbde1bc1400000000, 0xfcd0a70d00000000, 0x3f838a2600000000, + 0x7eb2913f00000000, 0xb924d07000000000, 0xf815cb6900000000, + 0x3b46e64200000000, 0x7a77fd5b00000000, 0xb56b65dc00000000, + 0xf45a7ec500000000, 0x370953ee00000000, 0x763848f700000000, + 0xb1ae09b800000000, 0xf09f12a100000000, 0x33cc3f8a00000000, + 0x72fd249300000000}, + {0x0000000000000000, 0x376ac20100000000, 0x6ed4840300000000, + 0x59be460200000000, 0xdca8090700000000, 0xebc2cb0600000000, + 
0xb27c8d0400000000, 0x85164f0500000000, 0xb851130e00000000, + 0x8f3bd10f00000000, 0xd685970d00000000, 0xe1ef550c00000000, + 0x64f91a0900000000, 0x5393d80800000000, 0x0a2d9e0a00000000, + 0x3d475c0b00000000, 0x70a3261c00000000, 0x47c9e41d00000000, + 0x1e77a21f00000000, 0x291d601e00000000, 0xac0b2f1b00000000, + 0x9b61ed1a00000000, 0xc2dfab1800000000, 0xf5b5691900000000, + 0xc8f2351200000000, 0xff98f71300000000, 0xa626b11100000000, + 0x914c731000000000, 0x145a3c1500000000, 0x2330fe1400000000, + 0x7a8eb81600000000, 0x4de47a1700000000, 0xe0464d3800000000, + 0xd72c8f3900000000, 0x8e92c93b00000000, 0xb9f80b3a00000000, + 0x3cee443f00000000, 0x0b84863e00000000, 0x523ac03c00000000, + 0x6550023d00000000, 0x58175e3600000000, 0x6f7d9c3700000000, + 0x36c3da3500000000, 0x01a9183400000000, 0x84bf573100000000, + 0xb3d5953000000000, 0xea6bd33200000000, 0xdd01113300000000, + 0x90e56b2400000000, 0xa78fa92500000000, 0xfe31ef2700000000, + 0xc95b2d2600000000, 0x4c4d622300000000, 0x7b27a02200000000, + 0x2299e62000000000, 0x15f3242100000000, 0x28b4782a00000000, + 0x1fdeba2b00000000, 0x4660fc2900000000, 0x710a3e2800000000, + 0xf41c712d00000000, 0xc376b32c00000000, 0x9ac8f52e00000000, + 0xada2372f00000000, 0xc08d9a7000000000, 0xf7e7587100000000, + 0xae591e7300000000, 0x9933dc7200000000, 0x1c25937700000000, + 0x2b4f517600000000, 0x72f1177400000000, 0x459bd57500000000, + 0x78dc897e00000000, 0x4fb64b7f00000000, 0x16080d7d00000000, + 0x2162cf7c00000000, 0xa474807900000000, 0x931e427800000000, + 0xcaa0047a00000000, 0xfdcac67b00000000, 0xb02ebc6c00000000, + 0x87447e6d00000000, 0xdefa386f00000000, 0xe990fa6e00000000, + 0x6c86b56b00000000, 0x5bec776a00000000, 0x0252316800000000, + 0x3538f36900000000, 0x087faf6200000000, 0x3f156d6300000000, + 0x66ab2b6100000000, 0x51c1e96000000000, 0xd4d7a66500000000, + 0xe3bd646400000000, 0xba03226600000000, 0x8d69e06700000000, + 0x20cbd74800000000, 0x17a1154900000000, 0x4e1f534b00000000, + 0x7975914a00000000, 0xfc63de4f00000000, 0xcb091c4e00000000, + 
0x92b75a4c00000000, 0xa5dd984d00000000, 0x989ac44600000000, + 0xaff0064700000000, 0xf64e404500000000, 0xc124824400000000, + 0x4432cd4100000000, 0x73580f4000000000, 0x2ae6494200000000, + 0x1d8c8b4300000000, 0x5068f15400000000, 0x6702335500000000, + 0x3ebc755700000000, 0x09d6b75600000000, 0x8cc0f85300000000, + 0xbbaa3a5200000000, 0xe2147c5000000000, 0xd57ebe5100000000, + 0xe839e25a00000000, 0xdf53205b00000000, 0x86ed665900000000, + 0xb187a45800000000, 0x3491eb5d00000000, 0x03fb295c00000000, + 0x5a456f5e00000000, 0x6d2fad5f00000000, 0x801b35e100000000, + 0xb771f7e000000000, 0xeecfb1e200000000, 0xd9a573e300000000, + 0x5cb33ce600000000, 0x6bd9fee700000000, 0x3267b8e500000000, + 0x050d7ae400000000, 0x384a26ef00000000, 0x0f20e4ee00000000, + 0x569ea2ec00000000, 0x61f460ed00000000, 0xe4e22fe800000000, + 0xd388ede900000000, 0x8a36abeb00000000, 0xbd5c69ea00000000, + 0xf0b813fd00000000, 0xc7d2d1fc00000000, 0x9e6c97fe00000000, + 0xa90655ff00000000, 0x2c101afa00000000, 0x1b7ad8fb00000000, + 0x42c49ef900000000, 0x75ae5cf800000000, 0x48e900f300000000, + 0x7f83c2f200000000, 0x263d84f000000000, 0x115746f100000000, + 0x944109f400000000, 0xa32bcbf500000000, 0xfa958df700000000, + 0xcdff4ff600000000, 0x605d78d900000000, 0x5737bad800000000, + 0x0e89fcda00000000, 0x39e33edb00000000, 0xbcf571de00000000, + 0x8b9fb3df00000000, 0xd221f5dd00000000, 0xe54b37dc00000000, + 0xd80c6bd700000000, 0xef66a9d600000000, 0xb6d8efd400000000, + 0x81b22dd500000000, 0x04a462d000000000, 0x33cea0d100000000, + 0x6a70e6d300000000, 0x5d1a24d200000000, 0x10fe5ec500000000, + 0x27949cc400000000, 0x7e2adac600000000, 0x494018c700000000, + 0xcc5657c200000000, 0xfb3c95c300000000, 0xa282d3c100000000, + 0x95e811c000000000, 0xa8af4dcb00000000, 0x9fc58fca00000000, + 0xc67bc9c800000000, 0xf1110bc900000000, 0x740744cc00000000, + 0x436d86cd00000000, 0x1ad3c0cf00000000, 0x2db902ce00000000, + 0x4096af9100000000, 0x77fc6d9000000000, 0x2e422b9200000000, + 0x1928e99300000000, 0x9c3ea69600000000, 0xab54649700000000, + 
0xf2ea229500000000, 0xc580e09400000000, 0xf8c7bc9f00000000, + 0xcfad7e9e00000000, 0x9613389c00000000, 0xa179fa9d00000000, + 0x246fb59800000000, 0x1305779900000000, 0x4abb319b00000000, + 0x7dd1f39a00000000, 0x3035898d00000000, 0x075f4b8c00000000, + 0x5ee10d8e00000000, 0x698bcf8f00000000, 0xec9d808a00000000, + 0xdbf7428b00000000, 0x8249048900000000, 0xb523c68800000000, + 0x88649a8300000000, 0xbf0e588200000000, 0xe6b01e8000000000, + 0xd1dadc8100000000, 0x54cc938400000000, 0x63a6518500000000, + 0x3a18178700000000, 0x0d72d58600000000, 0xa0d0e2a900000000, + 0x97ba20a800000000, 0xce0466aa00000000, 0xf96ea4ab00000000, + 0x7c78ebae00000000, 0x4b1229af00000000, 0x12ac6fad00000000, + 0x25c6adac00000000, 0x1881f1a700000000, 0x2feb33a600000000, + 0x765575a400000000, 0x413fb7a500000000, 0xc429f8a000000000, + 0xf3433aa100000000, 0xaafd7ca300000000, 0x9d97bea200000000, + 0xd073c4b500000000, 0xe71906b400000000, 0xbea740b600000000, + 0x89cd82b700000000, 0x0cdbcdb200000000, 0x3bb10fb300000000, + 0x620f49b100000000, 0x55658bb000000000, 0x6822d7bb00000000, + 0x5f4815ba00000000, 0x06f653b800000000, 0x319c91b900000000, + 0xb48adebc00000000, 0x83e01cbd00000000, 0xda5e5abf00000000, + 0xed3498be00000000}, + {0x0000000000000000, 0x6567bcb800000000, 0x8bc809aa00000000, + 0xeeafb51200000000, 0x5797628f00000000, 0x32f0de3700000000, + 0xdc5f6b2500000000, 0xb938d79d00000000, 0xef28b4c500000000, + 0x8a4f087d00000000, 0x64e0bd6f00000000, 0x018701d700000000, + 0xb8bfd64a00000000, 0xddd86af200000000, 0x3377dfe000000000, + 0x5610635800000000, 0x9f57195000000000, 0xfa30a5e800000000, + 0x149f10fa00000000, 0x71f8ac4200000000, 0xc8c07bdf00000000, + 0xada7c76700000000, 0x4308727500000000, 0x266fcecd00000000, + 0x707fad9500000000, 0x1518112d00000000, 0xfbb7a43f00000000, + 0x9ed0188700000000, 0x27e8cf1a00000000, 0x428f73a200000000, + 0xac20c6b000000000, 0xc9477a0800000000, 0x3eaf32a000000000, + 0x5bc88e1800000000, 0xb5673b0a00000000, 0xd00087b200000000, + 0x6938502f00000000, 0x0c5fec9700000000, 
0xe2f0598500000000, + 0x8797e53d00000000, 0xd187866500000000, 0xb4e03add00000000, + 0x5a4f8fcf00000000, 0x3f28337700000000, 0x8610e4ea00000000, + 0xe377585200000000, 0x0dd8ed4000000000, 0x68bf51f800000000, + 0xa1f82bf000000000, 0xc49f974800000000, 0x2a30225a00000000, + 0x4f579ee200000000, 0xf66f497f00000000, 0x9308f5c700000000, + 0x7da740d500000000, 0x18c0fc6d00000000, 0x4ed09f3500000000, + 0x2bb7238d00000000, 0xc518969f00000000, 0xa07f2a2700000000, + 0x1947fdba00000000, 0x7c20410200000000, 0x928ff41000000000, + 0xf7e848a800000000, 0x3d58149b00000000, 0x583fa82300000000, + 0xb6901d3100000000, 0xd3f7a18900000000, 0x6acf761400000000, + 0x0fa8caac00000000, 0xe1077fbe00000000, 0x8460c30600000000, + 0xd270a05e00000000, 0xb7171ce600000000, 0x59b8a9f400000000, + 0x3cdf154c00000000, 0x85e7c2d100000000, 0xe0807e6900000000, + 0x0e2fcb7b00000000, 0x6b4877c300000000, 0xa20f0dcb00000000, + 0xc768b17300000000, 0x29c7046100000000, 0x4ca0b8d900000000, + 0xf5986f4400000000, 0x90ffd3fc00000000, 0x7e5066ee00000000, + 0x1b37da5600000000, 0x4d27b90e00000000, 0x284005b600000000, + 0xc6efb0a400000000, 0xa3880c1c00000000, 0x1ab0db8100000000, + 0x7fd7673900000000, 0x9178d22b00000000, 0xf41f6e9300000000, + 0x03f7263b00000000, 0x66909a8300000000, 0x883f2f9100000000, + 0xed58932900000000, 0x546044b400000000, 0x3107f80c00000000, + 0xdfa84d1e00000000, 0xbacff1a600000000, 0xecdf92fe00000000, + 0x89b82e4600000000, 0x67179b5400000000, 0x027027ec00000000, + 0xbb48f07100000000, 0xde2f4cc900000000, 0x3080f9db00000000, + 0x55e7456300000000, 0x9ca03f6b00000000, 0xf9c783d300000000, + 0x176836c100000000, 0x720f8a7900000000, 0xcb375de400000000, + 0xae50e15c00000000, 0x40ff544e00000000, 0x2598e8f600000000, + 0x73888bae00000000, 0x16ef371600000000, 0xf840820400000000, + 0x9d273ebc00000000, 0x241fe92100000000, 0x4178559900000000, + 0xafd7e08b00000000, 0xcab05c3300000000, 0x3bb659ed00000000, + 0x5ed1e55500000000, 0xb07e504700000000, 0xd519ecff00000000, + 0x6c213b6200000000, 0x094687da00000000, 
0xe7e932c800000000, + 0x828e8e7000000000, 0xd49eed2800000000, 0xb1f9519000000000, + 0x5f56e48200000000, 0x3a31583a00000000, 0x83098fa700000000, + 0xe66e331f00000000, 0x08c1860d00000000, 0x6da63ab500000000, + 0xa4e140bd00000000, 0xc186fc0500000000, 0x2f29491700000000, + 0x4a4ef5af00000000, 0xf376223200000000, 0x96119e8a00000000, + 0x78be2b9800000000, 0x1dd9972000000000, 0x4bc9f47800000000, + 0x2eae48c000000000, 0xc001fdd200000000, 0xa566416a00000000, + 0x1c5e96f700000000, 0x79392a4f00000000, 0x97969f5d00000000, + 0xf2f123e500000000, 0x05196b4d00000000, 0x607ed7f500000000, + 0x8ed162e700000000, 0xebb6de5f00000000, 0x528e09c200000000, + 0x37e9b57a00000000, 0xd946006800000000, 0xbc21bcd000000000, + 0xea31df8800000000, 0x8f56633000000000, 0x61f9d62200000000, + 0x049e6a9a00000000, 0xbda6bd0700000000, 0xd8c101bf00000000, + 0x366eb4ad00000000, 0x5309081500000000, 0x9a4e721d00000000, + 0xff29cea500000000, 0x11867bb700000000, 0x74e1c70f00000000, + 0xcdd9109200000000, 0xa8beac2a00000000, 0x4611193800000000, + 0x2376a58000000000, 0x7566c6d800000000, 0x10017a6000000000, + 0xfeaecf7200000000, 0x9bc973ca00000000, 0x22f1a45700000000, + 0x479618ef00000000, 0xa939adfd00000000, 0xcc5e114500000000, + 0x06ee4d7600000000, 0x6389f1ce00000000, 0x8d2644dc00000000, + 0xe841f86400000000, 0x51792ff900000000, 0x341e934100000000, + 0xdab1265300000000, 0xbfd69aeb00000000, 0xe9c6f9b300000000, + 0x8ca1450b00000000, 0x620ef01900000000, 0x07694ca100000000, + 0xbe519b3c00000000, 0xdb36278400000000, 0x3599929600000000, + 0x50fe2e2e00000000, 0x99b9542600000000, 0xfcdee89e00000000, + 0x12715d8c00000000, 0x7716e13400000000, 0xce2e36a900000000, + 0xab498a1100000000, 0x45e63f0300000000, 0x208183bb00000000, + 0x7691e0e300000000, 0x13f65c5b00000000, 0xfd59e94900000000, + 0x983e55f100000000, 0x2106826c00000000, 0x44613ed400000000, + 0xaace8bc600000000, 0xcfa9377e00000000, 0x38417fd600000000, + 0x5d26c36e00000000, 0xb389767c00000000, 0xd6eecac400000000, + 0x6fd61d5900000000, 0x0ab1a1e100000000, 
0xe41e14f300000000, + 0x8179a84b00000000, 0xd769cb1300000000, 0xb20e77ab00000000, + 0x5ca1c2b900000000, 0x39c67e0100000000, 0x80fea99c00000000, + 0xe599152400000000, 0x0b36a03600000000, 0x6e511c8e00000000, + 0xa716668600000000, 0xc271da3e00000000, 0x2cde6f2c00000000, + 0x49b9d39400000000, 0xf081040900000000, 0x95e6b8b100000000, + 0x7b490da300000000, 0x1e2eb11b00000000, 0x483ed24300000000, + 0x2d596efb00000000, 0xc3f6dbe900000000, 0xa691675100000000, + 0x1fa9b0cc00000000, 0x7ace0c7400000000, 0x9461b96600000000, + 0xf10605de00000000}, + {0x0000000000000000, 0xb029603d00000000, 0x6053c07a00000000, + 0xd07aa04700000000, 0xc0a680f500000000, 0x708fe0c800000000, + 0xa0f5408f00000000, 0x10dc20b200000000, 0xc14b703000000000, + 0x7162100d00000000, 0xa118b04a00000000, 0x1131d07700000000, + 0x01edf0c500000000, 0xb1c490f800000000, 0x61be30bf00000000, + 0xd197508200000000, 0x8297e06000000000, 0x32be805d00000000, + 0xe2c4201a00000000, 0x52ed402700000000, 0x4231609500000000, + 0xf21800a800000000, 0x2262a0ef00000000, 0x924bc0d200000000, + 0x43dc905000000000, 0xf3f5f06d00000000, 0x238f502a00000000, + 0x93a6301700000000, 0x837a10a500000000, 0x3353709800000000, + 0xe329d0df00000000, 0x5300b0e200000000, 0x042fc1c100000000, + 0xb406a1fc00000000, 0x647c01bb00000000, 0xd455618600000000, + 0xc489413400000000, 0x74a0210900000000, 0xa4da814e00000000, + 0x14f3e17300000000, 0xc564b1f100000000, 0x754dd1cc00000000, + 0xa537718b00000000, 0x151e11b600000000, 0x05c2310400000000, + 0xb5eb513900000000, 0x6591f17e00000000, 0xd5b8914300000000, + 0x86b821a100000000, 0x3691419c00000000, 0xe6ebe1db00000000, + 0x56c281e600000000, 0x461ea15400000000, 0xf637c16900000000, + 0x264d612e00000000, 0x9664011300000000, 0x47f3519100000000, + 0xf7da31ac00000000, 0x27a091eb00000000, 0x9789f1d600000000, + 0x8755d16400000000, 0x377cb15900000000, 0xe706111e00000000, + 0x572f712300000000, 0x4958f35800000000, 0xf971936500000000, + 0x290b332200000000, 0x9922531f00000000, 0x89fe73ad00000000, + 0x39d7139000000000, 
0xe9adb3d700000000, 0x5984d3ea00000000, + 0x8813836800000000, 0x383ae35500000000, 0xe840431200000000, + 0x5869232f00000000, 0x48b5039d00000000, 0xf89c63a000000000, + 0x28e6c3e700000000, 0x98cfa3da00000000, 0xcbcf133800000000, + 0x7be6730500000000, 0xab9cd34200000000, 0x1bb5b37f00000000, + 0x0b6993cd00000000, 0xbb40f3f000000000, 0x6b3a53b700000000, + 0xdb13338a00000000, 0x0a84630800000000, 0xbaad033500000000, + 0x6ad7a37200000000, 0xdafec34f00000000, 0xca22e3fd00000000, + 0x7a0b83c000000000, 0xaa71238700000000, 0x1a5843ba00000000, + 0x4d77329900000000, 0xfd5e52a400000000, 0x2d24f2e300000000, + 0x9d0d92de00000000, 0x8dd1b26c00000000, 0x3df8d25100000000, + 0xed82721600000000, 0x5dab122b00000000, 0x8c3c42a900000000, + 0x3c15229400000000, 0xec6f82d300000000, 0x5c46e2ee00000000, + 0x4c9ac25c00000000, 0xfcb3a26100000000, 0x2cc9022600000000, + 0x9ce0621b00000000, 0xcfe0d2f900000000, 0x7fc9b2c400000000, + 0xafb3128300000000, 0x1f9a72be00000000, 0x0f46520c00000000, + 0xbf6f323100000000, 0x6f15927600000000, 0xdf3cf24b00000000, + 0x0eaba2c900000000, 0xbe82c2f400000000, 0x6ef862b300000000, + 0xded1028e00000000, 0xce0d223c00000000, 0x7e24420100000000, + 0xae5ee24600000000, 0x1e77827b00000000, 0x92b0e6b100000000, + 0x2299868c00000000, 0xf2e326cb00000000, 0x42ca46f600000000, + 0x5216664400000000, 0xe23f067900000000, 0x3245a63e00000000, + 0x826cc60300000000, 0x53fb968100000000, 0xe3d2f6bc00000000, + 0x33a856fb00000000, 0x838136c600000000, 0x935d167400000000, + 0x2374764900000000, 0xf30ed60e00000000, 0x4327b63300000000, + 0x102706d100000000, 0xa00e66ec00000000, 0x7074c6ab00000000, + 0xc05da69600000000, 0xd081862400000000, 0x60a8e61900000000, + 0xb0d2465e00000000, 0x00fb266300000000, 0xd16c76e100000000, + 0x614516dc00000000, 0xb13fb69b00000000, 0x0116d6a600000000, + 0x11caf61400000000, 0xa1e3962900000000, 0x7199366e00000000, + 0xc1b0565300000000, 0x969f277000000000, 0x26b6474d00000000, + 0xf6cce70a00000000, 0x46e5873700000000, 0x5639a78500000000, + 0xe610c7b800000000, 
0x366a67ff00000000, 0x864307c200000000, + 0x57d4574000000000, 0xe7fd377d00000000, 0x3787973a00000000, + 0x87aef70700000000, 0x9772d7b500000000, 0x275bb78800000000, + 0xf72117cf00000000, 0x470877f200000000, 0x1408c71000000000, + 0xa421a72d00000000, 0x745b076a00000000, 0xc472675700000000, + 0xd4ae47e500000000, 0x648727d800000000, 0xb4fd879f00000000, + 0x04d4e7a200000000, 0xd543b72000000000, 0x656ad71d00000000, + 0xb510775a00000000, 0x0539176700000000, 0x15e537d500000000, + 0xa5cc57e800000000, 0x75b6f7af00000000, 0xc59f979200000000, + 0xdbe815e900000000, 0x6bc175d400000000, 0xbbbbd59300000000, + 0x0b92b5ae00000000, 0x1b4e951c00000000, 0xab67f52100000000, + 0x7b1d556600000000, 0xcb34355b00000000, 0x1aa365d900000000, + 0xaa8a05e400000000, 0x7af0a5a300000000, 0xcad9c59e00000000, + 0xda05e52c00000000, 0x6a2c851100000000, 0xba56255600000000, + 0x0a7f456b00000000, 0x597ff58900000000, 0xe95695b400000000, + 0x392c35f300000000, 0x890555ce00000000, 0x99d9757c00000000, + 0x29f0154100000000, 0xf98ab50600000000, 0x49a3d53b00000000, + 0x983485b900000000, 0x281de58400000000, 0xf86745c300000000, + 0x484e25fe00000000, 0x5892054c00000000, 0xe8bb657100000000, + 0x38c1c53600000000, 0x88e8a50b00000000, 0xdfc7d42800000000, + 0x6feeb41500000000, 0xbf94145200000000, 0x0fbd746f00000000, + 0x1f6154dd00000000, 0xaf4834e000000000, 0x7f3294a700000000, + 0xcf1bf49a00000000, 0x1e8ca41800000000, 0xaea5c42500000000, + 0x7edf646200000000, 0xcef6045f00000000, 0xde2a24ed00000000, + 0x6e0344d000000000, 0xbe79e49700000000, 0x0e5084aa00000000, + 0x5d50344800000000, 0xed79547500000000, 0x3d03f43200000000, + 0x8d2a940f00000000, 0x9df6b4bd00000000, 0x2ddfd48000000000, + 0xfda574c700000000, 0x4d8c14fa00000000, 0x9c1b447800000000, + 0x2c32244500000000, 0xfc48840200000000, 0x4c61e43f00000000, + 0x5cbdc48d00000000, 0xec94a4b000000000, 0x3cee04f700000000, + 0x8cc764ca00000000}, + {0x0000000000000000, 0xa5d35ccb00000000, 0x0ba1c84d00000000, + 0xae72948600000000, 0x1642919b00000000, 0xb391cd5000000000, + 
0x1de359d600000000, 0xb830051d00000000, 0x6d8253ec00000000, + 0xc8510f2700000000, 0x66239ba100000000, 0xc3f0c76a00000000, + 0x7bc0c27700000000, 0xde139ebc00000000, 0x70610a3a00000000, + 0xd5b256f100000000, 0x9b02d60300000000, 0x3ed18ac800000000, + 0x90a31e4e00000000, 0x3570428500000000, 0x8d40479800000000, + 0x28931b5300000000, 0x86e18fd500000000, 0x2332d31e00000000, + 0xf68085ef00000000, 0x5353d92400000000, 0xfd214da200000000, + 0x58f2116900000000, 0xe0c2147400000000, 0x451148bf00000000, + 0xeb63dc3900000000, 0x4eb080f200000000, 0x3605ac0700000000, + 0x93d6f0cc00000000, 0x3da4644a00000000, 0x9877388100000000, + 0x20473d9c00000000, 0x8594615700000000, 0x2be6f5d100000000, + 0x8e35a91a00000000, 0x5b87ffeb00000000, 0xfe54a32000000000, + 0x502637a600000000, 0xf5f56b6d00000000, 0x4dc56e7000000000, + 0xe81632bb00000000, 0x4664a63d00000000, 0xe3b7faf600000000, + 0xad077a0400000000, 0x08d426cf00000000, 0xa6a6b24900000000, + 0x0375ee8200000000, 0xbb45eb9f00000000, 0x1e96b75400000000, + 0xb0e423d200000000, 0x15377f1900000000, 0xc08529e800000000, + 0x6556752300000000, 0xcb24e1a500000000, 0x6ef7bd6e00000000, + 0xd6c7b87300000000, 0x7314e4b800000000, 0xdd66703e00000000, + 0x78b52cf500000000, 0x6c0a580f00000000, 0xc9d904c400000000, + 0x67ab904200000000, 0xc278cc8900000000, 0x7a48c99400000000, + 0xdf9b955f00000000, 0x71e901d900000000, 0xd43a5d1200000000, + 0x01880be300000000, 0xa45b572800000000, 0x0a29c3ae00000000, + 0xaffa9f6500000000, 0x17ca9a7800000000, 0xb219c6b300000000, + 0x1c6b523500000000, 0xb9b80efe00000000, 0xf7088e0c00000000, + 0x52dbd2c700000000, 0xfca9464100000000, 0x597a1a8a00000000, + 0xe14a1f9700000000, 0x4499435c00000000, 0xeaebd7da00000000, + 0x4f388b1100000000, 0x9a8adde000000000, 0x3f59812b00000000, + 0x912b15ad00000000, 0x34f8496600000000, 0x8cc84c7b00000000, + 0x291b10b000000000, 0x8769843600000000, 0x22bad8fd00000000, + 0x5a0ff40800000000, 0xffdca8c300000000, 0x51ae3c4500000000, + 0xf47d608e00000000, 0x4c4d659300000000, 0xe99e395800000000, + 
0x47ecadde00000000, 0xe23ff11500000000, 0x378da7e400000000, + 0x925efb2f00000000, 0x3c2c6fa900000000, 0x99ff336200000000, + 0x21cf367f00000000, 0x841c6ab400000000, 0x2a6efe3200000000, + 0x8fbda2f900000000, 0xc10d220b00000000, 0x64de7ec000000000, + 0xcaacea4600000000, 0x6f7fb68d00000000, 0xd74fb39000000000, + 0x729cef5b00000000, 0xdcee7bdd00000000, 0x793d271600000000, + 0xac8f71e700000000, 0x095c2d2c00000000, 0xa72eb9aa00000000, + 0x02fde56100000000, 0xbacde07c00000000, 0x1f1ebcb700000000, + 0xb16c283100000000, 0x14bf74fa00000000, 0xd814b01e00000000, + 0x7dc7ecd500000000, 0xd3b5785300000000, 0x7666249800000000, + 0xce56218500000000, 0x6b857d4e00000000, 0xc5f7e9c800000000, + 0x6024b50300000000, 0xb596e3f200000000, 0x1045bf3900000000, + 0xbe372bbf00000000, 0x1be4777400000000, 0xa3d4726900000000, + 0x06072ea200000000, 0xa875ba2400000000, 0x0da6e6ef00000000, + 0x4316661d00000000, 0xe6c53ad600000000, 0x48b7ae5000000000, + 0xed64f29b00000000, 0x5554f78600000000, 0xf087ab4d00000000, + 0x5ef53fcb00000000, 0xfb26630000000000, 0x2e9435f100000000, + 0x8b47693a00000000, 0x2535fdbc00000000, 0x80e6a17700000000, + 0x38d6a46a00000000, 0x9d05f8a100000000, 0x33776c2700000000, + 0x96a430ec00000000, 0xee111c1900000000, 0x4bc240d200000000, + 0xe5b0d45400000000, 0x4063889f00000000, 0xf8538d8200000000, + 0x5d80d14900000000, 0xf3f245cf00000000, 0x5621190400000000, + 0x83934ff500000000, 0x2640133e00000000, 0x883287b800000000, + 0x2de1db7300000000, 0x95d1de6e00000000, 0x300282a500000000, + 0x9e70162300000000, 0x3ba34ae800000000, 0x7513ca1a00000000, + 0xd0c096d100000000, 0x7eb2025700000000, 0xdb615e9c00000000, + 0x63515b8100000000, 0xc682074a00000000, 0x68f093cc00000000, + 0xcd23cf0700000000, 0x189199f600000000, 0xbd42c53d00000000, + 0x133051bb00000000, 0xb6e30d7000000000, 0x0ed3086d00000000, + 0xab0054a600000000, 0x0572c02000000000, 0xa0a19ceb00000000, + 0xb41ee81100000000, 0x11cdb4da00000000, 0xbfbf205c00000000, + 0x1a6c7c9700000000, 0xa25c798a00000000, 0x078f254100000000, + 
0xa9fdb1c700000000, 0x0c2eed0c00000000, 0xd99cbbfd00000000, + 0x7c4fe73600000000, 0xd23d73b000000000, 0x77ee2f7b00000000, + 0xcfde2a6600000000, 0x6a0d76ad00000000, 0xc47fe22b00000000, + 0x61acbee000000000, 0x2f1c3e1200000000, 0x8acf62d900000000, + 0x24bdf65f00000000, 0x816eaa9400000000, 0x395eaf8900000000, + 0x9c8df34200000000, 0x32ff67c400000000, 0x972c3b0f00000000, + 0x429e6dfe00000000, 0xe74d313500000000, 0x493fa5b300000000, + 0xececf97800000000, 0x54dcfc6500000000, 0xf10fa0ae00000000, + 0x5f7d342800000000, 0xfaae68e300000000, 0x821b441600000000, + 0x27c818dd00000000, 0x89ba8c5b00000000, 0x2c69d09000000000, + 0x9459d58d00000000, 0x318a894600000000, 0x9ff81dc000000000, + 0x3a2b410b00000000, 0xef9917fa00000000, 0x4a4a4b3100000000, + 0xe438dfb700000000, 0x41eb837c00000000, 0xf9db866100000000, + 0x5c08daaa00000000, 0xf27a4e2c00000000, 0x57a912e700000000, + 0x1919921500000000, 0xbccacede00000000, 0x12b85a5800000000, + 0xb76b069300000000, 0x0f5b038e00000000, 0xaa885f4500000000, + 0x04facbc300000000, 0xa129970800000000, 0x749bc1f900000000, + 0xd1489d3200000000, 0x7f3a09b400000000, 0xdae9557f00000000, + 0x62d9506200000000, 0xc70a0ca900000000, 0x6978982f00000000, + 0xccabc4e400000000}, + {0x0000000000000000, 0xb40b77a600000000, 0x29119f9700000000, + 0x9d1ae83100000000, 0x13244ff400000000, 0xa72f385200000000, + 0x3a35d06300000000, 0x8e3ea7c500000000, 0x674eef3300000000, + 0xd345989500000000, 0x4e5f70a400000000, 0xfa54070200000000, + 0x746aa0c700000000, 0xc061d76100000000, 0x5d7b3f5000000000, + 0xe97048f600000000, 0xce9cde6700000000, 0x7a97a9c100000000, + 0xe78d41f000000000, 0x5386365600000000, 0xddb8919300000000, + 0x69b3e63500000000, 0xf4a90e0400000000, 0x40a279a200000000, + 0xa9d2315400000000, 0x1dd946f200000000, 0x80c3aec300000000, + 0x34c8d96500000000, 0xbaf67ea000000000, 0x0efd090600000000, + 0x93e7e13700000000, 0x27ec969100000000, 0x9c39bdcf00000000, + 0x2832ca6900000000, 0xb528225800000000, 0x012355fe00000000, + 0x8f1df23b00000000, 0x3b16859d00000000, 
0xa60c6dac00000000, + 0x12071a0a00000000, 0xfb7752fc00000000, 0x4f7c255a00000000, + 0xd266cd6b00000000, 0x666dbacd00000000, 0xe8531d0800000000, + 0x5c586aae00000000, 0xc142829f00000000, 0x7549f53900000000, + 0x52a563a800000000, 0xe6ae140e00000000, 0x7bb4fc3f00000000, + 0xcfbf8b9900000000, 0x41812c5c00000000, 0xf58a5bfa00000000, + 0x6890b3cb00000000, 0xdc9bc46d00000000, 0x35eb8c9b00000000, + 0x81e0fb3d00000000, 0x1cfa130c00000000, 0xa8f164aa00000000, + 0x26cfc36f00000000, 0x92c4b4c900000000, 0x0fde5cf800000000, + 0xbbd52b5e00000000, 0x79750b4400000000, 0xcd7e7ce200000000, + 0x506494d300000000, 0xe46fe37500000000, 0x6a5144b000000000, + 0xde5a331600000000, 0x4340db2700000000, 0xf74bac8100000000, + 0x1e3be47700000000, 0xaa3093d100000000, 0x372a7be000000000, + 0x83210c4600000000, 0x0d1fab8300000000, 0xb914dc2500000000, + 0x240e341400000000, 0x900543b200000000, 0xb7e9d52300000000, + 0x03e2a28500000000, 0x9ef84ab400000000, 0x2af33d1200000000, + 0xa4cd9ad700000000, 0x10c6ed7100000000, 0x8ddc054000000000, + 0x39d772e600000000, 0xd0a73a1000000000, 0x64ac4db600000000, + 0xf9b6a58700000000, 0x4dbdd22100000000, 0xc38375e400000000, + 0x7788024200000000, 0xea92ea7300000000, 0x5e999dd500000000, + 0xe54cb68b00000000, 0x5147c12d00000000, 0xcc5d291c00000000, + 0x78565eba00000000, 0xf668f97f00000000, 0x42638ed900000000, + 0xdf7966e800000000, 0x6b72114e00000000, 0x820259b800000000, + 0x36092e1e00000000, 0xab13c62f00000000, 0x1f18b18900000000, + 0x9126164c00000000, 0x252d61ea00000000, 0xb83789db00000000, + 0x0c3cfe7d00000000, 0x2bd068ec00000000, 0x9fdb1f4a00000000, + 0x02c1f77b00000000, 0xb6ca80dd00000000, 0x38f4271800000000, + 0x8cff50be00000000, 0x11e5b88f00000000, 0xa5eecf2900000000, + 0x4c9e87df00000000, 0xf895f07900000000, 0x658f184800000000, + 0xd1846fee00000000, 0x5fbac82b00000000, 0xebb1bf8d00000000, + 0x76ab57bc00000000, 0xc2a0201a00000000, 0xf2ea168800000000, + 0x46e1612e00000000, 0xdbfb891f00000000, 0x6ff0feb900000000, + 0xe1ce597c00000000, 0x55c52eda00000000, 
0xc8dfc6eb00000000, + 0x7cd4b14d00000000, 0x95a4f9bb00000000, 0x21af8e1d00000000, + 0xbcb5662c00000000, 0x08be118a00000000, 0x8680b64f00000000, + 0x328bc1e900000000, 0xaf9129d800000000, 0x1b9a5e7e00000000, + 0x3c76c8ef00000000, 0x887dbf4900000000, 0x1567577800000000, + 0xa16c20de00000000, 0x2f52871b00000000, 0x9b59f0bd00000000, + 0x0643188c00000000, 0xb2486f2a00000000, 0x5b3827dc00000000, + 0xef33507a00000000, 0x7229b84b00000000, 0xc622cfed00000000, + 0x481c682800000000, 0xfc171f8e00000000, 0x610df7bf00000000, + 0xd506801900000000, 0x6ed3ab4700000000, 0xdad8dce100000000, + 0x47c234d000000000, 0xf3c9437600000000, 0x7df7e4b300000000, + 0xc9fc931500000000, 0x54e67b2400000000, 0xe0ed0c8200000000, + 0x099d447400000000, 0xbd9633d200000000, 0x208cdbe300000000, + 0x9487ac4500000000, 0x1ab90b8000000000, 0xaeb27c2600000000, + 0x33a8941700000000, 0x87a3e3b100000000, 0xa04f752000000000, + 0x1444028600000000, 0x895eeab700000000, 0x3d559d1100000000, + 0xb36b3ad400000000, 0x07604d7200000000, 0x9a7aa54300000000, + 0x2e71d2e500000000, 0xc7019a1300000000, 0x730aedb500000000, + 0xee10058400000000, 0x5a1b722200000000, 0xd425d5e700000000, + 0x602ea24100000000, 0xfd344a7000000000, 0x493f3dd600000000, + 0x8b9f1dcc00000000, 0x3f946a6a00000000, 0xa28e825b00000000, + 0x1685f5fd00000000, 0x98bb523800000000, 0x2cb0259e00000000, + 0xb1aacdaf00000000, 0x05a1ba0900000000, 0xecd1f2ff00000000, + 0x58da855900000000, 0xc5c06d6800000000, 0x71cb1ace00000000, + 0xfff5bd0b00000000, 0x4bfecaad00000000, 0xd6e4229c00000000, + 0x62ef553a00000000, 0x4503c3ab00000000, 0xf108b40d00000000, + 0x6c125c3c00000000, 0xd8192b9a00000000, 0x56278c5f00000000, + 0xe22cfbf900000000, 0x7f3613c800000000, 0xcb3d646e00000000, + 0x224d2c9800000000, 0x96465b3e00000000, 0x0b5cb30f00000000, + 0xbf57c4a900000000, 0x3169636c00000000, 0x856214ca00000000, + 0x1878fcfb00000000, 0xac738b5d00000000, 0x17a6a00300000000, + 0xa3add7a500000000, 0x3eb73f9400000000, 0x8abc483200000000, + 0x0482eff700000000, 0xb089985100000000, 
0x2d93706000000000, + 0x999807c600000000, 0x70e84f3000000000, 0xc4e3389600000000, + 0x59f9d0a700000000, 0xedf2a70100000000, 0x63cc00c400000000, + 0xd7c7776200000000, 0x4add9f5300000000, 0xfed6e8f500000000, + 0xd93a7e6400000000, 0x6d3109c200000000, 0xf02be1f300000000, + 0x4420965500000000, 0xca1e319000000000, 0x7e15463600000000, + 0xe30fae0700000000, 0x5704d9a100000000, 0xbe74915700000000, + 0x0a7fe6f100000000, 0x97650ec000000000, 0x236e796600000000, + 0xad50dea300000000, 0x195ba90500000000, 0x8441413400000000, + 0x304a369200000000}, + {0x0000000000000000, 0x9e00aacc00000000, 0x7d07254200000000, + 0xe3078f8e00000000, 0xfa0e4a8400000000, 0x640ee04800000000, + 0x87096fc600000000, 0x1909c50a00000000, 0xb51be5d300000000, + 0x2b1b4f1f00000000, 0xc81cc09100000000, 0x561c6a5d00000000, + 0x4f15af5700000000, 0xd115059b00000000, 0x32128a1500000000, + 0xac1220d900000000, 0x2b31bb7c00000000, 0xb53111b000000000, + 0x56369e3e00000000, 0xc83634f200000000, 0xd13ff1f800000000, + 0x4f3f5b3400000000, 0xac38d4ba00000000, 0x32387e7600000000, + 0x9e2a5eaf00000000, 0x002af46300000000, 0xe32d7bed00000000, + 0x7d2dd12100000000, 0x6424142b00000000, 0xfa24bee700000000, + 0x1923316900000000, 0x87239ba500000000, 0x566276f900000000, + 0xc862dc3500000000, 0x2b6553bb00000000, 0xb565f97700000000, + 0xac6c3c7d00000000, 0x326c96b100000000, 0xd16b193f00000000, + 0x4f6bb3f300000000, 0xe379932a00000000, 0x7d7939e600000000, + 0x9e7eb66800000000, 0x007e1ca400000000, 0x1977d9ae00000000, + 0x8777736200000000, 0x6470fcec00000000, 0xfa70562000000000, + 0x7d53cd8500000000, 0xe353674900000000, 0x0054e8c700000000, + 0x9e54420b00000000, 0x875d870100000000, 0x195d2dcd00000000, + 0xfa5aa24300000000, 0x645a088f00000000, 0xc848285600000000, + 0x5648829a00000000, 0xb54f0d1400000000, 0x2b4fa7d800000000, + 0x324662d200000000, 0xac46c81e00000000, 0x4f41479000000000, + 0xd141ed5c00000000, 0xedc29d2900000000, 0x73c237e500000000, + 0x90c5b86b00000000, 0x0ec512a700000000, 0x17ccd7ad00000000, + 0x89cc7d6100000000, 
0x6acbf2ef00000000, 0xf4cb582300000000, + 0x58d978fa00000000, 0xc6d9d23600000000, 0x25de5db800000000, + 0xbbdef77400000000, 0xa2d7327e00000000, 0x3cd798b200000000, + 0xdfd0173c00000000, 0x41d0bdf000000000, 0xc6f3265500000000, + 0x58f38c9900000000, 0xbbf4031700000000, 0x25f4a9db00000000, + 0x3cfd6cd100000000, 0xa2fdc61d00000000, 0x41fa499300000000, + 0xdffae35f00000000, 0x73e8c38600000000, 0xede8694a00000000, + 0x0eefe6c400000000, 0x90ef4c0800000000, 0x89e6890200000000, + 0x17e623ce00000000, 0xf4e1ac4000000000, 0x6ae1068c00000000, + 0xbba0ebd000000000, 0x25a0411c00000000, 0xc6a7ce9200000000, + 0x58a7645e00000000, 0x41aea15400000000, 0xdfae0b9800000000, + 0x3ca9841600000000, 0xa2a92eda00000000, 0x0ebb0e0300000000, + 0x90bba4cf00000000, 0x73bc2b4100000000, 0xedbc818d00000000, + 0xf4b5448700000000, 0x6ab5ee4b00000000, 0x89b261c500000000, + 0x17b2cb0900000000, 0x909150ac00000000, 0x0e91fa6000000000, + 0xed9675ee00000000, 0x7396df2200000000, 0x6a9f1a2800000000, + 0xf49fb0e400000000, 0x17983f6a00000000, 0x899895a600000000, + 0x258ab57f00000000, 0xbb8a1fb300000000, 0x588d903d00000000, + 0xc68d3af100000000, 0xdf84fffb00000000, 0x4184553700000000, + 0xa283dab900000000, 0x3c83707500000000, 0xda853b5300000000, + 0x4485919f00000000, 0xa7821e1100000000, 0x3982b4dd00000000, + 0x208b71d700000000, 0xbe8bdb1b00000000, 0x5d8c549500000000, + 0xc38cfe5900000000, 0x6f9ede8000000000, 0xf19e744c00000000, + 0x1299fbc200000000, 0x8c99510e00000000, 0x9590940400000000, + 0x0b903ec800000000, 0xe897b14600000000, 0x76971b8a00000000, + 0xf1b4802f00000000, 0x6fb42ae300000000, 0x8cb3a56d00000000, + 0x12b30fa100000000, 0x0bbacaab00000000, 0x95ba606700000000, + 0x76bdefe900000000, 0xe8bd452500000000, 0x44af65fc00000000, + 0xdaafcf3000000000, 0x39a840be00000000, 0xa7a8ea7200000000, + 0xbea12f7800000000, 0x20a185b400000000, 0xc3a60a3a00000000, + 0x5da6a0f600000000, 0x8ce74daa00000000, 0x12e7e76600000000, + 0xf1e068e800000000, 0x6fe0c22400000000, 0x76e9072e00000000, + 0xe8e9ade200000000, 
0x0bee226c00000000, 0x95ee88a000000000, + 0x39fca87900000000, 0xa7fc02b500000000, 0x44fb8d3b00000000, + 0xdafb27f700000000, 0xc3f2e2fd00000000, 0x5df2483100000000, + 0xbef5c7bf00000000, 0x20f56d7300000000, 0xa7d6f6d600000000, + 0x39d65c1a00000000, 0xdad1d39400000000, 0x44d1795800000000, + 0x5dd8bc5200000000, 0xc3d8169e00000000, 0x20df991000000000, + 0xbedf33dc00000000, 0x12cd130500000000, 0x8ccdb9c900000000, + 0x6fca364700000000, 0xf1ca9c8b00000000, 0xe8c3598100000000, + 0x76c3f34d00000000, 0x95c47cc300000000, 0x0bc4d60f00000000, + 0x3747a67a00000000, 0xa9470cb600000000, 0x4a40833800000000, + 0xd44029f400000000, 0xcd49ecfe00000000, 0x5349463200000000, + 0xb04ec9bc00000000, 0x2e4e637000000000, 0x825c43a900000000, + 0x1c5ce96500000000, 0xff5b66eb00000000, 0x615bcc2700000000, + 0x7852092d00000000, 0xe652a3e100000000, 0x05552c6f00000000, + 0x9b5586a300000000, 0x1c761d0600000000, 0x8276b7ca00000000, + 0x6171384400000000, 0xff71928800000000, 0xe678578200000000, + 0x7878fd4e00000000, 0x9b7f72c000000000, 0x057fd80c00000000, + 0xa96df8d500000000, 0x376d521900000000, 0xd46add9700000000, + 0x4a6a775b00000000, 0x5363b25100000000, 0xcd63189d00000000, + 0x2e64971300000000, 0xb0643ddf00000000, 0x6125d08300000000, + 0xff257a4f00000000, 0x1c22f5c100000000, 0x82225f0d00000000, + 0x9b2b9a0700000000, 0x052b30cb00000000, 0xe62cbf4500000000, + 0x782c158900000000, 0xd43e355000000000, 0x4a3e9f9c00000000, + 0xa939101200000000, 0x3739bade00000000, 0x2e307fd400000000, + 0xb030d51800000000, 0x53375a9600000000, 0xcd37f05a00000000, + 0x4a146bff00000000, 0xd414c13300000000, 0x37134ebd00000000, + 0xa913e47100000000, 0xb01a217b00000000, 0x2e1a8bb700000000, + 0xcd1d043900000000, 0x531daef500000000, 0xff0f8e2c00000000, + 0x610f24e000000000, 0x8208ab6e00000000, 0x1c0801a200000000, + 0x0501c4a800000000, 0x9b016e6400000000, 0x7806e1ea00000000, + 0xe6064b2600000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 
0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 
0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 
0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 
0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 
0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 
0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 
0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 
0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}, + {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64, + 0x45f4777d, 0x86a75a56, 
0xc796414f, 0x088ad9c8, 0x49bbc2d1, + 0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, + 0xcf1c9887, 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61, + 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305, 0x59981b82, + 0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff, + 0xdf3f41d4, 0x9e0e5acd, 0xa2248495, 0xe3159f8c, 0x2046b2a7, + 0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da, + 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139, + 0xef5a2a20, 0x2c09070b, 0x6d381c12, 0xf33646df, 0xb2075dc6, + 0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89, + 0x34a00790, 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c, + 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58, 0x054f79f0, + 0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d, + 0x83e823a6, 0xc2d938bf, 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, + 0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177, + 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de, + 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5, 0x5cd76272, 0x1de6796b, + 0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824, + 0x9b41233d, 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e, + 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a, 0xafe124ad, + 0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0, + 0x29467efb, 0x687765e2, 0xf6793f2f, 0xb7482436, 0x741b091d, + 0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60, + 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83, + 0xbb07919a, 0x7854bcb1, 0x3965a7a8, 0x4b98833b, 0x0aa99822, + 0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, + 0x8c0ec274, 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8, + 0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc, 0x1a8a4171, + 0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c, + 0x9c2d1b27, 0xdd1c003e, 0x120098b9, 0x533183a0, 0x9062ae8b, + 0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6, + 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca, + 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1, 
0xe136de66, 0xa007c57f, + 0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430, + 0x26a09f29, 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf, + 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab, 0xb0241c2c, + 0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51, + 0x3683467a, 0x77b25d63, 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, + 0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84, + 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67, + 0x03a9547e, 0xc0fa7955, 0x81cb624c, 0x1fc53881, 0x5ef42398, + 0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7, + 0xd85379ce, 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62, + 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006, 0xecf37e5e, + 0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923, + 0x6a542408, 0x2b653f11, 0xe479a796, 0xa548bc8f, 0x661b91a4, + 0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9, + 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070, + 0xf815cb69, 0x3b46e642, 0x7a77fd5b, 0xb56b65dc, 0xf45a7ec5, + 0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, + 0x72fd2493}, + {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907, + 0xebc2cb06, 0xb27c8d04, 0x85164f05, 0xb851130e, 0x8f3bd10f, + 0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a, + 0x3d475c0b, 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e, + 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919, 0xc8f23512, + 0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14, + 0x7a8eb816, 0x4de47a17, 0xe0464d38, 0xd72c8f39, 0x8e92c93b, + 0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d, + 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731, + 0xb3d59530, 0xea6bd332, 0xdd011133, 0x90e56b24, 0xa78fa925, + 0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620, + 0x15f32421, 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28, + 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f, 0xc08d9a70, + 0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176, + 0x72f11774, 0x459bd575, 0x78dc897e, 
0x4fb64b7f, 0x16080d7d, + 0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b, + 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b, + 0x5bec776a, 0x02523168, 0x3538f369, 0x087faf62, 0x3f156d63, + 0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266, + 0x8d69e067, 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a, + 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d, 0x989ac446, + 0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40, + 0x2ae64942, 0x1d8c8b43, 0x5068f154, 0x67023355, 0x3ebc7557, + 0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51, + 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d, + 0x03fb295c, 0x5a456f5e, 0x6d2fad5f, 0x801b35e1, 0xb771f7e0, + 0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, + 0x050d7ae4, 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed, + 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea, 0xf0b813fd, + 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb, + 0x42c49ef9, 0x75ae5cf8, 0x48e900f3, 0x7f83c2f2, 0x263d84f0, + 0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6, + 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de, + 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc, 0xd80c6bd7, 0xef66a9d6, + 0xb6d8efd4, 0x81b22dd5, 0x04a462d0, 0x33cea0d1, 0x6a70e6d3, + 0x5d1a24d2, 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7, + 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0, 0xa8af4dcb, + 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd, + 0x1ad3c0cf, 0x2db902ce, 0x4096af91, 0x77fc6d90, 0x2e422b92, + 0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094, + 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 0x246fb598, + 0x13057799, 0x4abb319b, 0x7dd1f39a, 0x3035898d, 0x075f4b8c, + 0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489, + 0xb523c688, 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81, + 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586, 0xa0d0e2a9, + 0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af, + 0x12ac6fad, 0x25c6adac, 0x1881f1a7, 0x2feb33a6, 
0x765575a4, + 0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2, + 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2, + 0x3bb10fb3, 0x620f49b1, 0x55658bb0, 0x6822d7bb, 0x5f4815ba, + 0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf, + 0xed3498be}, + {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f, + 0x32f0de37, 0xdc5f6b25, 0xb938d79d, 0xef28b4c5, 0x8a4f087d, + 0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, + 0x56106358, 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42, + 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd, 0x707fad95, + 0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2, + 0xac20c6b0, 0xc9477a08, 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, + 0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d, + 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea, + 0xe3775852, 0x0dd8ed40, 0x68bf51f8, 0xa1f82bf0, 0xc49f9748, + 0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5, + 0x18c0fc6d, 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27, + 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8, 0x3d58149b, + 0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac, + 0xe1077fbe, 0x8460c306, 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, + 0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3, + 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44, + 0x90ffd3fc, 0x7e5066ee, 0x1b37da56, 0x4d27b90e, 0x284005b6, + 0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b, + 0xf41f6e93, 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329, + 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6, 0xecdf92fe, + 0x89b82e46, 0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9, + 0x3080f9db, 0x55e74563, 0x9ca03f6b, 0xf9c783d3, 0x176836c1, + 0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6, + 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921, + 0x41785599, 0xafd7e08b, 0xcab05c33, 0x3bb659ed, 0x5ed1e555, + 0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8, + 0x828e8e70, 0xd49eed28, 0xb1f95190, 0x5f56e482, 
0x3a31583a, + 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5, 0xa4e140bd, + 0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a, + 0x78be2b98, 0x1dd99720, 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, + 0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5, + 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2, + 0x37e9b57a, 0xd9460068, 0xbc21bcd0, 0xea31df88, 0x8f566330, + 0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, + 0x53090815, 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f, + 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580, 0x7566c6d8, + 0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef, + 0xa939adfd, 0xcc5e1145, 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, + 0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb, + 0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1, 0xbe519b3c, + 0xdb362784, 0x35999296, 0x50fe2e2e, 0x99b95426, 0xfcdee89e, + 0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03, + 0x208183bb, 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1, + 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e, 0x38417fd6, + 0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1, + 0xe41e14f3, 0x8179a84b, 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, + 0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e, + 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409, + 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b, 0x483ed243, 0x2d596efb, + 0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, + 0xf10605de}}; + +#endif + +#endif + +#if N == 2 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 
0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 
0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 
0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 
0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 
0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 
0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 
0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}, + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 
0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 
0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 
0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 
0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 
0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 
0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43147b1700000000, 0x8628f62e00000000, + 0xc53c8d3900000000, 0x0c51ec5d00000000, 0x4f45974a00000000, + 0x8a791a7300000000, 0xc96d616400000000, 0x18a2d8bb00000000, + 0x5bb6a3ac00000000, 0x9e8a2e9500000000, 0xdd9e558200000000, + 0x14f334e600000000, 0x57e74ff100000000, 0x92dbc2c800000000, + 0xd1cfb9df00000000, 0x7142c0ac00000000, 0x3256bbbb00000000, + 0xf76a368200000000, 0xb47e4d9500000000, 0x7d132cf100000000, + 0x3e0757e600000000, 0xfb3bdadf00000000, 0xb82fa1c800000000, + 0x69e0181700000000, 0x2af4630000000000, 0xefc8ee3900000000, + 0xacdc952e00000000, 0x65b1f44a00000000, 0x26a58f5d00000000, + 0xe399026400000000, 0xa08d797300000000, 0xa382f18200000000, + 0xe0968a9500000000, 0x25aa07ac00000000, 0x66be7cbb00000000, + 0xafd31ddf00000000, 0xecc766c800000000, 0x29fbebf100000000, + 0x6aef90e600000000, 0xbb20293900000000, 0xf834522e00000000, + 0x3d08df1700000000, 0x7e1ca40000000000, 0xb771c56400000000, + 
0xf465be7300000000, 0x3159334a00000000, 0x724d485d00000000, + 0xd2c0312e00000000, 0x91d44a3900000000, 0x54e8c70000000000, + 0x17fcbc1700000000, 0xde91dd7300000000, 0x9d85a66400000000, + 0x58b92b5d00000000, 0x1bad504a00000000, 0xca62e99500000000, + 0x8976928200000000, 0x4c4a1fbb00000000, 0x0f5e64ac00000000, + 0xc63305c800000000, 0x85277edf00000000, 0x401bf3e600000000, + 0x030f88f100000000, 0x070392de00000000, 0x4417e9c900000000, + 0x812b64f000000000, 0xc23f1fe700000000, 0x0b527e8300000000, + 0x4846059400000000, 0x8d7a88ad00000000, 0xce6ef3ba00000000, + 0x1fa14a6500000000, 0x5cb5317200000000, 0x9989bc4b00000000, + 0xda9dc75c00000000, 0x13f0a63800000000, 0x50e4dd2f00000000, + 0x95d8501600000000, 0xd6cc2b0100000000, 0x7641527200000000, + 0x3555296500000000, 0xf069a45c00000000, 0xb37ddf4b00000000, + 0x7a10be2f00000000, 0x3904c53800000000, 0xfc38480100000000, + 0xbf2c331600000000, 0x6ee38ac900000000, 0x2df7f1de00000000, + 0xe8cb7ce700000000, 0xabdf07f000000000, 0x62b2669400000000, + 0x21a61d8300000000, 0xe49a90ba00000000, 0xa78eebad00000000, + 0xa481635c00000000, 0xe795184b00000000, 0x22a9957200000000, + 0x61bdee6500000000, 0xa8d08f0100000000, 0xebc4f41600000000, + 0x2ef8792f00000000, 0x6dec023800000000, 0xbc23bbe700000000, + 0xff37c0f000000000, 0x3a0b4dc900000000, 0x791f36de00000000, + 0xb07257ba00000000, 0xf3662cad00000000, 0x365aa19400000000, + 0x754eda8300000000, 0xd5c3a3f000000000, 0x96d7d8e700000000, + 0x53eb55de00000000, 0x10ff2ec900000000, 0xd9924fad00000000, + 0x9a8634ba00000000, 0x5fbab98300000000, 0x1caec29400000000, + 0xcd617b4b00000000, 0x8e75005c00000000, 0x4b498d6500000000, + 0x085df67200000000, 0xc130971600000000, 0x8224ec0100000000, + 0x4718613800000000, 0x040c1a2f00000000, 0x4f00556600000000, + 0x0c142e7100000000, 0xc928a34800000000, 0x8a3cd85f00000000, + 0x4351b93b00000000, 0x0045c22c00000000, 0xc5794f1500000000, + 0x866d340200000000, 0x57a28ddd00000000, 0x14b6f6ca00000000, + 0xd18a7bf300000000, 0x929e00e400000000, 0x5bf3618000000000, + 
0x18e71a9700000000, 0xdddb97ae00000000, 0x9ecfecb900000000, + 0x3e4295ca00000000, 0x7d56eedd00000000, 0xb86a63e400000000, + 0xfb7e18f300000000, 0x3213799700000000, 0x7107028000000000, + 0xb43b8fb900000000, 0xf72ff4ae00000000, 0x26e04d7100000000, + 0x65f4366600000000, 0xa0c8bb5f00000000, 0xe3dcc04800000000, + 0x2ab1a12c00000000, 0x69a5da3b00000000, 0xac99570200000000, + 0xef8d2c1500000000, 0xec82a4e400000000, 0xaf96dff300000000, + 0x6aaa52ca00000000, 0x29be29dd00000000, 0xe0d348b900000000, + 0xa3c733ae00000000, 0x66fbbe9700000000, 0x25efc58000000000, + 0xf4207c5f00000000, 0xb734074800000000, 0x72088a7100000000, + 0x311cf16600000000, 0xf871900200000000, 0xbb65eb1500000000, + 0x7e59662c00000000, 0x3d4d1d3b00000000, 0x9dc0644800000000, + 0xded41f5f00000000, 0x1be8926600000000, 0x58fce97100000000, + 0x9191881500000000, 0xd285f30200000000, 0x17b97e3b00000000, + 0x54ad052c00000000, 0x8562bcf300000000, 0xc676c7e400000000, + 0x034a4add00000000, 0x405e31ca00000000, 0x893350ae00000000, + 0xca272bb900000000, 0x0f1ba68000000000, 0x4c0fdd9700000000, + 0x4803c7b800000000, 0x0b17bcaf00000000, 0xce2b319600000000, + 0x8d3f4a8100000000, 0x44522be500000000, 0x074650f200000000, + 0xc27addcb00000000, 0x816ea6dc00000000, 0x50a11f0300000000, + 0x13b5641400000000, 0xd689e92d00000000, 0x959d923a00000000, + 0x5cf0f35e00000000, 0x1fe4884900000000, 0xdad8057000000000, + 0x99cc7e6700000000, 0x3941071400000000, 0x7a557c0300000000, + 0xbf69f13a00000000, 0xfc7d8a2d00000000, 0x3510eb4900000000, + 0x7604905e00000000, 0xb3381d6700000000, 0xf02c667000000000, + 0x21e3dfaf00000000, 0x62f7a4b800000000, 0xa7cb298100000000, + 0xe4df529600000000, 0x2db233f200000000, 0x6ea648e500000000, + 0xab9ac5dc00000000, 0xe88ebecb00000000, 0xeb81363a00000000, + 0xa8954d2d00000000, 0x6da9c01400000000, 0x2ebdbb0300000000, + 0xe7d0da6700000000, 0xa4c4a17000000000, 0x61f82c4900000000, + 0x22ec575e00000000, 0xf323ee8100000000, 0xb037959600000000, + 0x750b18af00000000, 0x361f63b800000000, 0xff7202dc00000000, + 
0xbc6679cb00000000, 0x795af4f200000000, 0x3a4e8fe500000000, + 0x9ac3f69600000000, 0xd9d78d8100000000, 0x1ceb00b800000000, + 0x5fff7baf00000000, 0x96921acb00000000, 0xd58661dc00000000, + 0x10baece500000000, 0x53ae97f200000000, 0x82612e2d00000000, + 0xc175553a00000000, 0x0449d80300000000, 0x475da31400000000, + 0x8e30c27000000000, 0xcd24b96700000000, 0x0818345e00000000, + 0x4b0c4f4900000000}, + {0x0000000000000000, 0x3e6bc2ef00000000, 0x3dd0f50400000000, + 0x03bb37eb00000000, 0x7aa0eb0900000000, 0x44cb29e600000000, + 0x47701e0d00000000, 0x791bdce200000000, 0xf440d71300000000, + 0xca2b15fc00000000, 0xc990221700000000, 0xf7fbe0f800000000, + 0x8ee03c1a00000000, 0xb08bfef500000000, 0xb330c91e00000000, + 0x8d5b0bf100000000, 0xe881ae2700000000, 0xd6ea6cc800000000, + 0xd5515b2300000000, 0xeb3a99cc00000000, 0x9221452e00000000, + 0xac4a87c100000000, 0xaff1b02a00000000, 0x919a72c500000000, + 0x1cc1793400000000, 0x22aabbdb00000000, 0x21118c3000000000, + 0x1f7a4edf00000000, 0x6661923d00000000, 0x580a50d200000000, + 0x5bb1673900000000, 0x65daa5d600000000, 0xd0035d4f00000000, + 0xee689fa000000000, 0xedd3a84b00000000, 0xd3b86aa400000000, + 0xaaa3b64600000000, 0x94c874a900000000, 0x9773434200000000, + 0xa91881ad00000000, 0x24438a5c00000000, 0x1a2848b300000000, + 0x19937f5800000000, 0x27f8bdb700000000, 0x5ee3615500000000, + 0x6088a3ba00000000, 0x6333945100000000, 0x5d5856be00000000, + 0x3882f36800000000, 0x06e9318700000000, 0x0552066c00000000, + 0x3b39c48300000000, 0x4222186100000000, 0x7c49da8e00000000, + 0x7ff2ed6500000000, 0x41992f8a00000000, 0xccc2247b00000000, + 0xf2a9e69400000000, 0xf112d17f00000000, 0xcf79139000000000, + 0xb662cf7200000000, 0x88090d9d00000000, 0x8bb23a7600000000, + 0xb5d9f89900000000, 0xa007ba9e00000000, 0x9e6c787100000000, + 0x9dd74f9a00000000, 0xa3bc8d7500000000, 0xdaa7519700000000, + 0xe4cc937800000000, 0xe777a49300000000, 0xd91c667c00000000, + 0x54476d8d00000000, 0x6a2caf6200000000, 0x6997988900000000, + 0x57fc5a6600000000, 0x2ee7868400000000, 
0x108c446b00000000, + 0x1337738000000000, 0x2d5cb16f00000000, 0x488614b900000000, + 0x76edd65600000000, 0x7556e1bd00000000, 0x4b3d235200000000, + 0x3226ffb000000000, 0x0c4d3d5f00000000, 0x0ff60ab400000000, + 0x319dc85b00000000, 0xbcc6c3aa00000000, 0x82ad014500000000, + 0x811636ae00000000, 0xbf7df44100000000, 0xc66628a300000000, + 0xf80dea4c00000000, 0xfbb6dda700000000, 0xc5dd1f4800000000, + 0x7004e7d100000000, 0x4e6f253e00000000, 0x4dd412d500000000, + 0x73bfd03a00000000, 0x0aa40cd800000000, 0x34cfce3700000000, + 0x3774f9dc00000000, 0x091f3b3300000000, 0x844430c200000000, + 0xba2ff22d00000000, 0xb994c5c600000000, 0x87ff072900000000, + 0xfee4dbcb00000000, 0xc08f192400000000, 0xc3342ecf00000000, + 0xfd5fec2000000000, 0x988549f600000000, 0xa6ee8b1900000000, + 0xa555bcf200000000, 0x9b3e7e1d00000000, 0xe225a2ff00000000, + 0xdc4e601000000000, 0xdff557fb00000000, 0xe19e951400000000, + 0x6cc59ee500000000, 0x52ae5c0a00000000, 0x51156be100000000, + 0x6f7ea90e00000000, 0x166575ec00000000, 0x280eb70300000000, + 0x2bb580e800000000, 0x15de420700000000, 0x010905e600000000, + 0x3f62c70900000000, 0x3cd9f0e200000000, 0x02b2320d00000000, + 0x7ba9eeef00000000, 0x45c22c0000000000, 0x46791beb00000000, + 0x7812d90400000000, 0xf549d2f500000000, 0xcb22101a00000000, + 0xc89927f100000000, 0xf6f2e51e00000000, 0x8fe939fc00000000, + 0xb182fb1300000000, 0xb239ccf800000000, 0x8c520e1700000000, + 0xe988abc100000000, 0xd7e3692e00000000, 0xd4585ec500000000, + 0xea339c2a00000000, 0x932840c800000000, 0xad43822700000000, + 0xaef8b5cc00000000, 0x9093772300000000, 0x1dc87cd200000000, + 0x23a3be3d00000000, 0x201889d600000000, 0x1e734b3900000000, + 0x676897db00000000, 0x5903553400000000, 0x5ab862df00000000, + 0x64d3a03000000000, 0xd10a58a900000000, 0xef619a4600000000, + 0xecdaadad00000000, 0xd2b16f4200000000, 0xabaab3a000000000, + 0x95c1714f00000000, 0x967a46a400000000, 0xa811844b00000000, + 0x254a8fba00000000, 0x1b214d5500000000, 0x189a7abe00000000, + 0x26f1b85100000000, 0x5fea64b300000000, 
0x6181a65c00000000, + 0x623a91b700000000, 0x5c51535800000000, 0x398bf68e00000000, + 0x07e0346100000000, 0x045b038a00000000, 0x3a30c16500000000, + 0x432b1d8700000000, 0x7d40df6800000000, 0x7efbe88300000000, + 0x40902a6c00000000, 0xcdcb219d00000000, 0xf3a0e37200000000, + 0xf01bd49900000000, 0xce70167600000000, 0xb76bca9400000000, + 0x8900087b00000000, 0x8abb3f9000000000, 0xb4d0fd7f00000000, + 0xa10ebf7800000000, 0x9f657d9700000000, 0x9cde4a7c00000000, + 0xa2b5889300000000, 0xdbae547100000000, 0xe5c5969e00000000, + 0xe67ea17500000000, 0xd815639a00000000, 0x554e686b00000000, + 0x6b25aa8400000000, 0x689e9d6f00000000, 0x56f55f8000000000, + 0x2fee836200000000, 0x1185418d00000000, 0x123e766600000000, + 0x2c55b48900000000, 0x498f115f00000000, 0x77e4d3b000000000, + 0x745fe45b00000000, 0x4a3426b400000000, 0x332ffa5600000000, + 0x0d4438b900000000, 0x0eff0f5200000000, 0x3094cdbd00000000, + 0xbdcfc64c00000000, 0x83a404a300000000, 0x801f334800000000, + 0xbe74f1a700000000, 0xc76f2d4500000000, 0xf904efaa00000000, + 0xfabfd84100000000, 0xc4d41aae00000000, 0x710de23700000000, + 0x4f6620d800000000, 0x4cdd173300000000, 0x72b6d5dc00000000, + 0x0bad093e00000000, 0x35c6cbd100000000, 0x367dfc3a00000000, + 0x08163ed500000000, 0x854d352400000000, 0xbb26f7cb00000000, + 0xb89dc02000000000, 0x86f602cf00000000, 0xffedde2d00000000, + 0xc1861cc200000000, 0xc23d2b2900000000, 0xfc56e9c600000000, + 0x998c4c1000000000, 0xa7e78eff00000000, 0xa45cb91400000000, + 0x9a377bfb00000000, 0xe32ca71900000000, 0xdd4765f600000000, + 0xdefc521d00000000, 0xe09790f200000000, 0x6dcc9b0300000000, + 0x53a759ec00000000, 0x501c6e0700000000, 0x6e77ace800000000, + 0x176c700a00000000, 0x2907b2e500000000, 0x2abc850e00000000, + 0x14d747e100000000}, + {0x0000000000000000, 0xc0df8ec100000000, 0xc1b96c5800000000, + 0x0166e29900000000, 0x8273d9b000000000, 0x42ac577100000000, + 0x43cab5e800000000, 0x83153b2900000000, 0x45e1c3ba00000000, + 0x853e4d7b00000000, 0x8458afe200000000, 0x4487212300000000, + 0xc7921a0a00000000, 
0x074d94cb00000000, 0x062b765200000000, + 0xc6f4f89300000000, 0xcbc4f6ae00000000, 0x0b1b786f00000000, + 0x0a7d9af600000000, 0xcaa2143700000000, 0x49b72f1e00000000, + 0x8968a1df00000000, 0x880e434600000000, 0x48d1cd8700000000, + 0x8e25351400000000, 0x4efabbd500000000, 0x4f9c594c00000000, + 0x8f43d78d00000000, 0x0c56eca400000000, 0xcc89626500000000, + 0xcdef80fc00000000, 0x0d300e3d00000000, 0xd78f9c8600000000, + 0x1750124700000000, 0x1636f0de00000000, 0xd6e97e1f00000000, + 0x55fc453600000000, 0x9523cbf700000000, 0x9445296e00000000, + 0x549aa7af00000000, 0x926e5f3c00000000, 0x52b1d1fd00000000, + 0x53d7336400000000, 0x9308bda500000000, 0x101d868c00000000, + 0xd0c2084d00000000, 0xd1a4ead400000000, 0x117b641500000000, + 0x1c4b6a2800000000, 0xdc94e4e900000000, 0xddf2067000000000, + 0x1d2d88b100000000, 0x9e38b39800000000, 0x5ee73d5900000000, + 0x5f81dfc000000000, 0x9f5e510100000000, 0x59aaa99200000000, + 0x9975275300000000, 0x9813c5ca00000000, 0x58cc4b0b00000000, + 0xdbd9702200000000, 0x1b06fee300000000, 0x1a601c7a00000000, + 0xdabf92bb00000000, 0xef1948d600000000, 0x2fc6c61700000000, + 0x2ea0248e00000000, 0xee7faa4f00000000, 0x6d6a916600000000, + 0xadb51fa700000000, 0xacd3fd3e00000000, 0x6c0c73ff00000000, + 0xaaf88b6c00000000, 0x6a2705ad00000000, 0x6b41e73400000000, + 0xab9e69f500000000, 0x288b52dc00000000, 0xe854dc1d00000000, + 0xe9323e8400000000, 0x29edb04500000000, 0x24ddbe7800000000, + 0xe40230b900000000, 0xe564d22000000000, 0x25bb5ce100000000, + 0xa6ae67c800000000, 0x6671e90900000000, 0x67170b9000000000, + 0xa7c8855100000000, 0x613c7dc200000000, 0xa1e3f30300000000, + 0xa085119a00000000, 0x605a9f5b00000000, 0xe34fa47200000000, + 0x23902ab300000000, 0x22f6c82a00000000, 0xe22946eb00000000, + 0x3896d45000000000, 0xf8495a9100000000, 0xf92fb80800000000, + 0x39f036c900000000, 0xbae50de000000000, 0x7a3a832100000000, + 0x7b5c61b800000000, 0xbb83ef7900000000, 0x7d7717ea00000000, + 0xbda8992b00000000, 0xbcce7bb200000000, 0x7c11f57300000000, + 0xff04ce5a00000000, 
0x3fdb409b00000000, 0x3ebda20200000000, + 0xfe622cc300000000, 0xf35222fe00000000, 0x338dac3f00000000, + 0x32eb4ea600000000, 0xf234c06700000000, 0x7121fb4e00000000, + 0xb1fe758f00000000, 0xb098971600000000, 0x704719d700000000, + 0xb6b3e14400000000, 0x766c6f8500000000, 0x770a8d1c00000000, + 0xb7d503dd00000000, 0x34c038f400000000, 0xf41fb63500000000, + 0xf57954ac00000000, 0x35a6da6d00000000, 0x9f35e17700000000, + 0x5fea6fb600000000, 0x5e8c8d2f00000000, 0x9e5303ee00000000, + 0x1d4638c700000000, 0xdd99b60600000000, 0xdcff549f00000000, + 0x1c20da5e00000000, 0xdad422cd00000000, 0x1a0bac0c00000000, + 0x1b6d4e9500000000, 0xdbb2c05400000000, 0x58a7fb7d00000000, + 0x987875bc00000000, 0x991e972500000000, 0x59c119e400000000, + 0x54f117d900000000, 0x942e991800000000, 0x95487b8100000000, + 0x5597f54000000000, 0xd682ce6900000000, 0x165d40a800000000, + 0x173ba23100000000, 0xd7e42cf000000000, 0x1110d46300000000, + 0xd1cf5aa200000000, 0xd0a9b83b00000000, 0x107636fa00000000, + 0x93630dd300000000, 0x53bc831200000000, 0x52da618b00000000, + 0x9205ef4a00000000, 0x48ba7df100000000, 0x8865f33000000000, + 0x890311a900000000, 0x49dc9f6800000000, 0xcac9a44100000000, + 0x0a162a8000000000, 0x0b70c81900000000, 0xcbaf46d800000000, + 0x0d5bbe4b00000000, 0xcd84308a00000000, 0xcce2d21300000000, + 0x0c3d5cd200000000, 0x8f2867fb00000000, 0x4ff7e93a00000000, + 0x4e910ba300000000, 0x8e4e856200000000, 0x837e8b5f00000000, + 0x43a1059e00000000, 0x42c7e70700000000, 0x821869c600000000, + 0x010d52ef00000000, 0xc1d2dc2e00000000, 0xc0b43eb700000000, + 0x006bb07600000000, 0xc69f48e500000000, 0x0640c62400000000, + 0x072624bd00000000, 0xc7f9aa7c00000000, 0x44ec915500000000, + 0x84331f9400000000, 0x8555fd0d00000000, 0x458a73cc00000000, + 0x702ca9a100000000, 0xb0f3276000000000, 0xb195c5f900000000, + 0x714a4b3800000000, 0xf25f701100000000, 0x3280fed000000000, + 0x33e61c4900000000, 0xf339928800000000, 0x35cd6a1b00000000, + 0xf512e4da00000000, 0xf474064300000000, 0x34ab888200000000, + 0xb7beb3ab00000000, 
0x77613d6a00000000, 0x7607dff300000000, + 0xb6d8513200000000, 0xbbe85f0f00000000, 0x7b37d1ce00000000, + 0x7a51335700000000, 0xba8ebd9600000000, 0x399b86bf00000000, + 0xf944087e00000000, 0xf822eae700000000, 0x38fd642600000000, + 0xfe099cb500000000, 0x3ed6127400000000, 0x3fb0f0ed00000000, + 0xff6f7e2c00000000, 0x7c7a450500000000, 0xbca5cbc400000000, + 0xbdc3295d00000000, 0x7d1ca79c00000000, 0xa7a3352700000000, + 0x677cbbe600000000, 0x661a597f00000000, 0xa6c5d7be00000000, + 0x25d0ec9700000000, 0xe50f625600000000, 0xe46980cf00000000, + 0x24b60e0e00000000, 0xe242f69d00000000, 0x229d785c00000000, + 0x23fb9ac500000000, 0xe324140400000000, 0x60312f2d00000000, + 0xa0eea1ec00000000, 0xa188437500000000, 0x6157cdb400000000, + 0x6c67c38900000000, 0xacb84d4800000000, 0xaddeafd100000000, + 0x6d01211000000000, 0xee141a3900000000, 0x2ecb94f800000000, + 0x2fad766100000000, 0xef72f8a000000000, 0x2986003300000000, + 0xe9598ef200000000, 0xe83f6c6b00000000, 0x28e0e2aa00000000, + 0xabf5d98300000000, 0x6b2a574200000000, 0x6a4cb5db00000000, + 0xaa933b1a00000000}, + {0x0000000000000000, 0x6f4ca59b00000000, 0x9f9e3bec00000000, + 0xf0d29e7700000000, 0x7f3b060300000000, 0x1077a39800000000, + 0xe0a53def00000000, 0x8fe9987400000000, 0xfe760c0600000000, + 0x913aa99d00000000, 0x61e837ea00000000, 0x0ea4927100000000, + 0x814d0a0500000000, 0xee01af9e00000000, 0x1ed331e900000000, + 0x719f947200000000, 0xfced180c00000000, 0x93a1bd9700000000, + 0x637323e000000000, 0x0c3f867b00000000, 0x83d61e0f00000000, + 0xec9abb9400000000, 0x1c4825e300000000, 0x7304807800000000, + 0x029b140a00000000, 0x6dd7b19100000000, 0x9d052fe600000000, + 0xf2498a7d00000000, 0x7da0120900000000, 0x12ecb79200000000, + 0xe23e29e500000000, 0x8d728c7e00000000, 0xf8db311800000000, + 0x9797948300000000, 0x67450af400000000, 0x0809af6f00000000, + 0x87e0371b00000000, 0xe8ac928000000000, 0x187e0cf700000000, + 0x7732a96c00000000, 0x06ad3d1e00000000, 0x69e1988500000000, + 0x993306f200000000, 0xf67fa36900000000, 0x79963b1d00000000, + 
0x16da9e8600000000, 0xe60800f100000000, 0x8944a56a00000000, + 0x0436291400000000, 0x6b7a8c8f00000000, 0x9ba812f800000000, + 0xf4e4b76300000000, 0x7b0d2f1700000000, 0x14418a8c00000000, + 0xe49314fb00000000, 0x8bdfb16000000000, 0xfa40251200000000, + 0x950c808900000000, 0x65de1efe00000000, 0x0a92bb6500000000, + 0x857b231100000000, 0xea37868a00000000, 0x1ae518fd00000000, + 0x75a9bd6600000000, 0xf0b7633000000000, 0x9ffbc6ab00000000, + 0x6f2958dc00000000, 0x0065fd4700000000, 0x8f8c653300000000, + 0xe0c0c0a800000000, 0x10125edf00000000, 0x7f5efb4400000000, + 0x0ec16f3600000000, 0x618dcaad00000000, 0x915f54da00000000, + 0xfe13f14100000000, 0x71fa693500000000, 0x1eb6ccae00000000, + 0xee6452d900000000, 0x8128f74200000000, 0x0c5a7b3c00000000, + 0x6316dea700000000, 0x93c440d000000000, 0xfc88e54b00000000, + 0x73617d3f00000000, 0x1c2dd8a400000000, 0xecff46d300000000, + 0x83b3e34800000000, 0xf22c773a00000000, 0x9d60d2a100000000, + 0x6db24cd600000000, 0x02fee94d00000000, 0x8d17713900000000, + 0xe25bd4a200000000, 0x12894ad500000000, 0x7dc5ef4e00000000, + 0x086c522800000000, 0x6720f7b300000000, 0x97f269c400000000, + 0xf8becc5f00000000, 0x7757542b00000000, 0x181bf1b000000000, + 0xe8c96fc700000000, 0x8785ca5c00000000, 0xf61a5e2e00000000, + 0x9956fbb500000000, 0x698465c200000000, 0x06c8c05900000000, + 0x8921582d00000000, 0xe66dfdb600000000, 0x16bf63c100000000, + 0x79f3c65a00000000, 0xf4814a2400000000, 0x9bcdefbf00000000, + 0x6b1f71c800000000, 0x0453d45300000000, 0x8bba4c2700000000, + 0xe4f6e9bc00000000, 0x142477cb00000000, 0x7b68d25000000000, + 0x0af7462200000000, 0x65bbe3b900000000, 0x95697dce00000000, + 0xfa25d85500000000, 0x75cc402100000000, 0x1a80e5ba00000000, + 0xea527bcd00000000, 0x851ede5600000000, 0xe06fc76000000000, + 0x8f2362fb00000000, 0x7ff1fc8c00000000, 0x10bd591700000000, + 0x9f54c16300000000, 0xf01864f800000000, 0x00cafa8f00000000, + 0x6f865f1400000000, 0x1e19cb6600000000, 0x71556efd00000000, + 0x8187f08a00000000, 0xeecb551100000000, 0x6122cd6500000000, + 
0x0e6e68fe00000000, 0xfebcf68900000000, 0x91f0531200000000, + 0x1c82df6c00000000, 0x73ce7af700000000, 0x831ce48000000000, + 0xec50411b00000000, 0x63b9d96f00000000, 0x0cf57cf400000000, + 0xfc27e28300000000, 0x936b471800000000, 0xe2f4d36a00000000, + 0x8db876f100000000, 0x7d6ae88600000000, 0x12264d1d00000000, + 0x9dcfd56900000000, 0xf28370f200000000, 0x0251ee8500000000, + 0x6d1d4b1e00000000, 0x18b4f67800000000, 0x77f853e300000000, + 0x872acd9400000000, 0xe866680f00000000, 0x678ff07b00000000, + 0x08c355e000000000, 0xf811cb9700000000, 0x975d6e0c00000000, + 0xe6c2fa7e00000000, 0x898e5fe500000000, 0x795cc19200000000, + 0x1610640900000000, 0x99f9fc7d00000000, 0xf6b559e600000000, + 0x0667c79100000000, 0x692b620a00000000, 0xe459ee7400000000, + 0x8b154bef00000000, 0x7bc7d59800000000, 0x148b700300000000, + 0x9b62e87700000000, 0xf42e4dec00000000, 0x04fcd39b00000000, + 0x6bb0760000000000, 0x1a2fe27200000000, 0x756347e900000000, + 0x85b1d99e00000000, 0xeafd7c0500000000, 0x6514e47100000000, + 0x0a5841ea00000000, 0xfa8adf9d00000000, 0x95c67a0600000000, + 0x10d8a45000000000, 0x7f9401cb00000000, 0x8f469fbc00000000, + 0xe00a3a2700000000, 0x6fe3a25300000000, 0x00af07c800000000, + 0xf07d99bf00000000, 0x9f313c2400000000, 0xeeaea85600000000, + 0x81e20dcd00000000, 0x713093ba00000000, 0x1e7c362100000000, + 0x9195ae5500000000, 0xfed90bce00000000, 0x0e0b95b900000000, + 0x6147302200000000, 0xec35bc5c00000000, 0x837919c700000000, + 0x73ab87b000000000, 0x1ce7222b00000000, 0x930eba5f00000000, + 0xfc421fc400000000, 0x0c9081b300000000, 0x63dc242800000000, + 0x1243b05a00000000, 0x7d0f15c100000000, 0x8ddd8bb600000000, + 0xe2912e2d00000000, 0x6d78b65900000000, 0x023413c200000000, + 0xf2e68db500000000, 0x9daa282e00000000, 0xe803954800000000, + 0x874f30d300000000, 0x779daea400000000, 0x18d10b3f00000000, + 0x9738934b00000000, 0xf87436d000000000, 0x08a6a8a700000000, + 0x67ea0d3c00000000, 0x1675994e00000000, 0x79393cd500000000, + 0x89eba2a200000000, 0xe6a7073900000000, 0x694e9f4d00000000, + 
0x06023ad600000000, 0xf6d0a4a100000000, 0x999c013a00000000, + 0x14ee8d4400000000, 0x7ba228df00000000, 0x8b70b6a800000000, + 0xe43c133300000000, 0x6bd58b4700000000, 0x04992edc00000000, + 0xf44bb0ab00000000, 0x9b07153000000000, 0xea98814200000000, + 0x85d424d900000000, 0x7506baae00000000, 0x1a4a1f3500000000, + 0x95a3874100000000, 0xfaef22da00000000, 0x0a3dbcad00000000, + 0x6571193600000000}, + {0x0000000000000000, 0x85d996dd00000000, 0x4bb55c6000000000, + 0xce6ccabd00000000, 0x966ab9c000000000, 0x13b32f1d00000000, + 0xdddfe5a000000000, 0x5806737d00000000, 0x6dd3035a00000000, + 0xe80a958700000000, 0x26665f3a00000000, 0xa3bfc9e700000000, + 0xfbb9ba9a00000000, 0x7e602c4700000000, 0xb00ce6fa00000000, + 0x35d5702700000000, 0xdaa607b400000000, 0x5f7f916900000000, + 0x91135bd400000000, 0x14cacd0900000000, 0x4cccbe7400000000, + 0xc91528a900000000, 0x0779e21400000000, 0x82a074c900000000, + 0xb77504ee00000000, 0x32ac923300000000, 0xfcc0588e00000000, + 0x7919ce5300000000, 0x211fbd2e00000000, 0xa4c62bf300000000, + 0x6aaae14e00000000, 0xef73779300000000, 0xf54b7eb300000000, + 0x7092e86e00000000, 0xbefe22d300000000, 0x3b27b40e00000000, + 0x6321c77300000000, 0xe6f851ae00000000, 0x28949b1300000000, + 0xad4d0dce00000000, 0x98987de900000000, 0x1d41eb3400000000, + 0xd32d218900000000, 0x56f4b75400000000, 0x0ef2c42900000000, + 0x8b2b52f400000000, 0x4547984900000000, 0xc09e0e9400000000, + 0x2fed790700000000, 0xaa34efda00000000, 0x6458256700000000, + 0xe181b3ba00000000, 0xb987c0c700000000, 0x3c5e561a00000000, + 0xf2329ca700000000, 0x77eb0a7a00000000, 0x423e7a5d00000000, + 0xc7e7ec8000000000, 0x098b263d00000000, 0x8c52b0e000000000, + 0xd454c39d00000000, 0x518d554000000000, 0x9fe19ffd00000000, + 0x1a38092000000000, 0xab918dbd00000000, 0x2e481b6000000000, + 0xe024d1dd00000000, 0x65fd470000000000, 0x3dfb347d00000000, + 0xb822a2a000000000, 0x764e681d00000000, 0xf397fec000000000, + 0xc6428ee700000000, 0x439b183a00000000, 0x8df7d28700000000, + 0x082e445a00000000, 0x5028372700000000, 
0xd5f1a1fa00000000, + 0x1b9d6b4700000000, 0x9e44fd9a00000000, 0x71378a0900000000, + 0xf4ee1cd400000000, 0x3a82d66900000000, 0xbf5b40b400000000, + 0xe75d33c900000000, 0x6284a51400000000, 0xace86fa900000000, + 0x2931f97400000000, 0x1ce4895300000000, 0x993d1f8e00000000, + 0x5751d53300000000, 0xd28843ee00000000, 0x8a8e309300000000, + 0x0f57a64e00000000, 0xc13b6cf300000000, 0x44e2fa2e00000000, + 0x5edaf30e00000000, 0xdb0365d300000000, 0x156faf6e00000000, + 0x90b639b300000000, 0xc8b04ace00000000, 0x4d69dc1300000000, + 0x830516ae00000000, 0x06dc807300000000, 0x3309f05400000000, + 0xb6d0668900000000, 0x78bcac3400000000, 0xfd653ae900000000, + 0xa563499400000000, 0x20badf4900000000, 0xeed615f400000000, + 0x6b0f832900000000, 0x847cf4ba00000000, 0x01a5626700000000, + 0xcfc9a8da00000000, 0x4a103e0700000000, 0x12164d7a00000000, + 0x97cfdba700000000, 0x59a3111a00000000, 0xdc7a87c700000000, + 0xe9aff7e000000000, 0x6c76613d00000000, 0xa21aab8000000000, + 0x27c33d5d00000000, 0x7fc54e2000000000, 0xfa1cd8fd00000000, + 0x3470124000000000, 0xb1a9849d00000000, 0x17256aa000000000, + 0x92fcfc7d00000000, 0x5c9036c000000000, 0xd949a01d00000000, + 0x814fd36000000000, 0x049645bd00000000, 0xcafa8f0000000000, + 0x4f2319dd00000000, 0x7af669fa00000000, 0xff2fff2700000000, + 0x3143359a00000000, 0xb49aa34700000000, 0xec9cd03a00000000, + 0x694546e700000000, 0xa7298c5a00000000, 0x22f01a8700000000, + 0xcd836d1400000000, 0x485afbc900000000, 0x8636317400000000, + 0x03efa7a900000000, 0x5be9d4d400000000, 0xde30420900000000, + 0x105c88b400000000, 0x95851e6900000000, 0xa0506e4e00000000, + 0x2589f89300000000, 0xebe5322e00000000, 0x6e3ca4f300000000, + 0x363ad78e00000000, 0xb3e3415300000000, 0x7d8f8bee00000000, + 0xf8561d3300000000, 0xe26e141300000000, 0x67b782ce00000000, + 0xa9db487300000000, 0x2c02deae00000000, 0x7404add300000000, + 0xf1dd3b0e00000000, 0x3fb1f1b300000000, 0xba68676e00000000, + 0x8fbd174900000000, 0x0a64819400000000, 0xc4084b2900000000, + 0x41d1ddf400000000, 0x19d7ae8900000000, 
0x9c0e385400000000, + 0x5262f2e900000000, 0xd7bb643400000000, 0x38c813a700000000, + 0xbd11857a00000000, 0x737d4fc700000000, 0xf6a4d91a00000000, + 0xaea2aa6700000000, 0x2b7b3cba00000000, 0xe517f60700000000, + 0x60ce60da00000000, 0x551b10fd00000000, 0xd0c2862000000000, + 0x1eae4c9d00000000, 0x9b77da4000000000, 0xc371a93d00000000, + 0x46a83fe000000000, 0x88c4f55d00000000, 0x0d1d638000000000, + 0xbcb4e71d00000000, 0x396d71c000000000, 0xf701bb7d00000000, + 0x72d82da000000000, 0x2ade5edd00000000, 0xaf07c80000000000, + 0x616b02bd00000000, 0xe4b2946000000000, 0xd167e44700000000, + 0x54be729a00000000, 0x9ad2b82700000000, 0x1f0b2efa00000000, + 0x470d5d8700000000, 0xc2d4cb5a00000000, 0x0cb801e700000000, + 0x8961973a00000000, 0x6612e0a900000000, 0xe3cb767400000000, + 0x2da7bcc900000000, 0xa87e2a1400000000, 0xf078596900000000, + 0x75a1cfb400000000, 0xbbcd050900000000, 0x3e1493d400000000, + 0x0bc1e3f300000000, 0x8e18752e00000000, 0x4074bf9300000000, + 0xc5ad294e00000000, 0x9dab5a3300000000, 0x1872ccee00000000, + 0xd61e065300000000, 0x53c7908e00000000, 0x49ff99ae00000000, + 0xcc260f7300000000, 0x024ac5ce00000000, 0x8793531300000000, + 0xdf95206e00000000, 0x5a4cb6b300000000, 0x94207c0e00000000, + 0x11f9ead300000000, 0x242c9af400000000, 0xa1f50c2900000000, + 0x6f99c69400000000, 0xea40504900000000, 0xb246233400000000, + 0x379fb5e900000000, 0xf9f37f5400000000, 0x7c2ae98900000000, + 0x93599e1a00000000, 0x168008c700000000, 0xd8ecc27a00000000, + 0x5d3554a700000000, 0x053327da00000000, 0x80eab10700000000, + 0x4e867bba00000000, 0xcb5fed6700000000, 0xfe8a9d4000000000, + 0x7b530b9d00000000, 0xb53fc12000000000, 0x30e657fd00000000, + 0x68e0248000000000, 0xed39b25d00000000, 0x235578e000000000, + 0xa68cee3d00000000}, + {0x0000000000000000, 0x76e10f9d00000000, 0xadc46ee100000000, + 0xdb25617c00000000, 0x1b8fac1900000000, 0x6d6ea38400000000, + 0xb64bc2f800000000, 0xc0aacd6500000000, 0x361e593300000000, + 0x40ff56ae00000000, 0x9bda37d200000000, 0xed3b384f00000000, + 0x2d91f52a00000000, 
0x5b70fab700000000, 0x80559bcb00000000, + 0xf6b4945600000000, 0x6c3cb26600000000, 0x1addbdfb00000000, + 0xc1f8dc8700000000, 0xb719d31a00000000, 0x77b31e7f00000000, + 0x015211e200000000, 0xda77709e00000000, 0xac967f0300000000, + 0x5a22eb5500000000, 0x2cc3e4c800000000, 0xf7e685b400000000, + 0x81078a2900000000, 0x41ad474c00000000, 0x374c48d100000000, + 0xec6929ad00000000, 0x9a88263000000000, 0xd87864cd00000000, + 0xae996b5000000000, 0x75bc0a2c00000000, 0x035d05b100000000, + 0xc3f7c8d400000000, 0xb516c74900000000, 0x6e33a63500000000, + 0x18d2a9a800000000, 0xee663dfe00000000, 0x9887326300000000, + 0x43a2531f00000000, 0x35435c8200000000, 0xf5e991e700000000, + 0x83089e7a00000000, 0x582dff0600000000, 0x2eccf09b00000000, + 0xb444d6ab00000000, 0xc2a5d93600000000, 0x1980b84a00000000, + 0x6f61b7d700000000, 0xafcb7ab200000000, 0xd92a752f00000000, + 0x020f145300000000, 0x74ee1bce00000000, 0x825a8f9800000000, + 0xf4bb800500000000, 0x2f9ee17900000000, 0x597feee400000000, + 0x99d5238100000000, 0xef342c1c00000000, 0x34114d6000000000, + 0x42f042fd00000000, 0xf1f7b94100000000, 0x8716b6dc00000000, + 0x5c33d7a000000000, 0x2ad2d83d00000000, 0xea78155800000000, + 0x9c991ac500000000, 0x47bc7bb900000000, 0x315d742400000000, + 0xc7e9e07200000000, 0xb108efef00000000, 0x6a2d8e9300000000, + 0x1ccc810e00000000, 0xdc664c6b00000000, 0xaa8743f600000000, + 0x71a2228a00000000, 0x07432d1700000000, 0x9dcb0b2700000000, + 0xeb2a04ba00000000, 0x300f65c600000000, 0x46ee6a5b00000000, + 0x8644a73e00000000, 0xf0a5a8a300000000, 0x2b80c9df00000000, + 0x5d61c64200000000, 0xabd5521400000000, 0xdd345d8900000000, + 0x06113cf500000000, 0x70f0336800000000, 0xb05afe0d00000000, + 0xc6bbf19000000000, 0x1d9e90ec00000000, 0x6b7f9f7100000000, + 0x298fdd8c00000000, 0x5f6ed21100000000, 0x844bb36d00000000, + 0xf2aabcf000000000, 0x3200719500000000, 0x44e17e0800000000, + 0x9fc41f7400000000, 0xe92510e900000000, 0x1f9184bf00000000, + 0x69708b2200000000, 0xb255ea5e00000000, 0xc4b4e5c300000000, + 0x041e28a600000000, 
0x72ff273b00000000, 0xa9da464700000000, + 0xdf3b49da00000000, 0x45b36fea00000000, 0x3352607700000000, + 0xe877010b00000000, 0x9e960e9600000000, 0x5e3cc3f300000000, + 0x28ddcc6e00000000, 0xf3f8ad1200000000, 0x8519a28f00000000, + 0x73ad36d900000000, 0x054c394400000000, 0xde69583800000000, + 0xa88857a500000000, 0x68229ac000000000, 0x1ec3955d00000000, + 0xc5e6f42100000000, 0xb307fbbc00000000, 0xe2ef738300000000, + 0x940e7c1e00000000, 0x4f2b1d6200000000, 0x39ca12ff00000000, + 0xf960df9a00000000, 0x8f81d00700000000, 0x54a4b17b00000000, + 0x2245bee600000000, 0xd4f12ab000000000, 0xa210252d00000000, + 0x7935445100000000, 0x0fd44bcc00000000, 0xcf7e86a900000000, + 0xb99f893400000000, 0x62bae84800000000, 0x145be7d500000000, + 0x8ed3c1e500000000, 0xf832ce7800000000, 0x2317af0400000000, + 0x55f6a09900000000, 0x955c6dfc00000000, 0xe3bd626100000000, + 0x3898031d00000000, 0x4e790c8000000000, 0xb8cd98d600000000, + 0xce2c974b00000000, 0x1509f63700000000, 0x63e8f9aa00000000, + 0xa34234cf00000000, 0xd5a33b5200000000, 0x0e865a2e00000000, + 0x786755b300000000, 0x3a97174e00000000, 0x4c7618d300000000, + 0x975379af00000000, 0xe1b2763200000000, 0x2118bb5700000000, + 0x57f9b4ca00000000, 0x8cdcd5b600000000, 0xfa3dda2b00000000, + 0x0c894e7d00000000, 0x7a6841e000000000, 0xa14d209c00000000, + 0xd7ac2f0100000000, 0x1706e26400000000, 0x61e7edf900000000, + 0xbac28c8500000000, 0xcc23831800000000, 0x56aba52800000000, + 0x204aaab500000000, 0xfb6fcbc900000000, 0x8d8ec45400000000, + 0x4d24093100000000, 0x3bc506ac00000000, 0xe0e067d000000000, + 0x9601684d00000000, 0x60b5fc1b00000000, 0x1654f38600000000, + 0xcd7192fa00000000, 0xbb909d6700000000, 0x7b3a500200000000, + 0x0ddb5f9f00000000, 0xd6fe3ee300000000, 0xa01f317e00000000, + 0x1318cac200000000, 0x65f9c55f00000000, 0xbedca42300000000, + 0xc83dabbe00000000, 0x089766db00000000, 0x7e76694600000000, + 0xa553083a00000000, 0xd3b207a700000000, 0x250693f100000000, + 0x53e79c6c00000000, 0x88c2fd1000000000, 0xfe23f28d00000000, + 0x3e893fe800000000, 
0x4868307500000000, 0x934d510900000000, + 0xe5ac5e9400000000, 0x7f2478a400000000, 0x09c5773900000000, + 0xd2e0164500000000, 0xa40119d800000000, 0x64abd4bd00000000, + 0x124adb2000000000, 0xc96fba5c00000000, 0xbf8eb5c100000000, + 0x493a219700000000, 0x3fdb2e0a00000000, 0xe4fe4f7600000000, + 0x921f40eb00000000, 0x52b58d8e00000000, 0x2454821300000000, + 0xff71e36f00000000, 0x8990ecf200000000, 0xcb60ae0f00000000, + 0xbd81a19200000000, 0x66a4c0ee00000000, 0x1045cf7300000000, + 0xd0ef021600000000, 0xa60e0d8b00000000, 0x7d2b6cf700000000, + 0x0bca636a00000000, 0xfd7ef73c00000000, 0x8b9ff8a100000000, + 0x50ba99dd00000000, 0x265b964000000000, 0xe6f15b2500000000, + 0x901054b800000000, 0x4b3535c400000000, 0x3dd43a5900000000, + 0xa75c1c6900000000, 0xd1bd13f400000000, 0x0a98728800000000, + 0x7c797d1500000000, 0xbcd3b07000000000, 0xca32bfed00000000, + 0x1117de9100000000, 0x67f6d10c00000000, 0x9142455a00000000, + 0xe7a34ac700000000, 0x3c862bbb00000000, 0x4a67242600000000, + 0x8acde94300000000, 0xfc2ce6de00000000, 0x270987a200000000, + 0x51e8883f00000000}, + {0x0000000000000000, 0xe8dbfbb900000000, 0x91b186a800000000, + 0x796a7d1100000000, 0x63657c8a00000000, 0x8bbe873300000000, + 0xf2d4fa2200000000, 0x1a0f019b00000000, 0x87cc89cf00000000, + 0x6f17727600000000, 0x167d0f6700000000, 0xfea6f4de00000000, + 0xe4a9f54500000000, 0x0c720efc00000000, 0x751873ed00000000, + 0x9dc3885400000000, 0x4f9f624400000000, 0xa74499fd00000000, + 0xde2ee4ec00000000, 0x36f51f5500000000, 0x2cfa1ece00000000, + 0xc421e57700000000, 0xbd4b986600000000, 0x559063df00000000, + 0xc853eb8b00000000, 0x2088103200000000, 0x59e26d2300000000, + 0xb139969a00000000, 0xab36970100000000, 0x43ed6cb800000000, + 0x3a8711a900000000, 0xd25cea1000000000, 0x9e3ec58800000000, + 0x76e53e3100000000, 0x0f8f432000000000, 0xe754b89900000000, + 0xfd5bb90200000000, 0x158042bb00000000, 0x6cea3faa00000000, + 0x8431c41300000000, 0x19f24c4700000000, 0xf129b7fe00000000, + 0x8843caef00000000, 0x6098315600000000, 0x7a9730cd00000000, + 
0x924ccb7400000000, 0xeb26b66500000000, 0x03fd4ddc00000000, + 0xd1a1a7cc00000000, 0x397a5c7500000000, 0x4010216400000000, + 0xa8cbdadd00000000, 0xb2c4db4600000000, 0x5a1f20ff00000000, + 0x23755dee00000000, 0xcbaea65700000000, 0x566d2e0300000000, + 0xbeb6d5ba00000000, 0xc7dca8ab00000000, 0x2f07531200000000, + 0x3508528900000000, 0xddd3a93000000000, 0xa4b9d42100000000, + 0x4c622f9800000000, 0x7d7bfbca00000000, 0x95a0007300000000, + 0xecca7d6200000000, 0x041186db00000000, 0x1e1e874000000000, + 0xf6c57cf900000000, 0x8faf01e800000000, 0x6774fa5100000000, + 0xfab7720500000000, 0x126c89bc00000000, 0x6b06f4ad00000000, + 0x83dd0f1400000000, 0x99d20e8f00000000, 0x7109f53600000000, + 0x0863882700000000, 0xe0b8739e00000000, 0x32e4998e00000000, + 0xda3f623700000000, 0xa3551f2600000000, 0x4b8ee49f00000000, + 0x5181e50400000000, 0xb95a1ebd00000000, 0xc03063ac00000000, + 0x28eb981500000000, 0xb528104100000000, 0x5df3ebf800000000, + 0x249996e900000000, 0xcc426d5000000000, 0xd64d6ccb00000000, + 0x3e96977200000000, 0x47fcea6300000000, 0xaf2711da00000000, + 0xe3453e4200000000, 0x0b9ec5fb00000000, 0x72f4b8ea00000000, + 0x9a2f435300000000, 0x802042c800000000, 0x68fbb97100000000, + 0x1191c46000000000, 0xf94a3fd900000000, 0x6489b78d00000000, + 0x8c524c3400000000, 0xf538312500000000, 0x1de3ca9c00000000, + 0x07eccb0700000000, 0xef3730be00000000, 0x965d4daf00000000, + 0x7e86b61600000000, 0xacda5c0600000000, 0x4401a7bf00000000, + 0x3d6bdaae00000000, 0xd5b0211700000000, 0xcfbf208c00000000, + 0x2764db3500000000, 0x5e0ea62400000000, 0xb6d55d9d00000000, + 0x2b16d5c900000000, 0xc3cd2e7000000000, 0xbaa7536100000000, + 0x527ca8d800000000, 0x4873a94300000000, 0xa0a852fa00000000, + 0xd9c22feb00000000, 0x3119d45200000000, 0xbbf0874e00000000, + 0x532b7cf700000000, 0x2a4101e600000000, 0xc29afa5f00000000, + 0xd895fbc400000000, 0x304e007d00000000, 0x49247d6c00000000, + 0xa1ff86d500000000, 0x3c3c0e8100000000, 0xd4e7f53800000000, + 0xad8d882900000000, 0x4556739000000000, 0x5f59720b00000000, + 
0xb78289b200000000, 0xcee8f4a300000000, 0x26330f1a00000000, + 0xf46fe50a00000000, 0x1cb41eb300000000, 0x65de63a200000000, + 0x8d05981b00000000, 0x970a998000000000, 0x7fd1623900000000, + 0x06bb1f2800000000, 0xee60e49100000000, 0x73a36cc500000000, + 0x9b78977c00000000, 0xe212ea6d00000000, 0x0ac911d400000000, + 0x10c6104f00000000, 0xf81debf600000000, 0x817796e700000000, + 0x69ac6d5e00000000, 0x25ce42c600000000, 0xcd15b97f00000000, + 0xb47fc46e00000000, 0x5ca43fd700000000, 0x46ab3e4c00000000, + 0xae70c5f500000000, 0xd71ab8e400000000, 0x3fc1435d00000000, + 0xa202cb0900000000, 0x4ad930b000000000, 0x33b34da100000000, + 0xdb68b61800000000, 0xc167b78300000000, 0x29bc4c3a00000000, + 0x50d6312b00000000, 0xb80dca9200000000, 0x6a51208200000000, + 0x828adb3b00000000, 0xfbe0a62a00000000, 0x133b5d9300000000, + 0x09345c0800000000, 0xe1efa7b100000000, 0x9885daa000000000, + 0x705e211900000000, 0xed9da94d00000000, 0x054652f400000000, + 0x7c2c2fe500000000, 0x94f7d45c00000000, 0x8ef8d5c700000000, + 0x66232e7e00000000, 0x1f49536f00000000, 0xf792a8d600000000, + 0xc68b7c8400000000, 0x2e50873d00000000, 0x573afa2c00000000, + 0xbfe1019500000000, 0xa5ee000e00000000, 0x4d35fbb700000000, + 0x345f86a600000000, 0xdc847d1f00000000, 0x4147f54b00000000, + 0xa99c0ef200000000, 0xd0f673e300000000, 0x382d885a00000000, + 0x222289c100000000, 0xcaf9727800000000, 0xb3930f6900000000, + 0x5b48f4d000000000, 0x89141ec000000000, 0x61cfe57900000000, + 0x18a5986800000000, 0xf07e63d100000000, 0xea71624a00000000, + 0x02aa99f300000000, 0x7bc0e4e200000000, 0x931b1f5b00000000, + 0x0ed8970f00000000, 0xe6036cb600000000, 0x9f6911a700000000, + 0x77b2ea1e00000000, 0x6dbdeb8500000000, 0x8566103c00000000, + 0xfc0c6d2d00000000, 0x14d7969400000000, 0x58b5b90c00000000, + 0xb06e42b500000000, 0xc9043fa400000000, 0x21dfc41d00000000, + 0x3bd0c58600000000, 0xd30b3e3f00000000, 0xaa61432e00000000, + 0x42bab89700000000, 0xdf7930c300000000, 0x37a2cb7a00000000, + 0x4ec8b66b00000000, 0xa6134dd200000000, 0xbc1c4c4900000000, + 
0x54c7b7f000000000, 0x2dadcae100000000, 0xc576315800000000, + 0x172adb4800000000, 0xfff120f100000000, 0x869b5de000000000, + 0x6e40a65900000000, 0x744fa7c200000000, 0x9c945c7b00000000, + 0xe5fe216a00000000, 0x0d25dad300000000, 0x90e6528700000000, + 0x783da93e00000000, 0x0157d42f00000000, 0xe98c2f9600000000, + 0xf3832e0d00000000, 0x1b58d5b400000000, 0x6232a8a500000000, + 0x8ae9531c00000000}, + {0x0000000000000000, 0x919168ae00000000, 0x6325a08700000000, + 0xf2b4c82900000000, 0x874c31d400000000, 0x16dd597a00000000, + 0xe469915300000000, 0x75f8f9fd00000000, 0x4f9f137300000000, + 0xde0e7bdd00000000, 0x2cbab3f400000000, 0xbd2bdb5a00000000, + 0xc8d322a700000000, 0x59424a0900000000, 0xabf6822000000000, + 0x3a67ea8e00000000, 0x9e3e27e600000000, 0x0faf4f4800000000, + 0xfd1b876100000000, 0x6c8aefcf00000000, 0x1972163200000000, + 0x88e37e9c00000000, 0x7a57b6b500000000, 0xebc6de1b00000000, + 0xd1a1349500000000, 0x40305c3b00000000, 0xb284941200000000, + 0x2315fcbc00000000, 0x56ed054100000000, 0xc77c6def00000000, + 0x35c8a5c600000000, 0xa459cd6800000000, 0x7d7b3f1700000000, + 0xecea57b900000000, 0x1e5e9f9000000000, 0x8fcff73e00000000, + 0xfa370ec300000000, 0x6ba6666d00000000, 0x9912ae4400000000, + 0x0883c6ea00000000, 0x32e42c6400000000, 0xa37544ca00000000, + 0x51c18ce300000000, 0xc050e44d00000000, 0xb5a81db000000000, + 0x2439751e00000000, 0xd68dbd3700000000, 0x471cd59900000000, + 0xe34518f100000000, 0x72d4705f00000000, 0x8060b87600000000, + 0x11f1d0d800000000, 0x6409292500000000, 0xf598418b00000000, + 0x072c89a200000000, 0x96bde10c00000000, 0xacda0b8200000000, + 0x3d4b632c00000000, 0xcfffab0500000000, 0x5e6ec3ab00000000, + 0x2b963a5600000000, 0xba0752f800000000, 0x48b39ad100000000, + 0xd922f27f00000000, 0xfaf67e2e00000000, 0x6b67168000000000, + 0x99d3dea900000000, 0x0842b60700000000, 0x7dba4ffa00000000, + 0xec2b275400000000, 0x1e9fef7d00000000, 0x8f0e87d300000000, + 0xb5696d5d00000000, 0x24f805f300000000, 0xd64ccdda00000000, + 0x47dda57400000000, 0x32255c8900000000, 
0xa3b4342700000000, + 0x5100fc0e00000000, 0xc09194a000000000, 0x64c859c800000000, + 0xf559316600000000, 0x07edf94f00000000, 0x967c91e100000000, + 0xe384681c00000000, 0x721500b200000000, 0x80a1c89b00000000, + 0x1130a03500000000, 0x2b574abb00000000, 0xbac6221500000000, + 0x4872ea3c00000000, 0xd9e3829200000000, 0xac1b7b6f00000000, + 0x3d8a13c100000000, 0xcf3edbe800000000, 0x5eafb34600000000, + 0x878d413900000000, 0x161c299700000000, 0xe4a8e1be00000000, + 0x7539891000000000, 0x00c170ed00000000, 0x9150184300000000, + 0x63e4d06a00000000, 0xf275b8c400000000, 0xc812524a00000000, + 0x59833ae400000000, 0xab37f2cd00000000, 0x3aa69a6300000000, + 0x4f5e639e00000000, 0xdecf0b3000000000, 0x2c7bc31900000000, + 0xbdeaabb700000000, 0x19b366df00000000, 0x88220e7100000000, + 0x7a96c65800000000, 0xeb07aef600000000, 0x9eff570b00000000, + 0x0f6e3fa500000000, 0xfddaf78c00000000, 0x6c4b9f2200000000, + 0x562c75ac00000000, 0xc7bd1d0200000000, 0x3509d52b00000000, + 0xa498bd8500000000, 0xd160447800000000, 0x40f12cd600000000, + 0xb245e4ff00000000, 0x23d48c5100000000, 0xf4edfd5c00000000, + 0x657c95f200000000, 0x97c85ddb00000000, 0x0659357500000000, + 0x73a1cc8800000000, 0xe230a42600000000, 0x10846c0f00000000, + 0x811504a100000000, 0xbb72ee2f00000000, 0x2ae3868100000000, + 0xd8574ea800000000, 0x49c6260600000000, 0x3c3edffb00000000, + 0xadafb75500000000, 0x5f1b7f7c00000000, 0xce8a17d200000000, + 0x6ad3daba00000000, 0xfb42b21400000000, 0x09f67a3d00000000, + 0x9867129300000000, 0xed9feb6e00000000, 0x7c0e83c000000000, + 0x8eba4be900000000, 0x1f2b234700000000, 0x254cc9c900000000, + 0xb4dda16700000000, 0x4669694e00000000, 0xd7f801e000000000, + 0xa200f81d00000000, 0x339190b300000000, 0xc125589a00000000, + 0x50b4303400000000, 0x8996c24b00000000, 0x1807aae500000000, + 0xeab362cc00000000, 0x7b220a6200000000, 0x0edaf39f00000000, + 0x9f4b9b3100000000, 0x6dff531800000000, 0xfc6e3bb600000000, + 0xc609d13800000000, 0x5798b99600000000, 0xa52c71bf00000000, + 0x34bd191100000000, 0x4145e0ec00000000, 
0xd0d4884200000000, + 0x2260406b00000000, 0xb3f128c500000000, 0x17a8e5ad00000000, + 0x86398d0300000000, 0x748d452a00000000, 0xe51c2d8400000000, + 0x90e4d47900000000, 0x0175bcd700000000, 0xf3c174fe00000000, + 0x62501c5000000000, 0x5837f6de00000000, 0xc9a69e7000000000, + 0x3b12565900000000, 0xaa833ef700000000, 0xdf7bc70a00000000, + 0x4eeaafa400000000, 0xbc5e678d00000000, 0x2dcf0f2300000000, + 0x0e1b837200000000, 0x9f8aebdc00000000, 0x6d3e23f500000000, + 0xfcaf4b5b00000000, 0x8957b2a600000000, 0x18c6da0800000000, + 0xea72122100000000, 0x7be37a8f00000000, 0x4184900100000000, + 0xd015f8af00000000, 0x22a1308600000000, 0xb330582800000000, + 0xc6c8a1d500000000, 0x5759c97b00000000, 0xa5ed015200000000, + 0x347c69fc00000000, 0x9025a49400000000, 0x01b4cc3a00000000, + 0xf300041300000000, 0x62916cbd00000000, 0x1769954000000000, + 0x86f8fdee00000000, 0x744c35c700000000, 0xe5dd5d6900000000, + 0xdfbab7e700000000, 0x4e2bdf4900000000, 0xbc9f176000000000, + 0x2d0e7fce00000000, 0x58f6863300000000, 0xc967ee9d00000000, + 0x3bd326b400000000, 0xaa424e1a00000000, 0x7360bc6500000000, + 0xe2f1d4cb00000000, 0x10451ce200000000, 0x81d4744c00000000, + 0xf42c8db100000000, 0x65bde51f00000000, 0x97092d3600000000, + 0x0698459800000000, 0x3cffaf1600000000, 0xad6ec7b800000000, + 0x5fda0f9100000000, 0xce4b673f00000000, 0xbbb39ec200000000, + 0x2a22f66c00000000, 0xd8963e4500000000, 0x490756eb00000000, + 0xed5e9b8300000000, 0x7ccff32d00000000, 0x8e7b3b0400000000, + 0x1fea53aa00000000, 0x6a12aa5700000000, 0xfb83c2f900000000, + 0x09370ad000000000, 0x98a6627e00000000, 0xa2c188f000000000, + 0x3350e05e00000000, 0xc1e4287700000000, 0x507540d900000000, + 0x258db92400000000, 0xb41cd18a00000000, 0x46a819a300000000, + 0xd739710d00000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 
0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 
0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 
0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 
0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 
0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 
0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 
0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5, + 0x708fe0c8, 0xa0f5408f, 0x10dc20b2, 0xc14b7030, 0x7162100d, + 0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf, + 0xd1975082, 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027, + 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2, 0x43dc9050, + 0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098, + 0xe329d0df, 0x5300b0e2, 0x042fc1c1, 0xb406a1fc, 0x647c01bb, + 0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173, + 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104, + 0xb5eb5139, 0x6591f17e, 0xd5b89143, 0x86b821a1, 0x3691419c, + 0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e, + 0x96640113, 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6, + 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123, 0x4958f358, + 0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390, + 0xe9adb3d7, 0x5984d3ea, 0x88138368, 0x383ae355, 0xe8404312, + 0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da, + 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd, + 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a, 0x0a846308, 0xbaad0335, + 0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387, + 0x1a5843ba, 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de, + 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b, 0x8c3c42a9, + 0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261, + 0x2cc90226, 0x9ce0621b, 0xcfe0d2f9, 0x7fc9b2c4, 
0xafb31283, + 0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b, + 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c, + 0x7e244201, 0xae5ee246, 0x1e77827b, 0x92b0e6b1, 0x2299868c, + 0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e, + 0x826cc603, 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6, + 0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633, 0x102706d1, + 0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619, + 0xb0d2465e, 0x00fb2663, 0xd16c76e1, 0x614516dc, 0xb13fb69b, + 0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653, + 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785, + 0xe610c7b8, 0x366a67ff, 0x864307c2, 0x57d45740, 0xe7fd377d, + 0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf, + 0x470877f2, 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757, + 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2, 0xd543b720, + 0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8, + 0x75b6f7af, 0xc59f9792, 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, + 0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b, + 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c, + 0x6a2c8511, 0xba562556, 0x0a7f456b, 0x597ff589, 0xe95695b4, + 0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506, + 0x49a3d53b, 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe, + 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b, 0xdfc7d428, + 0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0, + 0x7f3294a7, 0xcf1bf49a, 0x1e8ca418, 0xaea5c425, 0x7edf6462, + 0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa, + 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd, + 0x2ddfd480, 0xfda574c7, 0x4d8c14fa, 0x9c1b4478, 0x2c322445, + 0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, + 0x8cc764ca}, + {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b, + 0xb391cd50, 0x1de359d6, 0xb830051d, 0x6d8253ec, 0xc8510f27, + 0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a, + 0xd5b256f1, 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 
0x35704285, + 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e, 0xf68085ef, + 0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf, + 0xeb63dc39, 0x4eb080f2, 0x3605ac07, 0x93d6f0cc, 0x3da4644a, + 0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a, + 0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d, 0x4dc56e70, + 0xe81632bb, 0x4664a63d, 0xe3b7faf6, 0xad077a04, 0x08d426cf, + 0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, + 0x15377f19, 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e, + 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5, 0x6c0a580f, + 0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f, + 0x71e901d9, 0xd43a5d12, 0x01880be3, 0xa45b5728, 0x0a29c3ae, + 0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe, + 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97, + 0x4499435c, 0xeaebd7da, 0x4f388b11, 0x9a8adde0, 0x3f59812b, + 0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436, + 0x22bad8fd, 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e, + 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115, 0x378da7e4, + 0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4, + 0x2a6efe32, 0x8fbda2f9, 0xc10d220b, 0x64de7ec0, 0xcaacea46, + 0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716, + 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c, + 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa, 0xd814b01e, 0x7dc7ecd5, + 0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8, + 0x6024b503, 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774, + 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef, 0x4316661d, + 0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d, + 0x5ef53fcb, 0xfb266300, 0x2e9435f1, 0x8b47693a, 0x2535fdbc, + 0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec, + 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82, + 0x5d80d149, 0xf3f245cf, 0x56211904, 0x83934ff5, 0x2640133e, + 0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623, + 0x3ba34ae8, 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c, + 
0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07, 0x189199f6, + 0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6, + 0x0572c020, 0xa0a19ceb, 0xb41ee811, 0x11cdb4da, 0xbfbf205c, + 0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c, + 0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66, + 0x6a0d76ad, 0xc47fe22b, 0x61acbee0, 0x2f1c3e12, 0x8acf62d9, + 0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4, + 0x972c3b0f, 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978, + 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3, 0x821b4416, + 0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946, + 0x9ff81dc0, 0x3a2b410b, 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, + 0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7, + 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e, + 0xaa885f45, 0x04facbc3, 0xa1299708, 0x749bc1f9, 0xd1489d32, + 0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f, + 0xccabc4e4}, + {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4, + 0xa72f3852, 0x3a35d063, 0x8e3ea7c5, 0x674eef33, 0xd3459895, + 0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50, + 0xe97048f6, 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656, + 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2, 0xa9d23154, + 0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906, + 0x93e7e137, 0x27ec9691, 0x9c39bdcf, 0x2832ca69, 0xb5282258, + 0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a, + 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08, + 0x5c586aae, 0xc142829f, 0x7549f539, 0x52a563a8, 0xe6ae140e, + 0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, + 0xdc9bc46d, 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa, + 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e, 0x79750b44, + 0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316, + 0x4340db27, 0xf74bac81, 0x1e3be477, 0xaa3093d1, 0x372a7be0, + 0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2, + 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7, + 
0x10c6ed71, 0x8ddc0540, 0x39d772e6, 0xd0a73a10, 0x64ac4db6, + 0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73, + 0x5e999dd5, 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba, + 0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e, 0x820259b8, + 0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea, + 0xb83789db, 0x0c3cfe7d, 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, + 0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29, + 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b, + 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a, 0xf2ea1688, 0x46e1612e, + 0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, + 0x7cd4b14d, 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a, + 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e, 0x3c76c8ef, + 0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd, + 0x0643188c, 0xb2486f2a, 0x5b3827dc, 0xef33507a, 0x7229b84b, + 0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019, + 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3, + 0xc9fc9315, 0x54e67b24, 0xe0ed0c82, 0x099d4474, 0xbd9633d2, + 0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417, + 0x87a3e3b1, 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11, + 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5, 0xc7019a13, + 0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241, + 0xfd344a70, 0x493f3dd6, 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, + 0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09, + 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b, + 0x4bfecaad, 0xd6e4229c, 0x62ef553a, 0x4503c3ab, 0xf108b40d, + 0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, + 0xcb3d646e, 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9, + 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d, 0x17a6a003, + 0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851, + 0x2d937060, 0x999807c6, 0x70e84f30, 0xc4e33896, 0x59f9d0a7, + 0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5, + 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190, + 0x7e154636, 
0xe30fae07, 0x5704d9a1, 0xbe749157, 0x0a7fe6f1, + 0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134, + 0x304a3692}, + {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 0xfa0e4a84, + 0x640ee048, 0x87096fc6, 0x1909c50a, 0xb51be5d3, 0x2b1b4f1f, + 0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15, + 0xac1220d9, 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2, + 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76, 0x9e2a5eaf, + 0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7, + 0x19233169, 0x87239ba5, 0x566276f9, 0xc862dc35, 0x2b6553bb, + 0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3, + 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae, + 0x87777362, 0x6470fcec, 0xfa705620, 0x7d53cd85, 0xe3536749, + 0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243, + 0x645a088f, 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8, + 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c, 0xedc29d29, + 0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61, + 0x6acbf2ef, 0xf4cb5823, 0x58d978fa, 0xc6d9d236, 0x25de5db8, + 0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0, + 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1, + 0xa2fdc61d, 0x41fa4993, 0xdffae35f, 0x73e8c386, 0xede8694a, + 0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40, + 0x6ae1068c, 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e, + 0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda, 0x0ebb0e03, + 0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b, + 0x89b261c5, 0x17b2cb09, 0x909150ac, 0x0e91fa60, 0xed9675ee, + 0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6, + 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb, + 0x41845537, 0xa283dab9, 0x3c837075, 0xda853b53, 0x4485919f, + 0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, + 0xc38cfe59, 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e, + 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a, 0xf1b4802f, + 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067, + 0x76bdefe9, 
0xe8bd4525, 0x44af65fc, 0xdaafcf30, 0x39a840be, + 0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6, + 0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224, 0x76e9072e, + 0xe8e9ade2, 0x0bee226c, 0x95ee88a0, 0x39fca879, 0xa7fc02b5, + 0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, + 0x20f56d73, 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958, + 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc, 0x12cd1305, + 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d, + 0x95c47cc3, 0x0bc4d60f, 0x3747a67a, 0xa9470cb6, 0x4a408338, + 0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370, + 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d, + 0xe652a3e1, 0x05552c6f, 0x9b5586a3, 0x1c761d06, 0x8276b7ca, + 0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0, + 0x057fd80c, 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b, + 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf, 0x6125d083, + 0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb, + 0xe62cbf45, 0x782c1589, 0xd43e3550, 0x4a3e9f9c, 0xa9391012, + 0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a, + 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, + 0x2e1a8bb7, 0xcd1d0439, 0x531daef5, 0xff0f8e2c, 0x610f24e0, + 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea, + 0xe6064b26}}; + +#endif + +#endif + +#if N == 3 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 
0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 
0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 
0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 
0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 
0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 
0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 
0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}, + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 
0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 
0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 
0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 
0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 
0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 
0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43cba68700000000, 0xc7903cd400000000, + 0x845b9a5300000000, 0xcf27087300000000, 0x8cecaef400000000, + 0x08b734a700000000, 0x4b7c922000000000, 0x9e4f10e600000000, + 0xdd84b66100000000, 0x59df2c3200000000, 0x1a148ab500000000, + 0x5168189500000000, 0x12a3be1200000000, 0x96f8244100000000, + 0xd53382c600000000, 0x7d99511700000000, 0x3e52f79000000000, + 0xba096dc300000000, 0xf9c2cb4400000000, 0xb2be596400000000, + 0xf175ffe300000000, 0x752e65b000000000, 0x36e5c33700000000, + 0xe3d641f100000000, 0xa01de77600000000, 0x24467d2500000000, + 0x678ddba200000000, 0x2cf1498200000000, 0x6f3aef0500000000, + 0xeb61755600000000, 0xa8aad3d100000000, 0xfa32a32e00000000, + 0xb9f905a900000000, 0x3da29ffa00000000, 0x7e69397d00000000, + 0x3515ab5d00000000, 0x76de0dda00000000, 0xf285978900000000, + 0xb14e310e00000000, 0x647db3c800000000, 0x27b6154f00000000, + 0xa3ed8f1c00000000, 0xe026299b00000000, 0xab5abbbb00000000, + 0xe8911d3c00000000, 0x6cca876f00000000, 0x2f0121e800000000, + 0x87abf23900000000, 0xc46054be00000000, 0x403bceed00000000, + 0x03f0686a00000000, 
0x488cfa4a00000000, 0x0b475ccd00000000, + 0x8f1cc69e00000000, 0xccd7601900000000, 0x19e4e2df00000000, + 0x5a2f445800000000, 0xde74de0b00000000, 0x9dbf788c00000000, + 0xd6c3eaac00000000, 0x95084c2b00000000, 0x1153d67800000000, + 0x529870ff00000000, 0xf465465d00000000, 0xb7aee0da00000000, + 0x33f57a8900000000, 0x703edc0e00000000, 0x3b424e2e00000000, + 0x7889e8a900000000, 0xfcd272fa00000000, 0xbf19d47d00000000, + 0x6a2a56bb00000000, 0x29e1f03c00000000, 0xadba6a6f00000000, + 0xee71cce800000000, 0xa50d5ec800000000, 0xe6c6f84f00000000, + 0x629d621c00000000, 0x2156c49b00000000, 0x89fc174a00000000, + 0xca37b1cd00000000, 0x4e6c2b9e00000000, 0x0da78d1900000000, + 0x46db1f3900000000, 0x0510b9be00000000, 0x814b23ed00000000, + 0xc280856a00000000, 0x17b307ac00000000, 0x5478a12b00000000, + 0xd0233b7800000000, 0x93e89dff00000000, 0xd8940fdf00000000, + 0x9b5fa95800000000, 0x1f04330b00000000, 0x5ccf958c00000000, + 0x0e57e57300000000, 0x4d9c43f400000000, 0xc9c7d9a700000000, + 0x8a0c7f2000000000, 0xc170ed0000000000, 0x82bb4b8700000000, + 0x06e0d1d400000000, 0x452b775300000000, 0x9018f59500000000, + 0xd3d3531200000000, 0x5788c94100000000, 0x14436fc600000000, + 0x5f3ffde600000000, 0x1cf45b6100000000, 0x98afc13200000000, + 0xdb6467b500000000, 0x73ceb46400000000, 0x300512e300000000, + 0xb45e88b000000000, 0xf7952e3700000000, 0xbce9bc1700000000, + 0xff221a9000000000, 0x7b7980c300000000, 0x38b2264400000000, + 0xed81a48200000000, 0xae4a020500000000, 0x2a11985600000000, + 0x69da3ed100000000, 0x22a6acf100000000, 0x616d0a7600000000, + 0xe536902500000000, 0xa6fd36a200000000, 0xe8cb8cba00000000, + 0xab002a3d00000000, 0x2f5bb06e00000000, 0x6c9016e900000000, + 0x27ec84c900000000, 0x6427224e00000000, 0xe07cb81d00000000, + 0xa3b71e9a00000000, 0x76849c5c00000000, 0x354f3adb00000000, + 0xb114a08800000000, 0xf2df060f00000000, 0xb9a3942f00000000, + 0xfa6832a800000000, 0x7e33a8fb00000000, 0x3df80e7c00000000, + 0x9552ddad00000000, 0xd6997b2a00000000, 0x52c2e17900000000, + 0x110947fe00000000, 
0x5a75d5de00000000, 0x19be735900000000, + 0x9de5e90a00000000, 0xde2e4f8d00000000, 0x0b1dcd4b00000000, + 0x48d66bcc00000000, 0xcc8df19f00000000, 0x8f46571800000000, + 0xc43ac53800000000, 0x87f163bf00000000, 0x03aaf9ec00000000, + 0x40615f6b00000000, 0x12f92f9400000000, 0x5132891300000000, + 0xd569134000000000, 0x96a2b5c700000000, 0xddde27e700000000, + 0x9e15816000000000, 0x1a4e1b3300000000, 0x5985bdb400000000, + 0x8cb63f7200000000, 0xcf7d99f500000000, 0x4b2603a600000000, + 0x08eda52100000000, 0x4391370100000000, 0x005a918600000000, + 0x84010bd500000000, 0xc7caad5200000000, 0x6f607e8300000000, + 0x2cabd80400000000, 0xa8f0425700000000, 0xeb3be4d000000000, + 0xa04776f000000000, 0xe38cd07700000000, 0x67d74a2400000000, + 0x241ceca300000000, 0xf12f6e6500000000, 0xb2e4c8e200000000, + 0x36bf52b100000000, 0x7574f43600000000, 0x3e08661600000000, + 0x7dc3c09100000000, 0xf9985ac200000000, 0xba53fc4500000000, + 0x1caecae700000000, 0x5f656c6000000000, 0xdb3ef63300000000, + 0x98f550b400000000, 0xd389c29400000000, 0x9042641300000000, + 0x1419fe4000000000, 0x57d258c700000000, 0x82e1da0100000000, + 0xc12a7c8600000000, 0x4571e6d500000000, 0x06ba405200000000, + 0x4dc6d27200000000, 0x0e0d74f500000000, 0x8a56eea600000000, + 0xc99d482100000000, 0x61379bf000000000, 0x22fc3d7700000000, + 0xa6a7a72400000000, 0xe56c01a300000000, 0xae10938300000000, + 0xeddb350400000000, 0x6980af5700000000, 0x2a4b09d000000000, + 0xff788b1600000000, 0xbcb32d9100000000, 0x38e8b7c200000000, + 0x7b23114500000000, 0x305f836500000000, 0x739425e200000000, + 0xf7cfbfb100000000, 0xb404193600000000, 0xe69c69c900000000, + 0xa557cf4e00000000, 0x210c551d00000000, 0x62c7f39a00000000, + 0x29bb61ba00000000, 0x6a70c73d00000000, 0xee2b5d6e00000000, + 0xade0fbe900000000, 0x78d3792f00000000, 0x3b18dfa800000000, + 0xbf4345fb00000000, 0xfc88e37c00000000, 0xb7f4715c00000000, + 0xf43fd7db00000000, 0x70644d8800000000, 0x33afeb0f00000000, + 0x9b0538de00000000, 0xd8ce9e5900000000, 0x5c95040a00000000, + 0x1f5ea28d00000000, 
0x542230ad00000000, 0x17e9962a00000000, + 0x93b20c7900000000, 0xd079aafe00000000, 0x054a283800000000, + 0x46818ebf00000000, 0xc2da14ec00000000, 0x8111b26b00000000, + 0xca6d204b00000000, 0x89a686cc00000000, 0x0dfd1c9f00000000, + 0x4e36ba1800000000}, + {0x0000000000000000, 0xe1b652ef00000000, 0x836bd40500000000, + 0x62dd86ea00000000, 0x06d7a80b00000000, 0xe761fae400000000, + 0x85bc7c0e00000000, 0x640a2ee100000000, 0x0cae511700000000, + 0xed1803f800000000, 0x8fc5851200000000, 0x6e73d7fd00000000, + 0x0a79f91c00000000, 0xebcfabf300000000, 0x89122d1900000000, + 0x68a47ff600000000, 0x185ca32e00000000, 0xf9eaf1c100000000, + 0x9b37772b00000000, 0x7a8125c400000000, 0x1e8b0b2500000000, + 0xff3d59ca00000000, 0x9de0df2000000000, 0x7c568dcf00000000, + 0x14f2f23900000000, 0xf544a0d600000000, 0x9799263c00000000, + 0x762f74d300000000, 0x12255a3200000000, 0xf39308dd00000000, + 0x914e8e3700000000, 0x70f8dcd800000000, 0x30b8465d00000000, + 0xd10e14b200000000, 0xb3d3925800000000, 0x5265c0b700000000, + 0x366fee5600000000, 0xd7d9bcb900000000, 0xb5043a5300000000, + 0x54b268bc00000000, 0x3c16174a00000000, 0xdda045a500000000, + 0xbf7dc34f00000000, 0x5ecb91a000000000, 0x3ac1bf4100000000, + 0xdb77edae00000000, 0xb9aa6b4400000000, 0x581c39ab00000000, + 0x28e4e57300000000, 0xc952b79c00000000, 0xab8f317600000000, + 0x4a39639900000000, 0x2e334d7800000000, 0xcf851f9700000000, + 0xad58997d00000000, 0x4ceecb9200000000, 0x244ab46400000000, + 0xc5fce68b00000000, 0xa721606100000000, 0x4697328e00000000, + 0x229d1c6f00000000, 0xc32b4e8000000000, 0xa1f6c86a00000000, + 0x40409a8500000000, 0x60708dba00000000, 0x81c6df5500000000, + 0xe31b59bf00000000, 0x02ad0b5000000000, 0x66a725b100000000, + 0x8711775e00000000, 0xe5ccf1b400000000, 0x047aa35b00000000, + 0x6cdedcad00000000, 0x8d688e4200000000, 0xefb508a800000000, + 0x0e035a4700000000, 0x6a0974a600000000, 0x8bbf264900000000, + 0xe962a0a300000000, 0x08d4f24c00000000, 0x782c2e9400000000, + 0x999a7c7b00000000, 0xfb47fa9100000000, 0x1af1a87e00000000, + 
0x7efb869f00000000, 0x9f4dd47000000000, 0xfd90529a00000000, + 0x1c26007500000000, 0x74827f8300000000, 0x95342d6c00000000, + 0xf7e9ab8600000000, 0x165ff96900000000, 0x7255d78800000000, + 0x93e3856700000000, 0xf13e038d00000000, 0x1088516200000000, + 0x50c8cbe700000000, 0xb17e990800000000, 0xd3a31fe200000000, + 0x32154d0d00000000, 0x561f63ec00000000, 0xb7a9310300000000, + 0xd574b7e900000000, 0x34c2e50600000000, 0x5c669af000000000, + 0xbdd0c81f00000000, 0xdf0d4ef500000000, 0x3ebb1c1a00000000, + 0x5ab132fb00000000, 0xbb07601400000000, 0xd9dae6fe00000000, + 0x386cb41100000000, 0x489468c900000000, 0xa9223a2600000000, + 0xcbffbccc00000000, 0x2a49ee2300000000, 0x4e43c0c200000000, + 0xaff5922d00000000, 0xcd2814c700000000, 0x2c9e462800000000, + 0x443a39de00000000, 0xa58c6b3100000000, 0xc751eddb00000000, + 0x26e7bf3400000000, 0x42ed91d500000000, 0xa35bc33a00000000, + 0xc18645d000000000, 0x2030173f00000000, 0x81e66bae00000000, + 0x6050394100000000, 0x028dbfab00000000, 0xe33bed4400000000, + 0x8731c3a500000000, 0x6687914a00000000, 0x045a17a000000000, + 0xe5ec454f00000000, 0x8d483ab900000000, 0x6cfe685600000000, + 0x0e23eebc00000000, 0xef95bc5300000000, 0x8b9f92b200000000, + 0x6a29c05d00000000, 0x08f446b700000000, 0xe942145800000000, + 0x99bac88000000000, 0x780c9a6f00000000, 0x1ad11c8500000000, + 0xfb674e6a00000000, 0x9f6d608b00000000, 0x7edb326400000000, + 0x1c06b48e00000000, 0xfdb0e66100000000, 0x9514999700000000, + 0x74a2cb7800000000, 0x167f4d9200000000, 0xf7c91f7d00000000, + 0x93c3319c00000000, 0x7275637300000000, 0x10a8e59900000000, + 0xf11eb77600000000, 0xb15e2df300000000, 0x50e87f1c00000000, + 0x3235f9f600000000, 0xd383ab1900000000, 0xb78985f800000000, + 0x563fd71700000000, 0x34e251fd00000000, 0xd554031200000000, + 0xbdf07ce400000000, 0x5c462e0b00000000, 0x3e9ba8e100000000, + 0xdf2dfa0e00000000, 0xbb27d4ef00000000, 0x5a91860000000000, + 0x384c00ea00000000, 0xd9fa520500000000, 0xa9028edd00000000, + 0x48b4dc3200000000, 0x2a695ad800000000, 0xcbdf083700000000, + 
0xafd526d600000000, 0x4e63743900000000, 0x2cbef2d300000000, + 0xcd08a03c00000000, 0xa5acdfca00000000, 0x441a8d2500000000, + 0x26c70bcf00000000, 0xc771592000000000, 0xa37b77c100000000, + 0x42cd252e00000000, 0x2010a3c400000000, 0xc1a6f12b00000000, + 0xe196e61400000000, 0x0020b4fb00000000, 0x62fd321100000000, + 0x834b60fe00000000, 0xe7414e1f00000000, 0x06f71cf000000000, + 0x642a9a1a00000000, 0x859cc8f500000000, 0xed38b70300000000, + 0x0c8ee5ec00000000, 0x6e53630600000000, 0x8fe531e900000000, + 0xebef1f0800000000, 0x0a594de700000000, 0x6884cb0d00000000, + 0x893299e200000000, 0xf9ca453a00000000, 0x187c17d500000000, + 0x7aa1913f00000000, 0x9b17c3d000000000, 0xff1ded3100000000, + 0x1eabbfde00000000, 0x7c76393400000000, 0x9dc06bdb00000000, + 0xf564142d00000000, 0x14d246c200000000, 0x760fc02800000000, + 0x97b992c700000000, 0xf3b3bc2600000000, 0x1205eec900000000, + 0x70d8682300000000, 0x916e3acc00000000, 0xd12ea04900000000, + 0x3098f2a600000000, 0x5245744c00000000, 0xb3f326a300000000, + 0xd7f9084200000000, 0x364f5aad00000000, 0x5492dc4700000000, + 0xb5248ea800000000, 0xdd80f15e00000000, 0x3c36a3b100000000, + 0x5eeb255b00000000, 0xbf5d77b400000000, 0xdb57595500000000, + 0x3ae10bba00000000, 0x583c8d5000000000, 0xb98adfbf00000000, + 0xc972036700000000, 0x28c4518800000000, 0x4a19d76200000000, + 0xabaf858d00000000, 0xcfa5ab6c00000000, 0x2e13f98300000000, + 0x4cce7f6900000000, 0xad782d8600000000, 0xc5dc527000000000, + 0x246a009f00000000, 0x46b7867500000000, 0xa701d49a00000000, + 0xc30bfa7b00000000, 0x22bda89400000000, 0x40602e7e00000000, + 0xa1d67c9100000000}, + {0x0000000000000000, 0x5880e2d700000000, 0xf106b47400000000, + 0xa98656a300000000, 0xe20d68e900000000, 0xba8d8a3e00000000, + 0x130bdc9d00000000, 0x4b8b3e4a00000000, 0x851da10900000000, + 0xdd9d43de00000000, 0x741b157d00000000, 0x2c9bf7aa00000000, + 0x6710c9e000000000, 0x3f902b3700000000, 0x96167d9400000000, + 0xce969f4300000000, 0x0a3b421300000000, 0x52bba0c400000000, + 0xfb3df66700000000, 0xa3bd14b000000000, 
0xe8362afa00000000, + 0xb0b6c82d00000000, 0x19309e8e00000000, 0x41b07c5900000000, + 0x8f26e31a00000000, 0xd7a601cd00000000, 0x7e20576e00000000, + 0x26a0b5b900000000, 0x6d2b8bf300000000, 0x35ab692400000000, + 0x9c2d3f8700000000, 0xc4addd5000000000, 0x1476842600000000, + 0x4cf666f100000000, 0xe570305200000000, 0xbdf0d28500000000, + 0xf67beccf00000000, 0xaefb0e1800000000, 0x077d58bb00000000, + 0x5ffdba6c00000000, 0x916b252f00000000, 0xc9ebc7f800000000, + 0x606d915b00000000, 0x38ed738c00000000, 0x73664dc600000000, + 0x2be6af1100000000, 0x8260f9b200000000, 0xdae01b6500000000, + 0x1e4dc63500000000, 0x46cd24e200000000, 0xef4b724100000000, + 0xb7cb909600000000, 0xfc40aedc00000000, 0xa4c04c0b00000000, + 0x0d461aa800000000, 0x55c6f87f00000000, 0x9b50673c00000000, + 0xc3d085eb00000000, 0x6a56d34800000000, 0x32d6319f00000000, + 0x795d0fd500000000, 0x21dded0200000000, 0x885bbba100000000, + 0xd0db597600000000, 0x28ec084d00000000, 0x706cea9a00000000, + 0xd9eabc3900000000, 0x816a5eee00000000, 0xcae160a400000000, + 0x9261827300000000, 0x3be7d4d000000000, 0x6367360700000000, + 0xadf1a94400000000, 0xf5714b9300000000, 0x5cf71d3000000000, + 0x0477ffe700000000, 0x4ffcc1ad00000000, 0x177c237a00000000, + 0xbefa75d900000000, 0xe67a970e00000000, 0x22d74a5e00000000, + 0x7a57a88900000000, 0xd3d1fe2a00000000, 0x8b511cfd00000000, + 0xc0da22b700000000, 0x985ac06000000000, 0x31dc96c300000000, + 0x695c741400000000, 0xa7caeb5700000000, 0xff4a098000000000, + 0x56cc5f2300000000, 0x0e4cbdf400000000, 0x45c783be00000000, + 0x1d47616900000000, 0xb4c137ca00000000, 0xec41d51d00000000, + 0x3c9a8c6b00000000, 0x641a6ebc00000000, 0xcd9c381f00000000, + 0x951cdac800000000, 0xde97e48200000000, 0x8617065500000000, + 0x2f9150f600000000, 0x7711b22100000000, 0xb9872d6200000000, + 0xe107cfb500000000, 0x4881991600000000, 0x10017bc100000000, + 0x5b8a458b00000000, 0x030aa75c00000000, 0xaa8cf1ff00000000, + 0xf20c132800000000, 0x36a1ce7800000000, 0x6e212caf00000000, + 0xc7a77a0c00000000, 0x9f2798db00000000, 
0xd4aca69100000000, + 0x8c2c444600000000, 0x25aa12e500000000, 0x7d2af03200000000, + 0xb3bc6f7100000000, 0xeb3c8da600000000, 0x42badb0500000000, + 0x1a3a39d200000000, 0x51b1079800000000, 0x0931e54f00000000, + 0xa0b7b3ec00000000, 0xf837513b00000000, 0x50d8119a00000000, + 0x0858f34d00000000, 0xa1dea5ee00000000, 0xf95e473900000000, + 0xb2d5797300000000, 0xea559ba400000000, 0x43d3cd0700000000, + 0x1b532fd000000000, 0xd5c5b09300000000, 0x8d45524400000000, + 0x24c304e700000000, 0x7c43e63000000000, 0x37c8d87a00000000, + 0x6f483aad00000000, 0xc6ce6c0e00000000, 0x9e4e8ed900000000, + 0x5ae3538900000000, 0x0263b15e00000000, 0xabe5e7fd00000000, + 0xf365052a00000000, 0xb8ee3b6000000000, 0xe06ed9b700000000, + 0x49e88f1400000000, 0x11686dc300000000, 0xdffef28000000000, + 0x877e105700000000, 0x2ef846f400000000, 0x7678a42300000000, + 0x3df39a6900000000, 0x657378be00000000, 0xccf52e1d00000000, + 0x9475ccca00000000, 0x44ae95bc00000000, 0x1c2e776b00000000, + 0xb5a821c800000000, 0xed28c31f00000000, 0xa6a3fd5500000000, + 0xfe231f8200000000, 0x57a5492100000000, 0x0f25abf600000000, + 0xc1b334b500000000, 0x9933d66200000000, 0x30b580c100000000, + 0x6835621600000000, 0x23be5c5c00000000, 0x7b3ebe8b00000000, + 0xd2b8e82800000000, 0x8a380aff00000000, 0x4e95d7af00000000, + 0x1615357800000000, 0xbf9363db00000000, 0xe713810c00000000, + 0xac98bf4600000000, 0xf4185d9100000000, 0x5d9e0b3200000000, + 0x051ee9e500000000, 0xcb8876a600000000, 0x9308947100000000, + 0x3a8ec2d200000000, 0x620e200500000000, 0x29851e4f00000000, + 0x7105fc9800000000, 0xd883aa3b00000000, 0x800348ec00000000, + 0x783419d700000000, 0x20b4fb0000000000, 0x8932ada300000000, + 0xd1b24f7400000000, 0x9a39713e00000000, 0xc2b993e900000000, + 0x6b3fc54a00000000, 0x33bf279d00000000, 0xfd29b8de00000000, + 0xa5a95a0900000000, 0x0c2f0caa00000000, 0x54afee7d00000000, + 0x1f24d03700000000, 0x47a432e000000000, 0xee22644300000000, + 0xb6a2869400000000, 0x720f5bc400000000, 0x2a8fb91300000000, + 0x8309efb000000000, 0xdb890d6700000000, 
0x9002332d00000000, + 0xc882d1fa00000000, 0x6104875900000000, 0x3984658e00000000, + 0xf712facd00000000, 0xaf92181a00000000, 0x06144eb900000000, + 0x5e94ac6e00000000, 0x151f922400000000, 0x4d9f70f300000000, + 0xe419265000000000, 0xbc99c48700000000, 0x6c429df100000000, + 0x34c27f2600000000, 0x9d44298500000000, 0xc5c4cb5200000000, + 0x8e4ff51800000000, 0xd6cf17cf00000000, 0x7f49416c00000000, + 0x27c9a3bb00000000, 0xe95f3cf800000000, 0xb1dfde2f00000000, + 0x1859888c00000000, 0x40d96a5b00000000, 0x0b52541100000000, + 0x53d2b6c600000000, 0xfa54e06500000000, 0xa2d402b200000000, + 0x6679dfe200000000, 0x3ef93d3500000000, 0x977f6b9600000000, + 0xcfff894100000000, 0x8474b70b00000000, 0xdcf455dc00000000, + 0x7572037f00000000, 0x2df2e1a800000000, 0xe3647eeb00000000, + 0xbbe49c3c00000000, 0x1262ca9f00000000, 0x4ae2284800000000, + 0x0169160200000000, 0x59e9f4d500000000, 0xf06fa27600000000, + 0xa8ef40a100000000}, + {0x0000000000000000, 0x463b676500000000, 0x8c76ceca00000000, + 0xca4da9af00000000, 0x59ebed4e00000000, 0x1fd08a2b00000000, + 0xd59d238400000000, 0x93a644e100000000, 0xb2d6db9d00000000, + 0xf4edbcf800000000, 0x3ea0155700000000, 0x789b723200000000, + 0xeb3d36d300000000, 0xad0651b600000000, 0x674bf81900000000, + 0x21709f7c00000000, 0x25abc6e000000000, 0x6390a18500000000, + 0xa9dd082a00000000, 0xefe66f4f00000000, 0x7c402bae00000000, + 0x3a7b4ccb00000000, 0xf036e56400000000, 0xb60d820100000000, + 0x977d1d7d00000000, 0xd1467a1800000000, 0x1b0bd3b700000000, + 0x5d30b4d200000000, 0xce96f03300000000, 0x88ad975600000000, + 0x42e03ef900000000, 0x04db599c00000000, 0x0b50fc1a00000000, + 0x4d6b9b7f00000000, 0x872632d000000000, 0xc11d55b500000000, + 0x52bb115400000000, 0x1480763100000000, 0xdecddf9e00000000, + 0x98f6b8fb00000000, 0xb986278700000000, 0xffbd40e200000000, + 0x35f0e94d00000000, 0x73cb8e2800000000, 0xe06dcac900000000, + 0xa656adac00000000, 0x6c1b040300000000, 0x2a20636600000000, + 0x2efb3afa00000000, 0x68c05d9f00000000, 0xa28df43000000000, + 0xe4b6935500000000, 
0x7710d7b400000000, 0x312bb0d100000000, + 0xfb66197e00000000, 0xbd5d7e1b00000000, 0x9c2de16700000000, + 0xda16860200000000, 0x105b2fad00000000, 0x566048c800000000, + 0xc5c60c2900000000, 0x83fd6b4c00000000, 0x49b0c2e300000000, + 0x0f8ba58600000000, 0x16a0f83500000000, 0x509b9f5000000000, + 0x9ad636ff00000000, 0xdced519a00000000, 0x4f4b157b00000000, + 0x0970721e00000000, 0xc33ddbb100000000, 0x8506bcd400000000, + 0xa47623a800000000, 0xe24d44cd00000000, 0x2800ed6200000000, + 0x6e3b8a0700000000, 0xfd9dcee600000000, 0xbba6a98300000000, + 0x71eb002c00000000, 0x37d0674900000000, 0x330b3ed500000000, + 0x753059b000000000, 0xbf7df01f00000000, 0xf946977a00000000, + 0x6ae0d39b00000000, 0x2cdbb4fe00000000, 0xe6961d5100000000, + 0xa0ad7a3400000000, 0x81dde54800000000, 0xc7e6822d00000000, + 0x0dab2b8200000000, 0x4b904ce700000000, 0xd836080600000000, + 0x9e0d6f6300000000, 0x5440c6cc00000000, 0x127ba1a900000000, + 0x1df0042f00000000, 0x5bcb634a00000000, 0x9186cae500000000, + 0xd7bdad8000000000, 0x441be96100000000, 0x02208e0400000000, + 0xc86d27ab00000000, 0x8e5640ce00000000, 0xaf26dfb200000000, + 0xe91db8d700000000, 0x2350117800000000, 0x656b761d00000000, + 0xf6cd32fc00000000, 0xb0f6559900000000, 0x7abbfc3600000000, + 0x3c809b5300000000, 0x385bc2cf00000000, 0x7e60a5aa00000000, + 0xb42d0c0500000000, 0xf2166b6000000000, 0x61b02f8100000000, + 0x278b48e400000000, 0xedc6e14b00000000, 0xabfd862e00000000, + 0x8a8d195200000000, 0xccb67e3700000000, 0x06fbd79800000000, + 0x40c0b0fd00000000, 0xd366f41c00000000, 0x955d937900000000, + 0x5f103ad600000000, 0x192b5db300000000, 0x2c40f16b00000000, + 0x6a7b960e00000000, 0xa0363fa100000000, 0xe60d58c400000000, + 0x75ab1c2500000000, 0x33907b4000000000, 0xf9ddd2ef00000000, + 0xbfe6b58a00000000, 0x9e962af600000000, 0xd8ad4d9300000000, + 0x12e0e43c00000000, 0x54db835900000000, 0xc77dc7b800000000, + 0x8146a0dd00000000, 0x4b0b097200000000, 0x0d306e1700000000, + 0x09eb378b00000000, 0x4fd050ee00000000, 0x859df94100000000, + 0xc3a69e2400000000, 
0x5000dac500000000, 0x163bbda000000000, + 0xdc76140f00000000, 0x9a4d736a00000000, 0xbb3dec1600000000, + 0xfd068b7300000000, 0x374b22dc00000000, 0x717045b900000000, + 0xe2d6015800000000, 0xa4ed663d00000000, 0x6ea0cf9200000000, + 0x289ba8f700000000, 0x27100d7100000000, 0x612b6a1400000000, + 0xab66c3bb00000000, 0xed5da4de00000000, 0x7efbe03f00000000, + 0x38c0875a00000000, 0xf28d2ef500000000, 0xb4b6499000000000, + 0x95c6d6ec00000000, 0xd3fdb18900000000, 0x19b0182600000000, + 0x5f8b7f4300000000, 0xcc2d3ba200000000, 0x8a165cc700000000, + 0x405bf56800000000, 0x0660920d00000000, 0x02bbcb9100000000, + 0x4480acf400000000, 0x8ecd055b00000000, 0xc8f6623e00000000, + 0x5b5026df00000000, 0x1d6b41ba00000000, 0xd726e81500000000, + 0x911d8f7000000000, 0xb06d100c00000000, 0xf656776900000000, + 0x3c1bdec600000000, 0x7a20b9a300000000, 0xe986fd4200000000, + 0xafbd9a2700000000, 0x65f0338800000000, 0x23cb54ed00000000, + 0x3ae0095e00000000, 0x7cdb6e3b00000000, 0xb696c79400000000, + 0xf0ada0f100000000, 0x630be41000000000, 0x2530837500000000, + 0xef7d2ada00000000, 0xa9464dbf00000000, 0x8836d2c300000000, + 0xce0db5a600000000, 0x04401c0900000000, 0x427b7b6c00000000, + 0xd1dd3f8d00000000, 0x97e658e800000000, 0x5dabf14700000000, + 0x1b90962200000000, 0x1f4bcfbe00000000, 0x5970a8db00000000, + 0x933d017400000000, 0xd506661100000000, 0x46a022f000000000, + 0x009b459500000000, 0xcad6ec3a00000000, 0x8ced8b5f00000000, + 0xad9d142300000000, 0xeba6734600000000, 0x21ebdae900000000, + 0x67d0bd8c00000000, 0xf476f96d00000000, 0xb24d9e0800000000, + 0x780037a700000000, 0x3e3b50c200000000, 0x31b0f54400000000, + 0x778b922100000000, 0xbdc63b8e00000000, 0xfbfd5ceb00000000, + 0x685b180a00000000, 0x2e607f6f00000000, 0xe42dd6c000000000, + 0xa216b1a500000000, 0x83662ed900000000, 0xc55d49bc00000000, + 0x0f10e01300000000, 0x492b877600000000, 0xda8dc39700000000, + 0x9cb6a4f200000000, 0x56fb0d5d00000000, 0x10c06a3800000000, + 0x141b33a400000000, 0x522054c100000000, 0x986dfd6e00000000, + 0xde569a0b00000000, 
0x4df0deea00000000, 0x0bcbb98f00000000, + 0xc186102000000000, 0x87bd774500000000, 0xa6cde83900000000, + 0xe0f68f5c00000000, 0x2abb26f300000000, 0x6c80419600000000, + 0xff26057700000000, 0xb91d621200000000, 0x7350cbbd00000000, + 0x356bacd800000000}, + {0x0000000000000000, 0x9e83da9f00000000, 0x7d01c4e400000000, + 0xe3821e7b00000000, 0xbb04f91200000000, 0x2587238d00000000, + 0xc6053df600000000, 0x5886e76900000000, 0x7609f22500000000, + 0xe88a28ba00000000, 0x0b0836c100000000, 0x958bec5e00000000, + 0xcd0d0b3700000000, 0x538ed1a800000000, 0xb00ccfd300000000, + 0x2e8f154c00000000, 0xec12e44b00000000, 0x72913ed400000000, + 0x911320af00000000, 0x0f90fa3000000000, 0x57161d5900000000, + 0xc995c7c600000000, 0x2a17d9bd00000000, 0xb494032200000000, + 0x9a1b166e00000000, 0x0498ccf100000000, 0xe71ad28a00000000, + 0x7999081500000000, 0x211fef7c00000000, 0xbf9c35e300000000, + 0x5c1e2b9800000000, 0xc29df10700000000, 0xd825c89700000000, + 0x46a6120800000000, 0xa5240c7300000000, 0x3ba7d6ec00000000, + 0x6321318500000000, 0xfda2eb1a00000000, 0x1e20f56100000000, + 0x80a32ffe00000000, 0xae2c3ab200000000, 0x30afe02d00000000, + 0xd32dfe5600000000, 0x4dae24c900000000, 0x1528c3a000000000, + 0x8bab193f00000000, 0x6829074400000000, 0xf6aadddb00000000, + 0x34372cdc00000000, 0xaab4f64300000000, 0x4936e83800000000, + 0xd7b532a700000000, 0x8f33d5ce00000000, 0x11b00f5100000000, + 0xf232112a00000000, 0x6cb1cbb500000000, 0x423edef900000000, + 0xdcbd046600000000, 0x3f3f1a1d00000000, 0xa1bcc08200000000, + 0xf93a27eb00000000, 0x67b9fd7400000000, 0x843be30f00000000, + 0x1ab8399000000000, 0xf14de1f400000000, 0x6fce3b6b00000000, + 0x8c4c251000000000, 0x12cfff8f00000000, 0x4a4918e600000000, + 0xd4cac27900000000, 0x3748dc0200000000, 0xa9cb069d00000000, + 0x874413d100000000, 0x19c7c94e00000000, 0xfa45d73500000000, + 0x64c60daa00000000, 0x3c40eac300000000, 0xa2c3305c00000000, + 0x41412e2700000000, 0xdfc2f4b800000000, 0x1d5f05bf00000000, + 0x83dcdf2000000000, 0x605ec15b00000000, 0xfedd1bc400000000, + 
0xa65bfcad00000000, 0x38d8263200000000, 0xdb5a384900000000, + 0x45d9e2d600000000, 0x6b56f79a00000000, 0xf5d52d0500000000, + 0x1657337e00000000, 0x88d4e9e100000000, 0xd0520e8800000000, + 0x4ed1d41700000000, 0xad53ca6c00000000, 0x33d010f300000000, + 0x2968296300000000, 0xb7ebf3fc00000000, 0x5469ed8700000000, + 0xcaea371800000000, 0x926cd07100000000, 0x0cef0aee00000000, + 0xef6d149500000000, 0x71eece0a00000000, 0x5f61db4600000000, + 0xc1e201d900000000, 0x22601fa200000000, 0xbce3c53d00000000, + 0xe465225400000000, 0x7ae6f8cb00000000, 0x9964e6b000000000, + 0x07e73c2f00000000, 0xc57acd2800000000, 0x5bf917b700000000, + 0xb87b09cc00000000, 0x26f8d35300000000, 0x7e7e343a00000000, + 0xe0fdeea500000000, 0x037ff0de00000000, 0x9dfc2a4100000000, + 0xb3733f0d00000000, 0x2df0e59200000000, 0xce72fbe900000000, + 0x50f1217600000000, 0x0877c61f00000000, 0x96f41c8000000000, + 0x757602fb00000000, 0xebf5d86400000000, 0xa39db33200000000, + 0x3d1e69ad00000000, 0xde9c77d600000000, 0x401fad4900000000, + 0x18994a2000000000, 0x861a90bf00000000, 0x65988ec400000000, + 0xfb1b545b00000000, 0xd594411700000000, 0x4b179b8800000000, + 0xa89585f300000000, 0x36165f6c00000000, 0x6e90b80500000000, + 0xf013629a00000000, 0x13917ce100000000, 0x8d12a67e00000000, + 0x4f8f577900000000, 0xd10c8de600000000, 0x328e939d00000000, + 0xac0d490200000000, 0xf48bae6b00000000, 0x6a0874f400000000, + 0x898a6a8f00000000, 0x1709b01000000000, 0x3986a55c00000000, + 0xa7057fc300000000, 0x448761b800000000, 0xda04bb2700000000, + 0x82825c4e00000000, 0x1c0186d100000000, 0xff8398aa00000000, + 0x6100423500000000, 0x7bb87ba500000000, 0xe53ba13a00000000, + 0x06b9bf4100000000, 0x983a65de00000000, 0xc0bc82b700000000, + 0x5e3f582800000000, 0xbdbd465300000000, 0x233e9ccc00000000, + 0x0db1898000000000, 0x9332531f00000000, 0x70b04d6400000000, + 0xee3397fb00000000, 0xb6b5709200000000, 0x2836aa0d00000000, + 0xcbb4b47600000000, 0x55376ee900000000, 0x97aa9fee00000000, + 0x0929457100000000, 0xeaab5b0a00000000, 0x7428819500000000, + 
0x2cae66fc00000000, 0xb22dbc6300000000, 0x51afa21800000000, + 0xcf2c788700000000, 0xe1a36dcb00000000, 0x7f20b75400000000, + 0x9ca2a92f00000000, 0x022173b000000000, 0x5aa794d900000000, + 0xc4244e4600000000, 0x27a6503d00000000, 0xb9258aa200000000, + 0x52d052c600000000, 0xcc53885900000000, 0x2fd1962200000000, + 0xb1524cbd00000000, 0xe9d4abd400000000, 0x7757714b00000000, + 0x94d56f3000000000, 0x0a56b5af00000000, 0x24d9a0e300000000, + 0xba5a7a7c00000000, 0x59d8640700000000, 0xc75bbe9800000000, + 0x9fdd59f100000000, 0x015e836e00000000, 0xe2dc9d1500000000, + 0x7c5f478a00000000, 0xbec2b68d00000000, 0x20416c1200000000, + 0xc3c3726900000000, 0x5d40a8f600000000, 0x05c64f9f00000000, + 0x9b45950000000000, 0x78c78b7b00000000, 0xe64451e400000000, + 0xc8cb44a800000000, 0x56489e3700000000, 0xb5ca804c00000000, + 0x2b495ad300000000, 0x73cfbdba00000000, 0xed4c672500000000, + 0x0ece795e00000000, 0x904da3c100000000, 0x8af59a5100000000, + 0x147640ce00000000, 0xf7f45eb500000000, 0x6977842a00000000, + 0x31f1634300000000, 0xaf72b9dc00000000, 0x4cf0a7a700000000, + 0xd2737d3800000000, 0xfcfc687400000000, 0x627fb2eb00000000, + 0x81fdac9000000000, 0x1f7e760f00000000, 0x47f8916600000000, + 0xd97b4bf900000000, 0x3af9558200000000, 0xa47a8f1d00000000, + 0x66e77e1a00000000, 0xf864a48500000000, 0x1be6bafe00000000, + 0x8565606100000000, 0xdde3870800000000, 0x43605d9700000000, + 0xa0e243ec00000000, 0x3e61997300000000, 0x10ee8c3f00000000, + 0x8e6d56a000000000, 0x6def48db00000000, 0xf36c924400000000, + 0xabea752d00000000, 0x3569afb200000000, 0xd6ebb1c900000000, + 0x48686b5600000000}, + {0x0000000000000000, 0xc064281700000000, 0x80c9502e00000000, + 0x40ad783900000000, 0x0093a15c00000000, 0xc0f7894b00000000, + 0x805af17200000000, 0x403ed96500000000, 0x002643b900000000, + 0xc0426bae00000000, 0x80ef139700000000, 0x408b3b8000000000, + 0x00b5e2e500000000, 0xc0d1caf200000000, 0x807cb2cb00000000, + 0x40189adc00000000, 0x414af7a900000000, 0x812edfbe00000000, + 0xc183a78700000000, 0x01e78f9000000000, 
0x41d956f500000000, + 0x81bd7ee200000000, 0xc11006db00000000, 0x01742ecc00000000, + 0x416cb41000000000, 0x81089c0700000000, 0xc1a5e43e00000000, + 0x01c1cc2900000000, 0x41ff154c00000000, 0x819b3d5b00000000, + 0xc136456200000000, 0x01526d7500000000, 0xc3929f8800000000, + 0x03f6b79f00000000, 0x435bcfa600000000, 0x833fe7b100000000, + 0xc3013ed400000000, 0x036516c300000000, 0x43c86efa00000000, + 0x83ac46ed00000000, 0xc3b4dc3100000000, 0x03d0f42600000000, + 0x437d8c1f00000000, 0x8319a40800000000, 0xc3277d6d00000000, + 0x0343557a00000000, 0x43ee2d4300000000, 0x838a055400000000, + 0x82d8682100000000, 0x42bc403600000000, 0x0211380f00000000, + 0xc275101800000000, 0x824bc97d00000000, 0x422fe16a00000000, + 0x0282995300000000, 0xc2e6b14400000000, 0x82fe2b9800000000, + 0x429a038f00000000, 0x02377bb600000000, 0xc25353a100000000, + 0x826d8ac400000000, 0x4209a2d300000000, 0x02a4daea00000000, + 0xc2c0f2fd00000000, 0xc7234eca00000000, 0x074766dd00000000, + 0x47ea1ee400000000, 0x878e36f300000000, 0xc7b0ef9600000000, + 0x07d4c78100000000, 0x4779bfb800000000, 0x871d97af00000000, + 0xc7050d7300000000, 0x0761256400000000, 0x47cc5d5d00000000, + 0x87a8754a00000000, 0xc796ac2f00000000, 0x07f2843800000000, + 0x475ffc0100000000, 0x873bd41600000000, 0x8669b96300000000, + 0x460d917400000000, 0x06a0e94d00000000, 0xc6c4c15a00000000, + 0x86fa183f00000000, 0x469e302800000000, 0x0633481100000000, + 0xc657600600000000, 0x864ffada00000000, 0x462bd2cd00000000, + 0x0686aaf400000000, 0xc6e282e300000000, 0x86dc5b8600000000, + 0x46b8739100000000, 0x06150ba800000000, 0xc67123bf00000000, + 0x04b1d14200000000, 0xc4d5f95500000000, 0x8478816c00000000, + 0x441ca97b00000000, 0x0422701e00000000, 0xc446580900000000, + 0x84eb203000000000, 0x448f082700000000, 0x049792fb00000000, + 0xc4f3baec00000000, 0x845ec2d500000000, 0x443aeac200000000, + 0x040433a700000000, 0xc4601bb000000000, 0x84cd638900000000, + 0x44a94b9e00000000, 0x45fb26eb00000000, 0x859f0efc00000000, + 0xc53276c500000000, 0x05565ed200000000, 
0x456887b700000000, + 0x850cafa000000000, 0xc5a1d79900000000, 0x05c5ff8e00000000, + 0x45dd655200000000, 0x85b94d4500000000, 0xc514357c00000000, + 0x05701d6b00000000, 0x454ec40e00000000, 0x852aec1900000000, + 0xc587942000000000, 0x05e3bc3700000000, 0xcf41ed4f00000000, + 0x0f25c55800000000, 0x4f88bd6100000000, 0x8fec957600000000, + 0xcfd24c1300000000, 0x0fb6640400000000, 0x4f1b1c3d00000000, + 0x8f7f342a00000000, 0xcf67aef600000000, 0x0f0386e100000000, + 0x4faefed800000000, 0x8fcad6cf00000000, 0xcff40faa00000000, + 0x0f9027bd00000000, 0x4f3d5f8400000000, 0x8f59779300000000, + 0x8e0b1ae600000000, 0x4e6f32f100000000, 0x0ec24ac800000000, + 0xcea662df00000000, 0x8e98bbba00000000, 0x4efc93ad00000000, + 0x0e51eb9400000000, 0xce35c38300000000, 0x8e2d595f00000000, + 0x4e49714800000000, 0x0ee4097100000000, 0xce80216600000000, + 0x8ebef80300000000, 0x4edad01400000000, 0x0e77a82d00000000, + 0xce13803a00000000, 0x0cd372c700000000, 0xccb75ad000000000, + 0x8c1a22e900000000, 0x4c7e0afe00000000, 0x0c40d39b00000000, + 0xcc24fb8c00000000, 0x8c8983b500000000, 0x4cedaba200000000, + 0x0cf5317e00000000, 0xcc91196900000000, 0x8c3c615000000000, + 0x4c58494700000000, 0x0c66902200000000, 0xcc02b83500000000, + 0x8cafc00c00000000, 0x4ccbe81b00000000, 0x4d99856e00000000, + 0x8dfdad7900000000, 0xcd50d54000000000, 0x0d34fd5700000000, + 0x4d0a243200000000, 0x8d6e0c2500000000, 0xcdc3741c00000000, + 0x0da75c0b00000000, 0x4dbfc6d700000000, 0x8ddbeec000000000, + 0xcd7696f900000000, 0x0d12beee00000000, 0x4d2c678b00000000, + 0x8d484f9c00000000, 0xcde537a500000000, 0x0d811fb200000000, + 0x0862a38500000000, 0xc8068b9200000000, 0x88abf3ab00000000, + 0x48cfdbbc00000000, 0x08f102d900000000, 0xc8952ace00000000, + 0x883852f700000000, 0x485c7ae000000000, 0x0844e03c00000000, + 0xc820c82b00000000, 0x888db01200000000, 0x48e9980500000000, + 0x08d7416000000000, 0xc8b3697700000000, 0x881e114e00000000, + 0x487a395900000000, 0x4928542c00000000, 0x894c7c3b00000000, + 0xc9e1040200000000, 0x09852c1500000000, 
0x49bbf57000000000, + 0x89dfdd6700000000, 0xc972a55e00000000, 0x09168d4900000000, + 0x490e179500000000, 0x896a3f8200000000, 0xc9c747bb00000000, + 0x09a36fac00000000, 0x499db6c900000000, 0x89f99ede00000000, + 0xc954e6e700000000, 0x0930cef000000000, 0xcbf03c0d00000000, + 0x0b94141a00000000, 0x4b396c2300000000, 0x8b5d443400000000, + 0xcb639d5100000000, 0x0b07b54600000000, 0x4baacd7f00000000, + 0x8bcee56800000000, 0xcbd67fb400000000, 0x0bb257a300000000, + 0x4b1f2f9a00000000, 0x8b7b078d00000000, 0xcb45dee800000000, + 0x0b21f6ff00000000, 0x4b8c8ec600000000, 0x8be8a6d100000000, + 0x8abacba400000000, 0x4adee3b300000000, 0x0a739b8a00000000, + 0xca17b39d00000000, 0x8a296af800000000, 0x4a4d42ef00000000, + 0x0ae03ad600000000, 0xca8412c100000000, 0x8a9c881d00000000, + 0x4af8a00a00000000, 0x0a55d83300000000, 0xca31f02400000000, + 0x8a0f294100000000, 0x4a6b015600000000, 0x0ac6796f00000000, + 0xcaa2517800000000}, + {0x0000000000000000, 0xd4ea739b00000000, 0xe9d396ed00000000, + 0x3d39e57600000000, 0x93a15c0000000000, 0x474b2f9b00000000, + 0x7a72caed00000000, 0xae98b97600000000, 0x2643b90000000000, + 0xf2a9ca9b00000000, 0xcf902fed00000000, 0x1b7a5c7600000000, + 0xb5e2e50000000000, 0x6108969b00000000, 0x5c3173ed00000000, + 0x88db007600000000, 0x4c86720100000000, 0x986c019a00000000, + 0xa555e4ec00000000, 0x71bf977700000000, 0xdf272e0100000000, + 0x0bcd5d9a00000000, 0x36f4b8ec00000000, 0xe21ecb7700000000, + 0x6ac5cb0100000000, 0xbe2fb89a00000000, 0x83165dec00000000, + 0x57fc2e7700000000, 0xf964970100000000, 0x2d8ee49a00000000, + 0x10b701ec00000000, 0xc45d727700000000, 0x980ce50200000000, + 0x4ce6969900000000, 0x71df73ef00000000, 0xa535007400000000, + 0x0badb90200000000, 0xdf47ca9900000000, 0xe27e2fef00000000, + 0x36945c7400000000, 0xbe4f5c0200000000, 0x6aa52f9900000000, + 0x579ccaef00000000, 0x8376b97400000000, 0x2dee000200000000, + 0xf904739900000000, 0xc43d96ef00000000, 0x10d7e57400000000, + 0xd48a970300000000, 0x0060e49800000000, 0x3d5901ee00000000, + 0xe9b3727500000000, 
0x472bcb0300000000, 0x93c1b89800000000, + 0xaef85dee00000000, 0x7a122e7500000000, 0xf2c92e0300000000, + 0x26235d9800000000, 0x1b1ab8ee00000000, 0xcff0cb7500000000, + 0x6168720300000000, 0xb582019800000000, 0x88bbe4ee00000000, + 0x5c51977500000000, 0x3019ca0500000000, 0xe4f3b99e00000000, + 0xd9ca5ce800000000, 0x0d202f7300000000, 0xa3b8960500000000, + 0x7752e59e00000000, 0x4a6b00e800000000, 0x9e81737300000000, + 0x165a730500000000, 0xc2b0009e00000000, 0xff89e5e800000000, + 0x2b63967300000000, 0x85fb2f0500000000, 0x51115c9e00000000, + 0x6c28b9e800000000, 0xb8c2ca7300000000, 0x7c9fb80400000000, + 0xa875cb9f00000000, 0x954c2ee900000000, 0x41a65d7200000000, + 0xef3ee40400000000, 0x3bd4979f00000000, 0x06ed72e900000000, + 0xd207017200000000, 0x5adc010400000000, 0x8e36729f00000000, + 0xb30f97e900000000, 0x67e5e47200000000, 0xc97d5d0400000000, + 0x1d972e9f00000000, 0x20aecbe900000000, 0xf444b87200000000, + 0xa8152f0700000000, 0x7cff5c9c00000000, 0x41c6b9ea00000000, + 0x952cca7100000000, 0x3bb4730700000000, 0xef5e009c00000000, + 0xd267e5ea00000000, 0x068d967100000000, 0x8e56960700000000, + 0x5abce59c00000000, 0x678500ea00000000, 0xb36f737100000000, + 0x1df7ca0700000000, 0xc91db99c00000000, 0xf4245cea00000000, + 0x20ce2f7100000000, 0xe4935d0600000000, 0x30792e9d00000000, + 0x0d40cbeb00000000, 0xd9aab87000000000, 0x7732010600000000, + 0xa3d8729d00000000, 0x9ee197eb00000000, 0x4a0be47000000000, + 0xc2d0e40600000000, 0x163a979d00000000, 0x2b0372eb00000000, + 0xffe9017000000000, 0x5171b80600000000, 0x859bcb9d00000000, + 0xb8a22eeb00000000, 0x6c485d7000000000, 0x6032940b00000000, + 0xb4d8e79000000000, 0x89e102e600000000, 0x5d0b717d00000000, + 0xf393c80b00000000, 0x2779bb9000000000, 0x1a405ee600000000, + 0xceaa2d7d00000000, 0x46712d0b00000000, 0x929b5e9000000000, + 0xafa2bbe600000000, 0x7b48c87d00000000, 0xd5d0710b00000000, + 0x013a029000000000, 0x3c03e7e600000000, 0xe8e9947d00000000, + 0x2cb4e60a00000000, 0xf85e959100000000, 0xc56770e700000000, + 0x118d037c00000000, 
0xbf15ba0a00000000, 0x6bffc99100000000, + 0x56c62ce700000000, 0x822c5f7c00000000, 0x0af75f0a00000000, + 0xde1d2c9100000000, 0xe324c9e700000000, 0x37ceba7c00000000, + 0x9956030a00000000, 0x4dbc709100000000, 0x708595e700000000, + 0xa46fe67c00000000, 0xf83e710900000000, 0x2cd4029200000000, + 0x11ede7e400000000, 0xc507947f00000000, 0x6b9f2d0900000000, + 0xbf755e9200000000, 0x824cbbe400000000, 0x56a6c87f00000000, + 0xde7dc80900000000, 0x0a97bb9200000000, 0x37ae5ee400000000, + 0xe3442d7f00000000, 0x4ddc940900000000, 0x9936e79200000000, + 0xa40f02e400000000, 0x70e5717f00000000, 0xb4b8030800000000, + 0x6052709300000000, 0x5d6b95e500000000, 0x8981e67e00000000, + 0x27195f0800000000, 0xf3f32c9300000000, 0xcecac9e500000000, + 0x1a20ba7e00000000, 0x92fbba0800000000, 0x4611c99300000000, + 0x7b282ce500000000, 0xafc25f7e00000000, 0x015ae60800000000, + 0xd5b0959300000000, 0xe88970e500000000, 0x3c63037e00000000, + 0x502b5e0e00000000, 0x84c12d9500000000, 0xb9f8c8e300000000, + 0x6d12bb7800000000, 0xc38a020e00000000, 0x1760719500000000, + 0x2a5994e300000000, 0xfeb3e77800000000, 0x7668e70e00000000, + 0xa282949500000000, 0x9fbb71e300000000, 0x4b51027800000000, + 0xe5c9bb0e00000000, 0x3123c89500000000, 0x0c1a2de300000000, + 0xd8f05e7800000000, 0x1cad2c0f00000000, 0xc8475f9400000000, + 0xf57ebae200000000, 0x2194c97900000000, 0x8f0c700f00000000, + 0x5be6039400000000, 0x66dfe6e200000000, 0xb235957900000000, + 0x3aee950f00000000, 0xee04e69400000000, 0xd33d03e200000000, + 0x07d7707900000000, 0xa94fc90f00000000, 0x7da5ba9400000000, + 0x409c5fe200000000, 0x94762c7900000000, 0xc827bb0c00000000, + 0x1ccdc89700000000, 0x21f42de100000000, 0xf51e5e7a00000000, + 0x5b86e70c00000000, 0x8f6c949700000000, 0xb25571e100000000, + 0x66bf027a00000000, 0xee64020c00000000, 0x3a8e719700000000, + 0x07b794e100000000, 0xd35de77a00000000, 0x7dc55e0c00000000, + 0xa92f2d9700000000, 0x9416c8e100000000, 0x40fcbb7a00000000, + 0x84a1c90d00000000, 0x504bba9600000000, 0x6d725fe000000000, + 0xb9982c7b00000000, 
0x1700950d00000000, 0xc3eae69600000000, + 0xfed303e000000000, 0x2a39707b00000000, 0xa2e2700d00000000, + 0x7608039600000000, 0x4b31e6e000000000, 0x9fdb957b00000000, + 0x31432c0d00000000, 0xe5a95f9600000000, 0xd890bae000000000, + 0x0c7ac97b00000000}, + {0x0000000000000000, 0x2765258100000000, 0x0fcc3bd900000000, + 0x28a91e5800000000, 0x5f9e066900000000, 0x78fb23e800000000, + 0x50523db000000000, 0x7737183100000000, 0xbe3c0dd200000000, + 0x9959285300000000, 0xb1f0360b00000000, 0x9695138a00000000, + 0xe1a20bbb00000000, 0xc6c72e3a00000000, 0xee6e306200000000, + 0xc90b15e300000000, 0x3d7f6b7f00000000, 0x1a1a4efe00000000, + 0x32b350a600000000, 0x15d6752700000000, 0x62e16d1600000000, + 0x4584489700000000, 0x6d2d56cf00000000, 0x4a48734e00000000, + 0x834366ad00000000, 0xa426432c00000000, 0x8c8f5d7400000000, + 0xabea78f500000000, 0xdcdd60c400000000, 0xfbb8454500000000, + 0xd3115b1d00000000, 0xf4747e9c00000000, 0x7afed6fe00000000, + 0x5d9bf37f00000000, 0x7532ed2700000000, 0x5257c8a600000000, + 0x2560d09700000000, 0x0205f51600000000, 0x2aaceb4e00000000, + 0x0dc9cecf00000000, 0xc4c2db2c00000000, 0xe3a7fead00000000, + 0xcb0ee0f500000000, 0xec6bc57400000000, 0x9b5cdd4500000000, + 0xbc39f8c400000000, 0x9490e69c00000000, 0xb3f5c31d00000000, + 0x4781bd8100000000, 0x60e4980000000000, 0x484d865800000000, + 0x6f28a3d900000000, 0x181fbbe800000000, 0x3f7a9e6900000000, + 0x17d3803100000000, 0x30b6a5b000000000, 0xf9bdb05300000000, + 0xded895d200000000, 0xf6718b8a00000000, 0xd114ae0b00000000, + 0xa623b63a00000000, 0x814693bb00000000, 0xa9ef8de300000000, + 0x8e8aa86200000000, 0xb5fadc2600000000, 0x929ff9a700000000, + 0xba36e7ff00000000, 0x9d53c27e00000000, 0xea64da4f00000000, + 0xcd01ffce00000000, 0xe5a8e19600000000, 0xc2cdc41700000000, + 0x0bc6d1f400000000, 0x2ca3f47500000000, 0x040aea2d00000000, + 0x236fcfac00000000, 0x5458d79d00000000, 0x733df21c00000000, + 0x5b94ec4400000000, 0x7cf1c9c500000000, 0x8885b75900000000, + 0xafe092d800000000, 0x87498c8000000000, 0xa02ca90100000000, + 
0xd71bb13000000000, 0xf07e94b100000000, 0xd8d78ae900000000, + 0xffb2af6800000000, 0x36b9ba8b00000000, 0x11dc9f0a00000000, + 0x3975815200000000, 0x1e10a4d300000000, 0x6927bce200000000, + 0x4e42996300000000, 0x66eb873b00000000, 0x418ea2ba00000000, + 0xcf040ad800000000, 0xe8612f5900000000, 0xc0c8310100000000, + 0xe7ad148000000000, 0x909a0cb100000000, 0xb7ff293000000000, + 0x9f56376800000000, 0xb83312e900000000, 0x7138070a00000000, + 0x565d228b00000000, 0x7ef43cd300000000, 0x5991195200000000, + 0x2ea6016300000000, 0x09c324e200000000, 0x216a3aba00000000, + 0x060f1f3b00000000, 0xf27b61a700000000, 0xd51e442600000000, + 0xfdb75a7e00000000, 0xdad27fff00000000, 0xade567ce00000000, + 0x8a80424f00000000, 0xa2295c1700000000, 0x854c799600000000, + 0x4c476c7500000000, 0x6b2249f400000000, 0x438b57ac00000000, + 0x64ee722d00000000, 0x13d96a1c00000000, 0x34bc4f9d00000000, + 0x1c1551c500000000, 0x3b70744400000000, 0x6af5b94d00000000, + 0x4d909ccc00000000, 0x6539829400000000, 0x425ca71500000000, + 0x356bbf2400000000, 0x120e9aa500000000, 0x3aa784fd00000000, + 0x1dc2a17c00000000, 0xd4c9b49f00000000, 0xf3ac911e00000000, + 0xdb058f4600000000, 0xfc60aac700000000, 0x8b57b2f600000000, + 0xac32977700000000, 0x849b892f00000000, 0xa3feacae00000000, + 0x578ad23200000000, 0x70eff7b300000000, 0x5846e9eb00000000, + 0x7f23cc6a00000000, 0x0814d45b00000000, 0x2f71f1da00000000, + 0x07d8ef8200000000, 0x20bdca0300000000, 0xe9b6dfe000000000, + 0xced3fa6100000000, 0xe67ae43900000000, 0xc11fc1b800000000, + 0xb628d98900000000, 0x914dfc0800000000, 0xb9e4e25000000000, + 0x9e81c7d100000000, 0x100b6fb300000000, 0x376e4a3200000000, + 0x1fc7546a00000000, 0x38a271eb00000000, 0x4f9569da00000000, + 0x68f04c5b00000000, 0x4059520300000000, 0x673c778200000000, + 0xae37626100000000, 0x895247e000000000, 0xa1fb59b800000000, + 0x869e7c3900000000, 0xf1a9640800000000, 0xd6cc418900000000, + 0xfe655fd100000000, 0xd9007a5000000000, 0x2d7404cc00000000, + 0x0a11214d00000000, 0x22b83f1500000000, 0x05dd1a9400000000, + 
0x72ea02a500000000, 0x558f272400000000, 0x7d26397c00000000, + 0x5a431cfd00000000, 0x9348091e00000000, 0xb42d2c9f00000000, + 0x9c8432c700000000, 0xbbe1174600000000, 0xccd60f7700000000, + 0xebb32af600000000, 0xc31a34ae00000000, 0xe47f112f00000000, + 0xdf0f656b00000000, 0xf86a40ea00000000, 0xd0c35eb200000000, + 0xf7a67b3300000000, 0x8091630200000000, 0xa7f4468300000000, + 0x8f5d58db00000000, 0xa8387d5a00000000, 0x613368b900000000, + 0x46564d3800000000, 0x6eff536000000000, 0x499a76e100000000, + 0x3ead6ed000000000, 0x19c84b5100000000, 0x3161550900000000, + 0x1604708800000000, 0xe2700e1400000000, 0xc5152b9500000000, + 0xedbc35cd00000000, 0xcad9104c00000000, 0xbdee087d00000000, + 0x9a8b2dfc00000000, 0xb22233a400000000, 0x9547162500000000, + 0x5c4c03c600000000, 0x7b29264700000000, 0x5380381f00000000, + 0x74e51d9e00000000, 0x03d205af00000000, 0x24b7202e00000000, + 0x0c1e3e7600000000, 0x2b7b1bf700000000, 0xa5f1b39500000000, + 0x8294961400000000, 0xaa3d884c00000000, 0x8d58adcd00000000, + 0xfa6fb5fc00000000, 0xdd0a907d00000000, 0xf5a38e2500000000, + 0xd2c6aba400000000, 0x1bcdbe4700000000, 0x3ca89bc600000000, + 0x1401859e00000000, 0x3364a01f00000000, 0x4453b82e00000000, + 0x63369daf00000000, 0x4b9f83f700000000, 0x6cfaa67600000000, + 0x988ed8ea00000000, 0xbfebfd6b00000000, 0x9742e33300000000, + 0xb027c6b200000000, 0xc710de8300000000, 0xe075fb0200000000, + 0xc8dce55a00000000, 0xefb9c0db00000000, 0x26b2d53800000000, + 0x01d7f0b900000000, 0x297eeee100000000, 0x0e1bcb6000000000, + 0x792cd35100000000, 0x5e49f6d000000000, 0x76e0e88800000000, + 0x5185cd0900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 
0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 
0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 
0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 
0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 
0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 
0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 
0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43147b17, 0x8628f62e, 0xc53c8d39, 0x0c51ec5d, + 0x4f45974a, 0x8a791a73, 0xc96d6164, 0x18a2d8bb, 0x5bb6a3ac, + 0x9e8a2e95, 0xdd9e5582, 0x14f334e6, 0x57e74ff1, 0x92dbc2c8, + 0xd1cfb9df, 0x7142c0ac, 0x3256bbbb, 0xf76a3682, 0xb47e4d95, + 0x7d132cf1, 0x3e0757e6, 0xfb3bdadf, 0xb82fa1c8, 0x69e01817, + 0x2af46300, 0xefc8ee39, 0xacdc952e, 0x65b1f44a, 0x26a58f5d, + 0xe3990264, 0xa08d7973, 0xa382f182, 0xe0968a95, 0x25aa07ac, + 0x66be7cbb, 0xafd31ddf, 0xecc766c8, 0x29fbebf1, 0x6aef90e6, + 0xbb202939, 0xf834522e, 0x3d08df17, 0x7e1ca400, 0xb771c564, + 0xf465be73, 0x3159334a, 0x724d485d, 0xd2c0312e, 0x91d44a39, + 0x54e8c700, 0x17fcbc17, 0xde91dd73, 0x9d85a664, 0x58b92b5d, + 0x1bad504a, 0xca62e995, 0x89769282, 0x4c4a1fbb, 0x0f5e64ac, + 0xc63305c8, 0x85277edf, 0x401bf3e6, 0x030f88f1, 0x070392de, + 0x4417e9c9, 0x812b64f0, 0xc23f1fe7, 0x0b527e83, 0x48460594, + 0x8d7a88ad, 0xce6ef3ba, 0x1fa14a65, 0x5cb53172, 0x9989bc4b, + 0xda9dc75c, 0x13f0a638, 0x50e4dd2f, 0x95d85016, 0xd6cc2b01, + 0x76415272, 0x35552965, 0xf069a45c, 0xb37ddf4b, 0x7a10be2f, + 0x3904c538, 0xfc384801, 0xbf2c3316, 0x6ee38ac9, 0x2df7f1de, + 0xe8cb7ce7, 0xabdf07f0, 0x62b26694, 0x21a61d83, 0xe49a90ba, + 0xa78eebad, 0xa481635c, 0xe795184b, 0x22a99572, 0x61bdee65, + 0xa8d08f01, 0xebc4f416, 0x2ef8792f, 0x6dec0238, 0xbc23bbe7, + 0xff37c0f0, 0x3a0b4dc9, 0x791f36de, 0xb07257ba, 0xf3662cad, + 0x365aa194, 0x754eda83, 0xd5c3a3f0, 0x96d7d8e7, 0x53eb55de, + 0x10ff2ec9, 0xd9924fad, 0x9a8634ba, 0x5fbab983, 0x1caec294, + 0xcd617b4b, 0x8e75005c, 0x4b498d65, 0x085df672, 0xc1309716, + 0x8224ec01, 
0x47186138, 0x040c1a2f, 0x4f005566, 0x0c142e71, + 0xc928a348, 0x8a3cd85f, 0x4351b93b, 0x0045c22c, 0xc5794f15, + 0x866d3402, 0x57a28ddd, 0x14b6f6ca, 0xd18a7bf3, 0x929e00e4, + 0x5bf36180, 0x18e71a97, 0xdddb97ae, 0x9ecfecb9, 0x3e4295ca, + 0x7d56eedd, 0xb86a63e4, 0xfb7e18f3, 0x32137997, 0x71070280, + 0xb43b8fb9, 0xf72ff4ae, 0x26e04d71, 0x65f43666, 0xa0c8bb5f, + 0xe3dcc048, 0x2ab1a12c, 0x69a5da3b, 0xac995702, 0xef8d2c15, + 0xec82a4e4, 0xaf96dff3, 0x6aaa52ca, 0x29be29dd, 0xe0d348b9, + 0xa3c733ae, 0x66fbbe97, 0x25efc580, 0xf4207c5f, 0xb7340748, + 0x72088a71, 0x311cf166, 0xf8719002, 0xbb65eb15, 0x7e59662c, + 0x3d4d1d3b, 0x9dc06448, 0xded41f5f, 0x1be89266, 0x58fce971, + 0x91918815, 0xd285f302, 0x17b97e3b, 0x54ad052c, 0x8562bcf3, + 0xc676c7e4, 0x034a4add, 0x405e31ca, 0x893350ae, 0xca272bb9, + 0x0f1ba680, 0x4c0fdd97, 0x4803c7b8, 0x0b17bcaf, 0xce2b3196, + 0x8d3f4a81, 0x44522be5, 0x074650f2, 0xc27addcb, 0x816ea6dc, + 0x50a11f03, 0x13b56414, 0xd689e92d, 0x959d923a, 0x5cf0f35e, + 0x1fe48849, 0xdad80570, 0x99cc7e67, 0x39410714, 0x7a557c03, + 0xbf69f13a, 0xfc7d8a2d, 0x3510eb49, 0x7604905e, 0xb3381d67, + 0xf02c6670, 0x21e3dfaf, 0x62f7a4b8, 0xa7cb2981, 0xe4df5296, + 0x2db233f2, 0x6ea648e5, 0xab9ac5dc, 0xe88ebecb, 0xeb81363a, + 0xa8954d2d, 0x6da9c014, 0x2ebdbb03, 0xe7d0da67, 0xa4c4a170, + 0x61f82c49, 0x22ec575e, 0xf323ee81, 0xb0379596, 0x750b18af, + 0x361f63b8, 0xff7202dc, 0xbc6679cb, 0x795af4f2, 0x3a4e8fe5, + 0x9ac3f696, 0xd9d78d81, 0x1ceb00b8, 0x5fff7baf, 0x96921acb, + 0xd58661dc, 0x10baece5, 0x53ae97f2, 0x82612e2d, 0xc175553a, + 0x0449d803, 0x475da314, 0x8e30c270, 0xcd24b967, 0x0818345e, + 0x4b0c4f49}, + {0x00000000, 0x3e6bc2ef, 0x3dd0f504, 0x03bb37eb, 0x7aa0eb09, + 0x44cb29e6, 0x47701e0d, 0x791bdce2, 0xf440d713, 0xca2b15fc, + 0xc9902217, 0xf7fbe0f8, 0x8ee03c1a, 0xb08bfef5, 0xb330c91e, + 0x8d5b0bf1, 0xe881ae27, 0xd6ea6cc8, 0xd5515b23, 0xeb3a99cc, + 0x9221452e, 0xac4a87c1, 0xaff1b02a, 0x919a72c5, 0x1cc17934, + 0x22aabbdb, 0x21118c30, 0x1f7a4edf, 0x6661923d, 0x580a50d2, + 0x5bb16739, 
0x65daa5d6, 0xd0035d4f, 0xee689fa0, 0xedd3a84b, + 0xd3b86aa4, 0xaaa3b646, 0x94c874a9, 0x97734342, 0xa91881ad, + 0x24438a5c, 0x1a2848b3, 0x19937f58, 0x27f8bdb7, 0x5ee36155, + 0x6088a3ba, 0x63339451, 0x5d5856be, 0x3882f368, 0x06e93187, + 0x0552066c, 0x3b39c483, 0x42221861, 0x7c49da8e, 0x7ff2ed65, + 0x41992f8a, 0xccc2247b, 0xf2a9e694, 0xf112d17f, 0xcf791390, + 0xb662cf72, 0x88090d9d, 0x8bb23a76, 0xb5d9f899, 0xa007ba9e, + 0x9e6c7871, 0x9dd74f9a, 0xa3bc8d75, 0xdaa75197, 0xe4cc9378, + 0xe777a493, 0xd91c667c, 0x54476d8d, 0x6a2caf62, 0x69979889, + 0x57fc5a66, 0x2ee78684, 0x108c446b, 0x13377380, 0x2d5cb16f, + 0x488614b9, 0x76edd656, 0x7556e1bd, 0x4b3d2352, 0x3226ffb0, + 0x0c4d3d5f, 0x0ff60ab4, 0x319dc85b, 0xbcc6c3aa, 0x82ad0145, + 0x811636ae, 0xbf7df441, 0xc66628a3, 0xf80dea4c, 0xfbb6dda7, + 0xc5dd1f48, 0x7004e7d1, 0x4e6f253e, 0x4dd412d5, 0x73bfd03a, + 0x0aa40cd8, 0x34cfce37, 0x3774f9dc, 0x091f3b33, 0x844430c2, + 0xba2ff22d, 0xb994c5c6, 0x87ff0729, 0xfee4dbcb, 0xc08f1924, + 0xc3342ecf, 0xfd5fec20, 0x988549f6, 0xa6ee8b19, 0xa555bcf2, + 0x9b3e7e1d, 0xe225a2ff, 0xdc4e6010, 0xdff557fb, 0xe19e9514, + 0x6cc59ee5, 0x52ae5c0a, 0x51156be1, 0x6f7ea90e, 0x166575ec, + 0x280eb703, 0x2bb580e8, 0x15de4207, 0x010905e6, 0x3f62c709, + 0x3cd9f0e2, 0x02b2320d, 0x7ba9eeef, 0x45c22c00, 0x46791beb, + 0x7812d904, 0xf549d2f5, 0xcb22101a, 0xc89927f1, 0xf6f2e51e, + 0x8fe939fc, 0xb182fb13, 0xb239ccf8, 0x8c520e17, 0xe988abc1, + 0xd7e3692e, 0xd4585ec5, 0xea339c2a, 0x932840c8, 0xad438227, + 0xaef8b5cc, 0x90937723, 0x1dc87cd2, 0x23a3be3d, 0x201889d6, + 0x1e734b39, 0x676897db, 0x59035534, 0x5ab862df, 0x64d3a030, + 0xd10a58a9, 0xef619a46, 0xecdaadad, 0xd2b16f42, 0xabaab3a0, + 0x95c1714f, 0x967a46a4, 0xa811844b, 0x254a8fba, 0x1b214d55, + 0x189a7abe, 0x26f1b851, 0x5fea64b3, 0x6181a65c, 0x623a91b7, + 0x5c515358, 0x398bf68e, 0x07e03461, 0x045b038a, 0x3a30c165, + 0x432b1d87, 0x7d40df68, 0x7efbe883, 0x40902a6c, 0xcdcb219d, + 0xf3a0e372, 0xf01bd499, 0xce701676, 0xb76bca94, 0x8900087b, + 0x8abb3f90, 0xb4d0fd7f, 
0xa10ebf78, 0x9f657d97, 0x9cde4a7c, + 0xa2b58893, 0xdbae5471, 0xe5c5969e, 0xe67ea175, 0xd815639a, + 0x554e686b, 0x6b25aa84, 0x689e9d6f, 0x56f55f80, 0x2fee8362, + 0x1185418d, 0x123e7666, 0x2c55b489, 0x498f115f, 0x77e4d3b0, + 0x745fe45b, 0x4a3426b4, 0x332ffa56, 0x0d4438b9, 0x0eff0f52, + 0x3094cdbd, 0xbdcfc64c, 0x83a404a3, 0x801f3348, 0xbe74f1a7, + 0xc76f2d45, 0xf904efaa, 0xfabfd841, 0xc4d41aae, 0x710de237, + 0x4f6620d8, 0x4cdd1733, 0x72b6d5dc, 0x0bad093e, 0x35c6cbd1, + 0x367dfc3a, 0x08163ed5, 0x854d3524, 0xbb26f7cb, 0xb89dc020, + 0x86f602cf, 0xffedde2d, 0xc1861cc2, 0xc23d2b29, 0xfc56e9c6, + 0x998c4c10, 0xa7e78eff, 0xa45cb914, 0x9a377bfb, 0xe32ca719, + 0xdd4765f6, 0xdefc521d, 0xe09790f2, 0x6dcc9b03, 0x53a759ec, + 0x501c6e07, 0x6e77ace8, 0x176c700a, 0x2907b2e5, 0x2abc850e, + 0x14d747e1}, + {0x00000000, 0xc0df8ec1, 0xc1b96c58, 0x0166e299, 0x8273d9b0, + 0x42ac5771, 0x43cab5e8, 0x83153b29, 0x45e1c3ba, 0x853e4d7b, + 0x8458afe2, 0x44872123, 0xc7921a0a, 0x074d94cb, 0x062b7652, + 0xc6f4f893, 0xcbc4f6ae, 0x0b1b786f, 0x0a7d9af6, 0xcaa21437, + 0x49b72f1e, 0x8968a1df, 0x880e4346, 0x48d1cd87, 0x8e253514, + 0x4efabbd5, 0x4f9c594c, 0x8f43d78d, 0x0c56eca4, 0xcc896265, + 0xcdef80fc, 0x0d300e3d, 0xd78f9c86, 0x17501247, 0x1636f0de, + 0xd6e97e1f, 0x55fc4536, 0x9523cbf7, 0x9445296e, 0x549aa7af, + 0x926e5f3c, 0x52b1d1fd, 0x53d73364, 0x9308bda5, 0x101d868c, + 0xd0c2084d, 0xd1a4ead4, 0x117b6415, 0x1c4b6a28, 0xdc94e4e9, + 0xddf20670, 0x1d2d88b1, 0x9e38b398, 0x5ee73d59, 0x5f81dfc0, + 0x9f5e5101, 0x59aaa992, 0x99752753, 0x9813c5ca, 0x58cc4b0b, + 0xdbd97022, 0x1b06fee3, 0x1a601c7a, 0xdabf92bb, 0xef1948d6, + 0x2fc6c617, 0x2ea0248e, 0xee7faa4f, 0x6d6a9166, 0xadb51fa7, + 0xacd3fd3e, 0x6c0c73ff, 0xaaf88b6c, 0x6a2705ad, 0x6b41e734, + 0xab9e69f5, 0x288b52dc, 0xe854dc1d, 0xe9323e84, 0x29edb045, + 0x24ddbe78, 0xe40230b9, 0xe564d220, 0x25bb5ce1, 0xa6ae67c8, + 0x6671e909, 0x67170b90, 0xa7c88551, 0x613c7dc2, 0xa1e3f303, + 0xa085119a, 0x605a9f5b, 0xe34fa472, 0x23902ab3, 0x22f6c82a, + 0xe22946eb, 0x3896d450, 
0xf8495a91, 0xf92fb808, 0x39f036c9, + 0xbae50de0, 0x7a3a8321, 0x7b5c61b8, 0xbb83ef79, 0x7d7717ea, + 0xbda8992b, 0xbcce7bb2, 0x7c11f573, 0xff04ce5a, 0x3fdb409b, + 0x3ebda202, 0xfe622cc3, 0xf35222fe, 0x338dac3f, 0x32eb4ea6, + 0xf234c067, 0x7121fb4e, 0xb1fe758f, 0xb0989716, 0x704719d7, + 0xb6b3e144, 0x766c6f85, 0x770a8d1c, 0xb7d503dd, 0x34c038f4, + 0xf41fb635, 0xf57954ac, 0x35a6da6d, 0x9f35e177, 0x5fea6fb6, + 0x5e8c8d2f, 0x9e5303ee, 0x1d4638c7, 0xdd99b606, 0xdcff549f, + 0x1c20da5e, 0xdad422cd, 0x1a0bac0c, 0x1b6d4e95, 0xdbb2c054, + 0x58a7fb7d, 0x987875bc, 0x991e9725, 0x59c119e4, 0x54f117d9, + 0x942e9918, 0x95487b81, 0x5597f540, 0xd682ce69, 0x165d40a8, + 0x173ba231, 0xd7e42cf0, 0x1110d463, 0xd1cf5aa2, 0xd0a9b83b, + 0x107636fa, 0x93630dd3, 0x53bc8312, 0x52da618b, 0x9205ef4a, + 0x48ba7df1, 0x8865f330, 0x890311a9, 0x49dc9f68, 0xcac9a441, + 0x0a162a80, 0x0b70c819, 0xcbaf46d8, 0x0d5bbe4b, 0xcd84308a, + 0xcce2d213, 0x0c3d5cd2, 0x8f2867fb, 0x4ff7e93a, 0x4e910ba3, + 0x8e4e8562, 0x837e8b5f, 0x43a1059e, 0x42c7e707, 0x821869c6, + 0x010d52ef, 0xc1d2dc2e, 0xc0b43eb7, 0x006bb076, 0xc69f48e5, + 0x0640c624, 0x072624bd, 0xc7f9aa7c, 0x44ec9155, 0x84331f94, + 0x8555fd0d, 0x458a73cc, 0x702ca9a1, 0xb0f32760, 0xb195c5f9, + 0x714a4b38, 0xf25f7011, 0x3280fed0, 0x33e61c49, 0xf3399288, + 0x35cd6a1b, 0xf512e4da, 0xf4740643, 0x34ab8882, 0xb7beb3ab, + 0x77613d6a, 0x7607dff3, 0xb6d85132, 0xbbe85f0f, 0x7b37d1ce, + 0x7a513357, 0xba8ebd96, 0x399b86bf, 0xf944087e, 0xf822eae7, + 0x38fd6426, 0xfe099cb5, 0x3ed61274, 0x3fb0f0ed, 0xff6f7e2c, + 0x7c7a4505, 0xbca5cbc4, 0xbdc3295d, 0x7d1ca79c, 0xa7a33527, + 0x677cbbe6, 0x661a597f, 0xa6c5d7be, 0x25d0ec97, 0xe50f6256, + 0xe46980cf, 0x24b60e0e, 0xe242f69d, 0x229d785c, 0x23fb9ac5, + 0xe3241404, 0x60312f2d, 0xa0eea1ec, 0xa1884375, 0x6157cdb4, + 0x6c67c389, 0xacb84d48, 0xaddeafd1, 0x6d012110, 0xee141a39, + 0x2ecb94f8, 0x2fad7661, 0xef72f8a0, 0x29860033, 0xe9598ef2, + 0xe83f6c6b, 0x28e0e2aa, 0xabf5d983, 0x6b2a5742, 0x6a4cb5db, + 0xaa933b1a}, + {0x00000000, 0x6f4ca59b, 
0x9f9e3bec, 0xf0d29e77, 0x7f3b0603, + 0x1077a398, 0xe0a53def, 0x8fe99874, 0xfe760c06, 0x913aa99d, + 0x61e837ea, 0x0ea49271, 0x814d0a05, 0xee01af9e, 0x1ed331e9, + 0x719f9472, 0xfced180c, 0x93a1bd97, 0x637323e0, 0x0c3f867b, + 0x83d61e0f, 0xec9abb94, 0x1c4825e3, 0x73048078, 0x029b140a, + 0x6dd7b191, 0x9d052fe6, 0xf2498a7d, 0x7da01209, 0x12ecb792, + 0xe23e29e5, 0x8d728c7e, 0xf8db3118, 0x97979483, 0x67450af4, + 0x0809af6f, 0x87e0371b, 0xe8ac9280, 0x187e0cf7, 0x7732a96c, + 0x06ad3d1e, 0x69e19885, 0x993306f2, 0xf67fa369, 0x79963b1d, + 0x16da9e86, 0xe60800f1, 0x8944a56a, 0x04362914, 0x6b7a8c8f, + 0x9ba812f8, 0xf4e4b763, 0x7b0d2f17, 0x14418a8c, 0xe49314fb, + 0x8bdfb160, 0xfa402512, 0x950c8089, 0x65de1efe, 0x0a92bb65, + 0x857b2311, 0xea37868a, 0x1ae518fd, 0x75a9bd66, 0xf0b76330, + 0x9ffbc6ab, 0x6f2958dc, 0x0065fd47, 0x8f8c6533, 0xe0c0c0a8, + 0x10125edf, 0x7f5efb44, 0x0ec16f36, 0x618dcaad, 0x915f54da, + 0xfe13f141, 0x71fa6935, 0x1eb6ccae, 0xee6452d9, 0x8128f742, + 0x0c5a7b3c, 0x6316dea7, 0x93c440d0, 0xfc88e54b, 0x73617d3f, + 0x1c2dd8a4, 0xecff46d3, 0x83b3e348, 0xf22c773a, 0x9d60d2a1, + 0x6db24cd6, 0x02fee94d, 0x8d177139, 0xe25bd4a2, 0x12894ad5, + 0x7dc5ef4e, 0x086c5228, 0x6720f7b3, 0x97f269c4, 0xf8becc5f, + 0x7757542b, 0x181bf1b0, 0xe8c96fc7, 0x8785ca5c, 0xf61a5e2e, + 0x9956fbb5, 0x698465c2, 0x06c8c059, 0x8921582d, 0xe66dfdb6, + 0x16bf63c1, 0x79f3c65a, 0xf4814a24, 0x9bcdefbf, 0x6b1f71c8, + 0x0453d453, 0x8bba4c27, 0xe4f6e9bc, 0x142477cb, 0x7b68d250, + 0x0af74622, 0x65bbe3b9, 0x95697dce, 0xfa25d855, 0x75cc4021, + 0x1a80e5ba, 0xea527bcd, 0x851ede56, 0xe06fc760, 0x8f2362fb, + 0x7ff1fc8c, 0x10bd5917, 0x9f54c163, 0xf01864f8, 0x00cafa8f, + 0x6f865f14, 0x1e19cb66, 0x71556efd, 0x8187f08a, 0xeecb5511, + 0x6122cd65, 0x0e6e68fe, 0xfebcf689, 0x91f05312, 0x1c82df6c, + 0x73ce7af7, 0x831ce480, 0xec50411b, 0x63b9d96f, 0x0cf57cf4, + 0xfc27e283, 0x936b4718, 0xe2f4d36a, 0x8db876f1, 0x7d6ae886, + 0x12264d1d, 0x9dcfd569, 0xf28370f2, 0x0251ee85, 0x6d1d4b1e, + 0x18b4f678, 0x77f853e3, 0x872acd94, 
0xe866680f, 0x678ff07b, + 0x08c355e0, 0xf811cb97, 0x975d6e0c, 0xe6c2fa7e, 0x898e5fe5, + 0x795cc192, 0x16106409, 0x99f9fc7d, 0xf6b559e6, 0x0667c791, + 0x692b620a, 0xe459ee74, 0x8b154bef, 0x7bc7d598, 0x148b7003, + 0x9b62e877, 0xf42e4dec, 0x04fcd39b, 0x6bb07600, 0x1a2fe272, + 0x756347e9, 0x85b1d99e, 0xeafd7c05, 0x6514e471, 0x0a5841ea, + 0xfa8adf9d, 0x95c67a06, 0x10d8a450, 0x7f9401cb, 0x8f469fbc, + 0xe00a3a27, 0x6fe3a253, 0x00af07c8, 0xf07d99bf, 0x9f313c24, + 0xeeaea856, 0x81e20dcd, 0x713093ba, 0x1e7c3621, 0x9195ae55, + 0xfed90bce, 0x0e0b95b9, 0x61473022, 0xec35bc5c, 0x837919c7, + 0x73ab87b0, 0x1ce7222b, 0x930eba5f, 0xfc421fc4, 0x0c9081b3, + 0x63dc2428, 0x1243b05a, 0x7d0f15c1, 0x8ddd8bb6, 0xe2912e2d, + 0x6d78b659, 0x023413c2, 0xf2e68db5, 0x9daa282e, 0xe8039548, + 0x874f30d3, 0x779daea4, 0x18d10b3f, 0x9738934b, 0xf87436d0, + 0x08a6a8a7, 0x67ea0d3c, 0x1675994e, 0x79393cd5, 0x89eba2a2, + 0xe6a70739, 0x694e9f4d, 0x06023ad6, 0xf6d0a4a1, 0x999c013a, + 0x14ee8d44, 0x7ba228df, 0x8b70b6a8, 0xe43c1333, 0x6bd58b47, + 0x04992edc, 0xf44bb0ab, 0x9b071530, 0xea988142, 0x85d424d9, + 0x7506baae, 0x1a4a1f35, 0x95a38741, 0xfaef22da, 0x0a3dbcad, + 0x65711936}}; + +#endif + +#endif + +#if N == 4 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xf1da05aa, 0x38c50d15, 0xc91f08bf, 0x718a1a2a, + 0x80501f80, 0x494f173f, 0xb8951295, 0xe3143454, 0x12ce31fe, + 0xdbd13941, 0x2a0b3ceb, 0x929e2e7e, 0x63442bd4, 0xaa5b236b, + 0x5b8126c1, 0x1d596ee9, 0xec836b43, 0x259c63fc, 0xd4466656, + 0x6cd374c3, 0x9d097169, 0x541679d6, 0xa5cc7c7c, 0xfe4d5abd, + 0x0f975f17, 0xc68857a8, 0x37525202, 0x8fc74097, 0x7e1d453d, + 0xb7024d82, 0x46d84828, 0x3ab2ddd2, 0xcb68d878, 0x0277d0c7, + 0xf3add56d, 0x4b38c7f8, 0xbae2c252, 0x73fdcaed, 0x8227cf47, + 0xd9a6e986, 0x287cec2c, 0xe163e493, 0x10b9e139, 0xa82cf3ac, + 0x59f6f606, 0x90e9feb9, 0x6133fb13, 0x27ebb33b, 0xd631b691, + 0x1f2ebe2e, 0xeef4bb84, 0x5661a911, 0xa7bbacbb, 0x6ea4a404, + 0x9f7ea1ae, 0xc4ff876f, 0x352582c5, 0xfc3a8a7a, 
0x0de08fd0, + 0xb5759d45, 0x44af98ef, 0x8db09050, 0x7c6a95fa, 0x7565bba4, + 0x84bfbe0e, 0x4da0b6b1, 0xbc7ab31b, 0x04efa18e, 0xf535a424, + 0x3c2aac9b, 0xcdf0a931, 0x96718ff0, 0x67ab8a5a, 0xaeb482e5, + 0x5f6e874f, 0xe7fb95da, 0x16219070, 0xdf3e98cf, 0x2ee49d65, + 0x683cd54d, 0x99e6d0e7, 0x50f9d858, 0xa123ddf2, 0x19b6cf67, + 0xe86ccacd, 0x2173c272, 0xd0a9c7d8, 0x8b28e119, 0x7af2e4b3, + 0xb3edec0c, 0x4237e9a6, 0xfaa2fb33, 0x0b78fe99, 0xc267f626, + 0x33bdf38c, 0x4fd76676, 0xbe0d63dc, 0x77126b63, 0x86c86ec9, + 0x3e5d7c5c, 0xcf8779f6, 0x06987149, 0xf74274e3, 0xacc35222, + 0x5d195788, 0x94065f37, 0x65dc5a9d, 0xdd494808, 0x2c934da2, + 0xe58c451d, 0x145640b7, 0x528e089f, 0xa3540d35, 0x6a4b058a, + 0x9b910020, 0x230412b5, 0xd2de171f, 0x1bc11fa0, 0xea1b1a0a, + 0xb19a3ccb, 0x40403961, 0x895f31de, 0x78853474, 0xc01026e1, + 0x31ca234b, 0xf8d52bf4, 0x090f2e5e, 0xeacb7748, 0x1b1172e2, + 0xd20e7a5d, 0x23d47ff7, 0x9b416d62, 0x6a9b68c8, 0xa3846077, + 0x525e65dd, 0x09df431c, 0xf80546b6, 0x311a4e09, 0xc0c04ba3, + 0x78555936, 0x898f5c9c, 0x40905423, 0xb14a5189, 0xf79219a1, + 0x06481c0b, 0xcf5714b4, 0x3e8d111e, 0x8618038b, 0x77c20621, + 0xbedd0e9e, 0x4f070b34, 0x14862df5, 0xe55c285f, 0x2c4320e0, + 0xdd99254a, 0x650c37df, 0x94d63275, 0x5dc93aca, 0xac133f60, + 0xd079aa9a, 0x21a3af30, 0xe8bca78f, 0x1966a225, 0xa1f3b0b0, + 0x5029b51a, 0x9936bda5, 0x68ecb80f, 0x336d9ece, 0xc2b79b64, + 0x0ba893db, 0xfa729671, 0x42e784e4, 0xb33d814e, 0x7a2289f1, + 0x8bf88c5b, 0xcd20c473, 0x3cfac1d9, 0xf5e5c966, 0x043fcccc, + 0xbcaade59, 0x4d70dbf3, 0x846fd34c, 0x75b5d6e6, 0x2e34f027, + 0xdfeef58d, 0x16f1fd32, 0xe72bf898, 0x5fbeea0d, 0xae64efa7, + 0x677be718, 0x96a1e2b2, 0x9faeccec, 0x6e74c946, 0xa76bc1f9, + 0x56b1c453, 0xee24d6c6, 0x1ffed36c, 0xd6e1dbd3, 0x273bde79, + 0x7cbaf8b8, 0x8d60fd12, 0x447ff5ad, 0xb5a5f007, 0x0d30e292, + 0xfceae738, 0x35f5ef87, 0xc42fea2d, 0x82f7a205, 0x732da7af, + 0xba32af10, 0x4be8aaba, 0xf37db82f, 0x02a7bd85, 0xcbb8b53a, + 0x3a62b090, 0x61e39651, 0x903993fb, 0x59269b44, 0xa8fc9eee, + 
0x10698c7b, 0xe1b389d1, 0x28ac816e, 0xd97684c4, 0xa51c113e, + 0x54c61494, 0x9dd91c2b, 0x6c031981, 0xd4960b14, 0x254c0ebe, + 0xec530601, 0x1d8903ab, 0x4608256a, 0xb7d220c0, 0x7ecd287f, + 0x8f172dd5, 0x37823f40, 0xc6583aea, 0x0f473255, 0xfe9d37ff, + 0xb8457fd7, 0x499f7a7d, 0x808072c2, 0x715a7768, 0xc9cf65fd, + 0x38156057, 0xf10a68e8, 0x00d06d42, 0x5b514b83, 0xaa8b4e29, + 0x63944696, 0x924e433c, 0x2adb51a9, 0xdb015403, 0x121e5cbc, + 0xe3c45916}, + {0x00000000, 0x0ee7e8d1, 0x1dcfd1a2, 0x13283973, 0x3b9fa344, + 0x35784b95, 0x265072e6, 0x28b79a37, 0x773f4688, 0x79d8ae59, + 0x6af0972a, 0x64177ffb, 0x4ca0e5cc, 0x42470d1d, 0x516f346e, + 0x5f88dcbf, 0xee7e8d10, 0xe09965c1, 0xf3b15cb2, 0xfd56b463, + 0xd5e12e54, 0xdb06c685, 0xc82efff6, 0xc6c91727, 0x9941cb98, + 0x97a62349, 0x848e1a3a, 0x8a69f2eb, 0xa2de68dc, 0xac39800d, + 0xbf11b97e, 0xb1f651af, 0x078c1c61, 0x096bf4b0, 0x1a43cdc3, + 0x14a42512, 0x3c13bf25, 0x32f457f4, 0x21dc6e87, 0x2f3b8656, + 0x70b35ae9, 0x7e54b238, 0x6d7c8b4b, 0x639b639a, 0x4b2cf9ad, + 0x45cb117c, 0x56e3280f, 0x5804c0de, 0xe9f29171, 0xe71579a0, + 0xf43d40d3, 0xfadaa802, 0xd26d3235, 0xdc8adae4, 0xcfa2e397, + 0xc1450b46, 0x9ecdd7f9, 0x902a3f28, 0x8302065b, 0x8de5ee8a, + 0xa55274bd, 0xabb59c6c, 0xb89da51f, 0xb67a4dce, 0x0f1838c2, + 0x01ffd013, 0x12d7e960, 0x1c3001b1, 0x34879b86, 0x3a607357, + 0x29484a24, 0x27afa2f5, 0x78277e4a, 0x76c0969b, 0x65e8afe8, + 0x6b0f4739, 0x43b8dd0e, 0x4d5f35df, 0x5e770cac, 0x5090e47d, + 0xe166b5d2, 0xef815d03, 0xfca96470, 0xf24e8ca1, 0xdaf91696, + 0xd41efe47, 0xc736c734, 0xc9d12fe5, 0x9659f35a, 0x98be1b8b, + 0x8b9622f8, 0x8571ca29, 0xadc6501e, 0xa321b8cf, 0xb00981bc, + 0xbeee696d, 0x089424a3, 0x0673cc72, 0x155bf501, 0x1bbc1dd0, + 0x330b87e7, 0x3dec6f36, 0x2ec45645, 0x2023be94, 0x7fab622b, + 0x714c8afa, 0x6264b389, 0x6c835b58, 0x4434c16f, 0x4ad329be, + 0x59fb10cd, 0x571cf81c, 0xe6eaa9b3, 0xe80d4162, 0xfb257811, + 0xf5c290c0, 0xdd750af7, 0xd392e226, 0xc0badb55, 0xce5d3384, + 0x91d5ef3b, 0x9f3207ea, 0x8c1a3e99, 0x82fdd648, 0xaa4a4c7f, + 
0xa4ada4ae, 0xb7859ddd, 0xb962750c, 0x1e307184, 0x10d79955, + 0x03ffa026, 0x0d1848f7, 0x25afd2c0, 0x2b483a11, 0x38600362, + 0x3687ebb3, 0x690f370c, 0x67e8dfdd, 0x74c0e6ae, 0x7a270e7f, + 0x52909448, 0x5c777c99, 0x4f5f45ea, 0x41b8ad3b, 0xf04efc94, + 0xfea91445, 0xed812d36, 0xe366c5e7, 0xcbd15fd0, 0xc536b701, + 0xd61e8e72, 0xd8f966a3, 0x8771ba1c, 0x899652cd, 0x9abe6bbe, + 0x9459836f, 0xbcee1958, 0xb209f189, 0xa121c8fa, 0xafc6202b, + 0x19bc6de5, 0x175b8534, 0x0473bc47, 0x0a945496, 0x2223cea1, + 0x2cc42670, 0x3fec1f03, 0x310bf7d2, 0x6e832b6d, 0x6064c3bc, + 0x734cfacf, 0x7dab121e, 0x551c8829, 0x5bfb60f8, 0x48d3598b, + 0x4634b15a, 0xf7c2e0f5, 0xf9250824, 0xea0d3157, 0xe4ead986, + 0xcc5d43b1, 0xc2baab60, 0xd1929213, 0xdf757ac2, 0x80fda67d, + 0x8e1a4eac, 0x9d3277df, 0x93d59f0e, 0xbb620539, 0xb585ede8, + 0xa6add49b, 0xa84a3c4a, 0x11284946, 0x1fcfa197, 0x0ce798e4, + 0x02007035, 0x2ab7ea02, 0x245002d3, 0x37783ba0, 0x399fd371, + 0x66170fce, 0x68f0e71f, 0x7bd8de6c, 0x753f36bd, 0x5d88ac8a, + 0x536f445b, 0x40477d28, 0x4ea095f9, 0xff56c456, 0xf1b12c87, + 0xe29915f4, 0xec7efd25, 0xc4c96712, 0xca2e8fc3, 0xd906b6b0, + 0xd7e15e61, 0x886982de, 0x868e6a0f, 0x95a6537c, 0x9b41bbad, + 0xb3f6219a, 0xbd11c94b, 0xae39f038, 0xa0de18e9, 0x16a45527, + 0x1843bdf6, 0x0b6b8485, 0x058c6c54, 0x2d3bf663, 0x23dc1eb2, + 0x30f427c1, 0x3e13cf10, 0x619b13af, 0x6f7cfb7e, 0x7c54c20d, + 0x72b32adc, 0x5a04b0eb, 0x54e3583a, 0x47cb6149, 0x492c8998, + 0xf8dad837, 0xf63d30e6, 0xe5150995, 0xebf2e144, 0xc3457b73, + 0xcda293a2, 0xde8aaad1, 0xd06d4200, 0x8fe59ebf, 0x8102766e, + 0x922a4f1d, 0x9ccda7cc, 0xb47a3dfb, 0xba9dd52a, 0xa9b5ec59, + 0xa7520488}, + {0x00000000, 0x3c60e308, 0x78c1c610, 0x44a12518, 0xf1838c20, + 0xcde36f28, 0x89424a30, 0xb522a938, 0x38761e01, 0x0416fd09, + 0x40b7d811, 0x7cd73b19, 0xc9f59221, 0xf5957129, 0xb1345431, + 0x8d54b739, 0x70ec3c02, 0x4c8cdf0a, 0x082dfa12, 0x344d191a, + 0x816fb022, 0xbd0f532a, 0xf9ae7632, 0xc5ce953a, 0x489a2203, + 0x74fac10b, 0x305be413, 0x0c3b071b, 0xb919ae23, 0x85794d2b, + 
0xc1d86833, 0xfdb88b3b, 0xe1d87804, 0xddb89b0c, 0x9919be14, + 0xa5795d1c, 0x105bf424, 0x2c3b172c, 0x689a3234, 0x54fad13c, + 0xd9ae6605, 0xe5ce850d, 0xa16fa015, 0x9d0f431d, 0x282dea25, + 0x144d092d, 0x50ec2c35, 0x6c8ccf3d, 0x91344406, 0xad54a70e, + 0xe9f58216, 0xd595611e, 0x60b7c826, 0x5cd72b2e, 0x18760e36, + 0x2416ed3e, 0xa9425a07, 0x9522b90f, 0xd1839c17, 0xede37f1f, + 0x58c1d627, 0x64a1352f, 0x20001037, 0x1c60f33f, 0x18c1f649, + 0x24a11541, 0x60003059, 0x5c60d351, 0xe9427a69, 0xd5229961, + 0x9183bc79, 0xade35f71, 0x20b7e848, 0x1cd70b40, 0x58762e58, + 0x6416cd50, 0xd1346468, 0xed548760, 0xa9f5a278, 0x95954170, + 0x682dca4b, 0x544d2943, 0x10ec0c5b, 0x2c8cef53, 0x99ae466b, + 0xa5cea563, 0xe16f807b, 0xdd0f6373, 0x505bd44a, 0x6c3b3742, + 0x289a125a, 0x14faf152, 0xa1d8586a, 0x9db8bb62, 0xd9199e7a, + 0xe5797d72, 0xf9198e4d, 0xc5796d45, 0x81d8485d, 0xbdb8ab55, + 0x089a026d, 0x34fae165, 0x705bc47d, 0x4c3b2775, 0xc16f904c, + 0xfd0f7344, 0xb9ae565c, 0x85ceb554, 0x30ec1c6c, 0x0c8cff64, + 0x482dda7c, 0x744d3974, 0x89f5b24f, 0xb5955147, 0xf134745f, + 0xcd549757, 0x78763e6f, 0x4416dd67, 0x00b7f87f, 0x3cd71b77, + 0xb183ac4e, 0x8de34f46, 0xc9426a5e, 0xf5228956, 0x4000206e, + 0x7c60c366, 0x38c1e67e, 0x04a10576, 0x3183ec92, 0x0de30f9a, + 0x49422a82, 0x7522c98a, 0xc00060b2, 0xfc6083ba, 0xb8c1a6a2, + 0x84a145aa, 0x09f5f293, 0x3595119b, 0x71343483, 0x4d54d78b, + 0xf8767eb3, 0xc4169dbb, 0x80b7b8a3, 0xbcd75bab, 0x416fd090, + 0x7d0f3398, 0x39ae1680, 0x05cef588, 0xb0ec5cb0, 0x8c8cbfb8, + 0xc82d9aa0, 0xf44d79a8, 0x7919ce91, 0x45792d99, 0x01d80881, + 0x3db8eb89, 0x889a42b1, 0xb4faa1b9, 0xf05b84a1, 0xcc3b67a9, + 0xd05b9496, 0xec3b779e, 0xa89a5286, 0x94fab18e, 0x21d818b6, + 0x1db8fbbe, 0x5919dea6, 0x65793dae, 0xe82d8a97, 0xd44d699f, + 0x90ec4c87, 0xac8caf8f, 0x19ae06b7, 0x25cee5bf, 0x616fc0a7, + 0x5d0f23af, 0xa0b7a894, 0x9cd74b9c, 0xd8766e84, 0xe4168d8c, + 0x513424b4, 0x6d54c7bc, 0x29f5e2a4, 0x159501ac, 0x98c1b695, + 0xa4a1559d, 0xe0007085, 0xdc60938d, 0x69423ab5, 0x5522d9bd, + 0x1183fca5, 
0x2de31fad, 0x29421adb, 0x1522f9d3, 0x5183dccb, + 0x6de33fc3, 0xd8c196fb, 0xe4a175f3, 0xa00050eb, 0x9c60b3e3, + 0x113404da, 0x2d54e7d2, 0x69f5c2ca, 0x559521c2, 0xe0b788fa, + 0xdcd76bf2, 0x98764eea, 0xa416ade2, 0x59ae26d9, 0x65cec5d1, + 0x216fe0c9, 0x1d0f03c1, 0xa82daaf9, 0x944d49f1, 0xd0ec6ce9, + 0xec8c8fe1, 0x61d838d8, 0x5db8dbd0, 0x1919fec8, 0x25791dc0, + 0x905bb4f8, 0xac3b57f0, 0xe89a72e8, 0xd4fa91e0, 0xc89a62df, + 0xf4fa81d7, 0xb05ba4cf, 0x8c3b47c7, 0x3919eeff, 0x05790df7, + 0x41d828ef, 0x7db8cbe7, 0xf0ec7cde, 0xcc8c9fd6, 0x882dbace, + 0xb44d59c6, 0x016ff0fe, 0x3d0f13f6, 0x79ae36ee, 0x45ced5e6, + 0xb8765edd, 0x8416bdd5, 0xc0b798cd, 0xfcd77bc5, 0x49f5d2fd, + 0x759531f5, 0x313414ed, 0x0d54f7e5, 0x800040dc, 0xbc60a3d4, + 0xf8c186cc, 0xc4a165c4, 0x7183ccfc, 0x4de32ff4, 0x09420aec, + 0x3522e9e4}, + {0x00000000, 0x6307d924, 0xc60fb248, 0xa5086b6c, 0x576e62d1, + 0x3469bbf5, 0x9161d099, 0xf26609bd, 0xaedcc5a2, 0xcddb1c86, + 0x68d377ea, 0x0bd4aece, 0xf9b2a773, 0x9ab57e57, 0x3fbd153b, + 0x5cbacc1f, 0x86c88d05, 0xe5cf5421, 0x40c73f4d, 0x23c0e669, + 0xd1a6efd4, 0xb2a136f0, 0x17a95d9c, 0x74ae84b8, 0x281448a7, + 0x4b139183, 0xee1bfaef, 0x8d1c23cb, 0x7f7a2a76, 0x1c7df352, + 0xb975983e, 0xda72411a, 0xd6e01c4b, 0xb5e7c56f, 0x10efae03, + 0x73e87727, 0x818e7e9a, 0xe289a7be, 0x4781ccd2, 0x248615f6, + 0x783cd9e9, 0x1b3b00cd, 0xbe336ba1, 0xdd34b285, 0x2f52bb38, + 0x4c55621c, 0xe95d0970, 0x8a5ad054, 0x5028914e, 0x332f486a, + 0x96272306, 0xf520fa22, 0x0746f39f, 0x64412abb, 0xc14941d7, + 0xa24e98f3, 0xfef454ec, 0x9df38dc8, 0x38fbe6a4, 0x5bfc3f80, + 0xa99a363d, 0xca9def19, 0x6f958475, 0x0c925d51, 0x76b13ed7, + 0x15b6e7f3, 0xb0be8c9f, 0xd3b955bb, 0x21df5c06, 0x42d88522, + 0xe7d0ee4e, 0x84d7376a, 0xd86dfb75, 0xbb6a2251, 0x1e62493d, + 0x7d659019, 0x8f0399a4, 0xec044080, 0x490c2bec, 0x2a0bf2c8, + 0xf079b3d2, 0x937e6af6, 0x3676019a, 0x5571d8be, 0xa717d103, + 0xc4100827, 0x6118634b, 0x021fba6f, 0x5ea57670, 0x3da2af54, + 0x98aac438, 0xfbad1d1c, 0x09cb14a1, 0x6acccd85, 0xcfc4a6e9, + 0xacc37fcd, 
0xa051229c, 0xc356fbb8, 0x665e90d4, 0x055949f0, + 0xf73f404d, 0x94389969, 0x3130f205, 0x52372b21, 0x0e8de73e, + 0x6d8a3e1a, 0xc8825576, 0xab858c52, 0x59e385ef, 0x3ae45ccb, + 0x9fec37a7, 0xfcebee83, 0x2699af99, 0x459e76bd, 0xe0961dd1, + 0x8391c4f5, 0x71f7cd48, 0x12f0146c, 0xb7f87f00, 0xd4ffa624, + 0x88456a3b, 0xeb42b31f, 0x4e4ad873, 0x2d4d0157, 0xdf2b08ea, + 0xbc2cd1ce, 0x1924baa2, 0x7a236386, 0xed627dae, 0x8e65a48a, + 0x2b6dcfe6, 0x486a16c2, 0xba0c1f7f, 0xd90bc65b, 0x7c03ad37, + 0x1f047413, 0x43beb80c, 0x20b96128, 0x85b10a44, 0xe6b6d360, + 0x14d0dadd, 0x77d703f9, 0xd2df6895, 0xb1d8b1b1, 0x6baaf0ab, + 0x08ad298f, 0xada542e3, 0xcea29bc7, 0x3cc4927a, 0x5fc34b5e, + 0xfacb2032, 0x99ccf916, 0xc5763509, 0xa671ec2d, 0x03798741, + 0x607e5e65, 0x921857d8, 0xf11f8efc, 0x5417e590, 0x37103cb4, + 0x3b8261e5, 0x5885b8c1, 0xfd8dd3ad, 0x9e8a0a89, 0x6cec0334, + 0x0febda10, 0xaae3b17c, 0xc9e46858, 0x955ea447, 0xf6597d63, + 0x5351160f, 0x3056cf2b, 0xc230c696, 0xa1371fb2, 0x043f74de, + 0x6738adfa, 0xbd4aece0, 0xde4d35c4, 0x7b455ea8, 0x1842878c, + 0xea248e31, 0x89235715, 0x2c2b3c79, 0x4f2ce55d, 0x13962942, + 0x7091f066, 0xd5999b0a, 0xb69e422e, 0x44f84b93, 0x27ff92b7, + 0x82f7f9db, 0xe1f020ff, 0x9bd34379, 0xf8d49a5d, 0x5ddcf131, + 0x3edb2815, 0xccbd21a8, 0xafbaf88c, 0x0ab293e0, 0x69b54ac4, + 0x350f86db, 0x56085fff, 0xf3003493, 0x9007edb7, 0x6261e40a, + 0x01663d2e, 0xa46e5642, 0xc7698f66, 0x1d1bce7c, 0x7e1c1758, + 0xdb147c34, 0xb813a510, 0x4a75acad, 0x29727589, 0x8c7a1ee5, + 0xef7dc7c1, 0xb3c70bde, 0xd0c0d2fa, 0x75c8b996, 0x16cf60b2, + 0xe4a9690f, 0x87aeb02b, 0x22a6db47, 0x41a10263, 0x4d335f32, + 0x2e348616, 0x8b3ced7a, 0xe83b345e, 0x1a5d3de3, 0x795ae4c7, + 0xdc528fab, 0xbf55568f, 0xe3ef9a90, 0x80e843b4, 0x25e028d8, + 0x46e7f1fc, 0xb481f841, 0xd7862165, 0x728e4a09, 0x1189932d, + 0xcbfbd237, 0xa8fc0b13, 0x0df4607f, 0x6ef3b95b, 0x9c95b0e6, + 0xff9269c2, 0x5a9a02ae, 0x399ddb8a, 0x65271795, 0x0620ceb1, + 0xa328a5dd, 0xc02f7cf9, 0x32497544, 0x514eac60, 0xf446c70c, + 0x97411e28}, + {0x00000000, 
0x01b5fd1d, 0x036bfa3a, 0x02de0727, 0x06d7f474, + 0x07620969, 0x05bc0e4e, 0x0409f353, 0x0dafe8e8, 0x0c1a15f5, + 0x0ec412d2, 0x0f71efcf, 0x0b781c9c, 0x0acde181, 0x0813e6a6, + 0x09a61bbb, 0x1b5fd1d0, 0x1aea2ccd, 0x18342bea, 0x1981d6f7, + 0x1d8825a4, 0x1c3dd8b9, 0x1ee3df9e, 0x1f562283, 0x16f03938, + 0x1745c425, 0x159bc302, 0x142e3e1f, 0x1027cd4c, 0x11923051, + 0x134c3776, 0x12f9ca6b, 0x36bfa3a0, 0x370a5ebd, 0x35d4599a, + 0x3461a487, 0x306857d4, 0x31ddaac9, 0x3303adee, 0x32b650f3, + 0x3b104b48, 0x3aa5b655, 0x387bb172, 0x39ce4c6f, 0x3dc7bf3c, + 0x3c724221, 0x3eac4506, 0x3f19b81b, 0x2de07270, 0x2c558f6d, + 0x2e8b884a, 0x2f3e7557, 0x2b378604, 0x2a827b19, 0x285c7c3e, + 0x29e98123, 0x204f9a98, 0x21fa6785, 0x232460a2, 0x22919dbf, + 0x26986eec, 0x272d93f1, 0x25f394d6, 0x244669cb, 0x6d7f4740, + 0x6ccaba5d, 0x6e14bd7a, 0x6fa14067, 0x6ba8b334, 0x6a1d4e29, + 0x68c3490e, 0x6976b413, 0x60d0afa8, 0x616552b5, 0x63bb5592, + 0x620ea88f, 0x66075bdc, 0x67b2a6c1, 0x656ca1e6, 0x64d95cfb, + 0x76209690, 0x77956b8d, 0x754b6caa, 0x74fe91b7, 0x70f762e4, + 0x71429ff9, 0x739c98de, 0x722965c3, 0x7b8f7e78, 0x7a3a8365, + 0x78e48442, 0x7951795f, 0x7d588a0c, 0x7ced7711, 0x7e337036, + 0x7f868d2b, 0x5bc0e4e0, 0x5a7519fd, 0x58ab1eda, 0x591ee3c7, + 0x5d171094, 0x5ca2ed89, 0x5e7ceaae, 0x5fc917b3, 0x566f0c08, + 0x57daf115, 0x5504f632, 0x54b10b2f, 0x50b8f87c, 0x510d0561, + 0x53d30246, 0x5266ff5b, 0x409f3530, 0x412ac82d, 0x43f4cf0a, + 0x42413217, 0x4648c144, 0x47fd3c59, 0x45233b7e, 0x4496c663, + 0x4d30ddd8, 0x4c8520c5, 0x4e5b27e2, 0x4feedaff, 0x4be729ac, + 0x4a52d4b1, 0x488cd396, 0x49392e8b, 0xdafe8e80, 0xdb4b739d, + 0xd99574ba, 0xd82089a7, 0xdc297af4, 0xdd9c87e9, 0xdf4280ce, + 0xdef77dd3, 0xd7516668, 0xd6e49b75, 0xd43a9c52, 0xd58f614f, + 0xd186921c, 0xd0336f01, 0xd2ed6826, 0xd358953b, 0xc1a15f50, + 0xc014a24d, 0xc2caa56a, 0xc37f5877, 0xc776ab24, 0xc6c35639, + 0xc41d511e, 0xc5a8ac03, 0xcc0eb7b8, 0xcdbb4aa5, 0xcf654d82, + 0xced0b09f, 0xcad943cc, 0xcb6cbed1, 0xc9b2b9f6, 0xc80744eb, + 0xec412d20, 0xedf4d03d, 
0xef2ad71a, 0xee9f2a07, 0xea96d954, + 0xeb232449, 0xe9fd236e, 0xe848de73, 0xe1eec5c8, 0xe05b38d5, + 0xe2853ff2, 0xe330c2ef, 0xe73931bc, 0xe68ccca1, 0xe452cb86, + 0xe5e7369b, 0xf71efcf0, 0xf6ab01ed, 0xf47506ca, 0xf5c0fbd7, + 0xf1c90884, 0xf07cf599, 0xf2a2f2be, 0xf3170fa3, 0xfab11418, + 0xfb04e905, 0xf9daee22, 0xf86f133f, 0xfc66e06c, 0xfdd31d71, + 0xff0d1a56, 0xfeb8e74b, 0xb781c9c0, 0xb63434dd, 0xb4ea33fa, + 0xb55fcee7, 0xb1563db4, 0xb0e3c0a9, 0xb23dc78e, 0xb3883a93, + 0xba2e2128, 0xbb9bdc35, 0xb945db12, 0xb8f0260f, 0xbcf9d55c, + 0xbd4c2841, 0xbf922f66, 0xbe27d27b, 0xacde1810, 0xad6be50d, + 0xafb5e22a, 0xae001f37, 0xaa09ec64, 0xabbc1179, 0xa962165e, + 0xa8d7eb43, 0xa171f0f8, 0xa0c40de5, 0xa21a0ac2, 0xa3aff7df, + 0xa7a6048c, 0xa613f991, 0xa4cdfeb6, 0xa57803ab, 0x813e6a60, + 0x808b977d, 0x8255905a, 0x83e06d47, 0x87e99e14, 0x865c6309, + 0x8482642e, 0x85379933, 0x8c918288, 0x8d247f95, 0x8ffa78b2, + 0x8e4f85af, 0x8a4676fc, 0x8bf38be1, 0x892d8cc6, 0x889871db, + 0x9a61bbb0, 0x9bd446ad, 0x990a418a, 0x98bfbc97, 0x9cb64fc4, + 0x9d03b2d9, 0x9fddb5fe, 0x9e6848e3, 0x97ce5358, 0x967bae45, + 0x94a5a962, 0x9510547f, 0x9119a72c, 0x90ac5a31, 0x92725d16, + 0x93c7a00b}, + {0x00000000, 0x6e8c1b41, 0xdd183682, 0xb3942dc3, 0x61416b45, + 0x0fcd7004, 0xbc595dc7, 0xd2d54686, 0xc282d68a, 0xac0ecdcb, + 0x1f9ae008, 0x7116fb49, 0xa3c3bdcf, 0xcd4fa68e, 0x7edb8b4d, + 0x1057900c, 0x5e74ab55, 0x30f8b014, 0x836c9dd7, 0xede08696, + 0x3f35c010, 0x51b9db51, 0xe22df692, 0x8ca1edd3, 0x9cf67ddf, + 0xf27a669e, 0x41ee4b5d, 0x2f62501c, 0xfdb7169a, 0x933b0ddb, + 0x20af2018, 0x4e233b59, 0xbce956aa, 0xd2654deb, 0x61f16028, + 0x0f7d7b69, 0xdda83def, 0xb32426ae, 0x00b00b6d, 0x6e3c102c, + 0x7e6b8020, 0x10e79b61, 0xa373b6a2, 0xcdffade3, 0x1f2aeb65, + 0x71a6f024, 0xc232dde7, 0xacbec6a6, 0xe29dfdff, 0x8c11e6be, + 0x3f85cb7d, 0x5109d03c, 0x83dc96ba, 0xed508dfb, 0x5ec4a038, + 0x3048bb79, 0x201f2b75, 0x4e933034, 0xfd071df7, 0x938b06b6, + 0x415e4030, 0x2fd25b71, 0x9c4676b2, 0xf2ca6df3, 0xa2a3ab15, + 0xcc2fb054, 0x7fbb9d97, 
0x113786d6, 0xc3e2c050, 0xad6edb11, + 0x1efaf6d2, 0x7076ed93, 0x60217d9f, 0x0ead66de, 0xbd394b1d, + 0xd3b5505c, 0x016016da, 0x6fec0d9b, 0xdc782058, 0xb2f43b19, + 0xfcd70040, 0x925b1b01, 0x21cf36c2, 0x4f432d83, 0x9d966b05, + 0xf31a7044, 0x408e5d87, 0x2e0246c6, 0x3e55d6ca, 0x50d9cd8b, + 0xe34de048, 0x8dc1fb09, 0x5f14bd8f, 0x3198a6ce, 0x820c8b0d, + 0xec80904c, 0x1e4afdbf, 0x70c6e6fe, 0xc352cb3d, 0xadded07c, + 0x7f0b96fa, 0x11878dbb, 0xa213a078, 0xcc9fbb39, 0xdcc82b35, + 0xb2443074, 0x01d01db7, 0x6f5c06f6, 0xbd894070, 0xd3055b31, + 0x609176f2, 0x0e1d6db3, 0x403e56ea, 0x2eb24dab, 0x9d266068, + 0xf3aa7b29, 0x217f3daf, 0x4ff326ee, 0xfc670b2d, 0x92eb106c, + 0x82bc8060, 0xec309b21, 0x5fa4b6e2, 0x3128ada3, 0xe3fdeb25, + 0x8d71f064, 0x3ee5dda7, 0x5069c6e6, 0x9e36506b, 0xf0ba4b2a, + 0x432e66e9, 0x2da27da8, 0xff773b2e, 0x91fb206f, 0x226f0dac, + 0x4ce316ed, 0x5cb486e1, 0x32389da0, 0x81acb063, 0xef20ab22, + 0x3df5eda4, 0x5379f6e5, 0xe0eddb26, 0x8e61c067, 0xc042fb3e, + 0xaecee07f, 0x1d5acdbc, 0x73d6d6fd, 0xa103907b, 0xcf8f8b3a, + 0x7c1ba6f9, 0x1297bdb8, 0x02c02db4, 0x6c4c36f5, 0xdfd81b36, + 0xb1540077, 0x638146f1, 0x0d0d5db0, 0xbe997073, 0xd0156b32, + 0x22df06c1, 0x4c531d80, 0xffc73043, 0x914b2b02, 0x439e6d84, + 0x2d1276c5, 0x9e865b06, 0xf00a4047, 0xe05dd04b, 0x8ed1cb0a, + 0x3d45e6c9, 0x53c9fd88, 0x811cbb0e, 0xef90a04f, 0x5c048d8c, + 0x328896cd, 0x7cabad94, 0x1227b6d5, 0xa1b39b16, 0xcf3f8057, + 0x1deac6d1, 0x7366dd90, 0xc0f2f053, 0xae7eeb12, 0xbe297b1e, + 0xd0a5605f, 0x63314d9c, 0x0dbd56dd, 0xdf68105b, 0xb1e40b1a, + 0x027026d9, 0x6cfc3d98, 0x3c95fb7e, 0x5219e03f, 0xe18dcdfc, + 0x8f01d6bd, 0x5dd4903b, 0x33588b7a, 0x80cca6b9, 0xee40bdf8, + 0xfe172df4, 0x909b36b5, 0x230f1b76, 0x4d830037, 0x9f5646b1, + 0xf1da5df0, 0x424e7033, 0x2cc26b72, 0x62e1502b, 0x0c6d4b6a, + 0xbff966a9, 0xd1757de8, 0x03a03b6e, 0x6d2c202f, 0xdeb80dec, + 0xb03416ad, 0xa06386a1, 0xceef9de0, 0x7d7bb023, 0x13f7ab62, + 0xc122ede4, 0xafaef6a5, 0x1c3adb66, 0x72b6c027, 0x807cadd4, + 0xeef0b695, 0x5d649b56, 0x33e88017, 
0xe13dc691, 0x8fb1ddd0, + 0x3c25f013, 0x52a9eb52, 0x42fe7b5e, 0x2c72601f, 0x9fe64ddc, + 0xf16a569d, 0x23bf101b, 0x4d330b5a, 0xfea72699, 0x902b3dd8, + 0xde080681, 0xb0841dc0, 0x03103003, 0x6d9c2b42, 0xbf496dc4, + 0xd1c57685, 0x62515b46, 0x0cdd4007, 0x1c8ad00b, 0x7206cb4a, + 0xc192e689, 0xaf1efdc8, 0x7dcbbb4e, 0x1347a00f, 0xa0d38dcc, + 0xce5f968d}, + {0x00000000, 0xe71da697, 0x154a4b6f, 0xf257edf8, 0x2a9496de, + 0xcd893049, 0x3fdeddb1, 0xd8c37b26, 0x55292dbc, 0xb2348b2b, + 0x406366d3, 0xa77ec044, 0x7fbdbb62, 0x98a01df5, 0x6af7f00d, + 0x8dea569a, 0xaa525b78, 0x4d4ffdef, 0xbf181017, 0x5805b680, + 0x80c6cda6, 0x67db6b31, 0x958c86c9, 0x7291205e, 0xff7b76c4, + 0x1866d053, 0xea313dab, 0x0d2c9b3c, 0xd5efe01a, 0x32f2468d, + 0xc0a5ab75, 0x27b80de2, 0x8fd5b0b1, 0x68c81626, 0x9a9ffbde, + 0x7d825d49, 0xa541266f, 0x425c80f8, 0xb00b6d00, 0x5716cb97, + 0xdafc9d0d, 0x3de13b9a, 0xcfb6d662, 0x28ab70f5, 0xf0680bd3, + 0x1775ad44, 0xe52240bc, 0x023fe62b, 0x2587ebc9, 0xc29a4d5e, + 0x30cda0a6, 0xd7d00631, 0x0f137d17, 0xe80edb80, 0x1a593678, + 0xfd4490ef, 0x70aec675, 0x97b360e2, 0x65e48d1a, 0x82f92b8d, + 0x5a3a50ab, 0xbd27f63c, 0x4f701bc4, 0xa86dbd53, 0xc4da6723, + 0x23c7c1b4, 0xd1902c4c, 0x368d8adb, 0xee4ef1fd, 0x0953576a, + 0xfb04ba92, 0x1c191c05, 0x91f34a9f, 0x76eeec08, 0x84b901f0, + 0x63a4a767, 0xbb67dc41, 0x5c7a7ad6, 0xae2d972e, 0x493031b9, + 0x6e883c5b, 0x89959acc, 0x7bc27734, 0x9cdfd1a3, 0x441caa85, + 0xa3010c12, 0x5156e1ea, 0xb64b477d, 0x3ba111e7, 0xdcbcb770, + 0x2eeb5a88, 0xc9f6fc1f, 0x11358739, 0xf62821ae, 0x047fcc56, + 0xe3626ac1, 0x4b0fd792, 0xac127105, 0x5e459cfd, 0xb9583a6a, + 0x619b414c, 0x8686e7db, 0x74d10a23, 0x93ccacb4, 0x1e26fa2e, + 0xf93b5cb9, 0x0b6cb141, 0xec7117d6, 0x34b26cf0, 0xd3afca67, + 0x21f8279f, 0xc6e58108, 0xe15d8cea, 0x06402a7d, 0xf417c785, + 0x130a6112, 0xcbc91a34, 0x2cd4bca3, 0xde83515b, 0x399ef7cc, + 0xb474a156, 0x536907c1, 0xa13eea39, 0x46234cae, 0x9ee03788, + 0x79fd911f, 0x8baa7ce7, 0x6cb7da70, 0x52c5c807, 0xb5d86e90, + 0x478f8368, 0xa09225ff, 0x78515ed9, 
0x9f4cf84e, 0x6d1b15b6, + 0x8a06b321, 0x07ece5bb, 0xe0f1432c, 0x12a6aed4, 0xf5bb0843, + 0x2d787365, 0xca65d5f2, 0x3832380a, 0xdf2f9e9d, 0xf897937f, + 0x1f8a35e8, 0xedddd810, 0x0ac07e87, 0xd20305a1, 0x351ea336, + 0xc7494ece, 0x2054e859, 0xadbebec3, 0x4aa31854, 0xb8f4f5ac, + 0x5fe9533b, 0x872a281d, 0x60378e8a, 0x92606372, 0x757dc5e5, + 0xdd1078b6, 0x3a0dde21, 0xc85a33d9, 0x2f47954e, 0xf784ee68, + 0x109948ff, 0xe2cea507, 0x05d30390, 0x8839550a, 0x6f24f39d, + 0x9d731e65, 0x7a6eb8f2, 0xa2adc3d4, 0x45b06543, 0xb7e788bb, + 0x50fa2e2c, 0x774223ce, 0x905f8559, 0x620868a1, 0x8515ce36, + 0x5dd6b510, 0xbacb1387, 0x489cfe7f, 0xaf8158e8, 0x226b0e72, + 0xc576a8e5, 0x3721451d, 0xd03ce38a, 0x08ff98ac, 0xefe23e3b, + 0x1db5d3c3, 0xfaa87554, 0x961faf24, 0x710209b3, 0x8355e44b, + 0x644842dc, 0xbc8b39fa, 0x5b969f6d, 0xa9c17295, 0x4edcd402, + 0xc3368298, 0x242b240f, 0xd67cc9f7, 0x31616f60, 0xe9a21446, + 0x0ebfb2d1, 0xfce85f29, 0x1bf5f9be, 0x3c4df45c, 0xdb5052cb, + 0x2907bf33, 0xce1a19a4, 0x16d96282, 0xf1c4c415, 0x039329ed, + 0xe48e8f7a, 0x6964d9e0, 0x8e797f77, 0x7c2e928f, 0x9b333418, + 0x43f04f3e, 0xa4ede9a9, 0x56ba0451, 0xb1a7a2c6, 0x19ca1f95, + 0xfed7b902, 0x0c8054fa, 0xeb9df26d, 0x335e894b, 0xd4432fdc, + 0x2614c224, 0xc10964b3, 0x4ce33229, 0xabfe94be, 0x59a97946, + 0xbeb4dfd1, 0x6677a4f7, 0x816a0260, 0x733def98, 0x9420490f, + 0xb39844ed, 0x5485e27a, 0xa6d20f82, 0x41cfa915, 0x990cd233, + 0x7e1174a4, 0x8c46995c, 0x6b5b3fcb, 0xe6b16951, 0x01accfc6, + 0xf3fb223e, 0x14e684a9, 0xcc25ff8f, 0x2b385918, 0xd96fb4e0, + 0x3e721277}, + {0x00000000, 0xa58b900e, 0x9066265d, 0x35edb653, 0xfbbd4afb, + 0x5e36daf5, 0x6bdb6ca6, 0xce50fca8, 0x2c0b93b7, 0x898003b9, + 0xbc6db5ea, 0x19e625e4, 0xd7b6d94c, 0x723d4942, 0x47d0ff11, + 0xe25b6f1f, 0x5817276e, 0xfd9cb760, 0xc8710133, 0x6dfa913d, + 0xa3aa6d95, 0x0621fd9b, 0x33cc4bc8, 0x9647dbc6, 0x741cb4d9, + 0xd19724d7, 0xe47a9284, 0x41f1028a, 0x8fa1fe22, 0x2a2a6e2c, + 0x1fc7d87f, 0xba4c4871, 0xb02e4edc, 0x15a5ded2, 0x20486881, + 0x85c3f88f, 0x4b930427, 0xee189429, 
0xdbf5227a, 0x7e7eb274, + 0x9c25dd6b, 0x39ae4d65, 0x0c43fb36, 0xa9c86b38, 0x67989790, + 0xc213079e, 0xf7feb1cd, 0x527521c3, 0xe83969b2, 0x4db2f9bc, + 0x785f4fef, 0xddd4dfe1, 0x13842349, 0xb60fb347, 0x83e20514, + 0x2669951a, 0xc432fa05, 0x61b96a0b, 0x5454dc58, 0xf1df4c56, + 0x3f8fb0fe, 0x9a0420f0, 0xafe996a3, 0x0a6206ad, 0xbb2d9bf9, + 0x1ea60bf7, 0x2b4bbda4, 0x8ec02daa, 0x4090d102, 0xe51b410c, + 0xd0f6f75f, 0x757d6751, 0x9726084e, 0x32ad9840, 0x07402e13, + 0xa2cbbe1d, 0x6c9b42b5, 0xc910d2bb, 0xfcfd64e8, 0x5976f4e6, + 0xe33abc97, 0x46b12c99, 0x735c9aca, 0xd6d70ac4, 0x1887f66c, + 0xbd0c6662, 0x88e1d031, 0x2d6a403f, 0xcf312f20, 0x6ababf2e, + 0x5f57097d, 0xfadc9973, 0x348c65db, 0x9107f5d5, 0xa4ea4386, + 0x0161d388, 0x0b03d525, 0xae88452b, 0x9b65f378, 0x3eee6376, + 0xf0be9fde, 0x55350fd0, 0x60d8b983, 0xc553298d, 0x27084692, + 0x8283d69c, 0xb76e60cf, 0x12e5f0c1, 0xdcb50c69, 0x793e9c67, + 0x4cd32a34, 0xe958ba3a, 0x5314f24b, 0xf69f6245, 0xc372d416, + 0x66f94418, 0xa8a9b8b0, 0x0d2228be, 0x38cf9eed, 0x9d440ee3, + 0x7f1f61fc, 0xda94f1f2, 0xef7947a1, 0x4af2d7af, 0x84a22b07, + 0x2129bb09, 0x14c40d5a, 0xb14f9d54, 0xad2a31b3, 0x08a1a1bd, + 0x3d4c17ee, 0x98c787e0, 0x56977b48, 0xf31ceb46, 0xc6f15d15, + 0x637acd1b, 0x8121a204, 0x24aa320a, 0x11478459, 0xb4cc1457, + 0x7a9ce8ff, 0xdf1778f1, 0xeafacea2, 0x4f715eac, 0xf53d16dd, + 0x50b686d3, 0x655b3080, 0xc0d0a08e, 0x0e805c26, 0xab0bcc28, + 0x9ee67a7b, 0x3b6dea75, 0xd936856a, 0x7cbd1564, 0x4950a337, + 0xecdb3339, 0x228bcf91, 0x87005f9f, 0xb2ede9cc, 0x176679c2, + 0x1d047f6f, 0xb88fef61, 0x8d625932, 0x28e9c93c, 0xe6b93594, + 0x4332a59a, 0x76df13c9, 0xd35483c7, 0x310fecd8, 0x94847cd6, + 0xa169ca85, 0x04e25a8b, 0xcab2a623, 0x6f39362d, 0x5ad4807e, + 0xff5f1070, 0x45135801, 0xe098c80f, 0xd5757e5c, 0x70feee52, + 0xbeae12fa, 0x1b2582f4, 0x2ec834a7, 0x8b43a4a9, 0x6918cbb6, + 0xcc935bb8, 0xf97eedeb, 0x5cf57de5, 0x92a5814d, 0x372e1143, + 0x02c3a710, 0xa748371e, 0x1607aa4a, 0xb38c3a44, 0x86618c17, + 0x23ea1c19, 0xedbae0b1, 0x483170bf, 0x7ddcc6ec, 
0xd85756e2, + 0x3a0c39fd, 0x9f87a9f3, 0xaa6a1fa0, 0x0fe18fae, 0xc1b17306, + 0x643ae308, 0x51d7555b, 0xf45cc555, 0x4e108d24, 0xeb9b1d2a, + 0xde76ab79, 0x7bfd3b77, 0xb5adc7df, 0x102657d1, 0x25cbe182, + 0x8040718c, 0x621b1e93, 0xc7908e9d, 0xf27d38ce, 0x57f6a8c0, + 0x99a65468, 0x3c2dc466, 0x09c07235, 0xac4be23b, 0xa629e496, + 0x03a27498, 0x364fc2cb, 0x93c452c5, 0x5d94ae6d, 0xf81f3e63, + 0xcdf28830, 0x6879183e, 0x8a227721, 0x2fa9e72f, 0x1a44517c, + 0xbfcfc172, 0x719f3dda, 0xd414add4, 0xe1f91b87, 0x44728b89, + 0xfe3ec3f8, 0x5bb553f6, 0x6e58e5a5, 0xcbd375ab, 0x05838903, + 0xa008190d, 0x95e5af5e, 0x306e3f50, 0xd235504f, 0x77bec041, + 0x42537612, 0xe7d8e61c, 0x29881ab4, 0x8c038aba, 0xb9ee3ce9, + 0x1c65ace7}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x0e908ba500000000, 0x5d26669000000000, + 0x53b6ed3500000000, 0xfb4abdfb00000000, 0xf5da365e00000000, + 0xa66cdb6b00000000, 0xa8fc50ce00000000, 0xb7930b2c00000000, + 0xb903808900000000, 0xeab56dbc00000000, 0xe425e61900000000, + 0x4cd9b6d700000000, 0x42493d7200000000, 0x11ffd04700000000, + 0x1f6f5be200000000, 0x6e27175800000000, 0x60b79cfd00000000, + 0x330171c800000000, 0x3d91fa6d00000000, 0x956daaa300000000, + 0x9bfd210600000000, 0xc84bcc3300000000, 0xc6db479600000000, + 0xd9b41c7400000000, 0xd72497d100000000, 0x84927ae400000000, + 0x8a02f14100000000, 0x22fea18f00000000, 0x2c6e2a2a00000000, + 0x7fd8c71f00000000, 0x71484cba00000000, 0xdc4e2eb000000000, + 0xd2dea51500000000, 0x8168482000000000, 0x8ff8c38500000000, + 0x2704934b00000000, 0x299418ee00000000, 0x7a22f5db00000000, + 0x74b27e7e00000000, 0x6bdd259c00000000, 0x654dae3900000000, + 0x36fb430c00000000, 0x386bc8a900000000, 0x9097986700000000, + 0x9e0713c200000000, 0xcdb1fef700000000, 0xc321755200000000, + 0xb26939e800000000, 0xbcf9b24d00000000, 0xef4f5f7800000000, + 0xe1dfd4dd00000000, 0x4923841300000000, 0x47b30fb600000000, + 0x1405e28300000000, 0x1a95692600000000, 0x05fa32c400000000, + 0x0b6ab96100000000, 0x58dc545400000000, 
0x564cdff100000000, + 0xfeb08f3f00000000, 0xf020049a00000000, 0xa396e9af00000000, + 0xad06620a00000000, 0xf99b2dbb00000000, 0xf70ba61e00000000, + 0xa4bd4b2b00000000, 0xaa2dc08e00000000, 0x02d1904000000000, + 0x0c411be500000000, 0x5ff7f6d000000000, 0x51677d7500000000, + 0x4e08269700000000, 0x4098ad3200000000, 0x132e400700000000, + 0x1dbecba200000000, 0xb5429b6c00000000, 0xbbd210c900000000, + 0xe864fdfc00000000, 0xe6f4765900000000, 0x97bc3ae300000000, + 0x992cb14600000000, 0xca9a5c7300000000, 0xc40ad7d600000000, + 0x6cf6871800000000, 0x62660cbd00000000, 0x31d0e18800000000, + 0x3f406a2d00000000, 0x202f31cf00000000, 0x2ebfba6a00000000, + 0x7d09575f00000000, 0x7399dcfa00000000, 0xdb658c3400000000, + 0xd5f5079100000000, 0x8643eaa400000000, 0x88d3610100000000, + 0x25d5030b00000000, 0x2b4588ae00000000, 0x78f3659b00000000, + 0x7663ee3e00000000, 0xde9fbef000000000, 0xd00f355500000000, + 0x83b9d86000000000, 0x8d2953c500000000, 0x9246082700000000, + 0x9cd6838200000000, 0xcf606eb700000000, 0xc1f0e51200000000, + 0x690cb5dc00000000, 0x679c3e7900000000, 0x342ad34c00000000, + 0x3aba58e900000000, 0x4bf2145300000000, 0x45629ff600000000, + 0x16d472c300000000, 0x1844f96600000000, 0xb0b8a9a800000000, + 0xbe28220d00000000, 0xed9ecf3800000000, 0xe30e449d00000000, + 0xfc611f7f00000000, 0xf2f194da00000000, 0xa14779ef00000000, + 0xafd7f24a00000000, 0x072ba28400000000, 0x09bb292100000000, + 0x5a0dc41400000000, 0x549d4fb100000000, 0xb3312aad00000000, + 0xbda1a10800000000, 0xee174c3d00000000, 0xe087c79800000000, + 0x487b975600000000, 0x46eb1cf300000000, 0x155df1c600000000, + 0x1bcd7a6300000000, 0x04a2218100000000, 0x0a32aa2400000000, + 0x5984471100000000, 0x5714ccb400000000, 0xffe89c7a00000000, + 0xf17817df00000000, 0xa2cefaea00000000, 0xac5e714f00000000, + 0xdd163df500000000, 0xd386b65000000000, 0x80305b6500000000, + 0x8ea0d0c000000000, 0x265c800e00000000, 0x28cc0bab00000000, + 0x7b7ae69e00000000, 0x75ea6d3b00000000, 0x6a8536d900000000, + 0x6415bd7c00000000, 0x37a3504900000000, 
0x3933dbec00000000, + 0x91cf8b2200000000, 0x9f5f008700000000, 0xcce9edb200000000, + 0xc279661700000000, 0x6f7f041d00000000, 0x61ef8fb800000000, + 0x3259628d00000000, 0x3cc9e92800000000, 0x9435b9e600000000, + 0x9aa5324300000000, 0xc913df7600000000, 0xc78354d300000000, + 0xd8ec0f3100000000, 0xd67c849400000000, 0x85ca69a100000000, + 0x8b5ae20400000000, 0x23a6b2ca00000000, 0x2d36396f00000000, + 0x7e80d45a00000000, 0x70105fff00000000, 0x0158134500000000, + 0x0fc898e000000000, 0x5c7e75d500000000, 0x52eefe7000000000, + 0xfa12aebe00000000, 0xf482251b00000000, 0xa734c82e00000000, + 0xa9a4438b00000000, 0xb6cb186900000000, 0xb85b93cc00000000, + 0xebed7ef900000000, 0xe57df55c00000000, 0x4d81a59200000000, + 0x43112e3700000000, 0x10a7c30200000000, 0x1e3748a700000000, + 0x4aaa071600000000, 0x443a8cb300000000, 0x178c618600000000, + 0x191cea2300000000, 0xb1e0baed00000000, 0xbf70314800000000, + 0xecc6dc7d00000000, 0xe25657d800000000, 0xfd390c3a00000000, + 0xf3a9879f00000000, 0xa01f6aaa00000000, 0xae8fe10f00000000, + 0x0673b1c100000000, 0x08e33a6400000000, 0x5b55d75100000000, + 0x55c55cf400000000, 0x248d104e00000000, 0x2a1d9beb00000000, + 0x79ab76de00000000, 0x773bfd7b00000000, 0xdfc7adb500000000, + 0xd157261000000000, 0x82e1cb2500000000, 0x8c71408000000000, + 0x931e1b6200000000, 0x9d8e90c700000000, 0xce387df200000000, + 0xc0a8f65700000000, 0x6854a69900000000, 0x66c42d3c00000000, + 0x3572c00900000000, 0x3be24bac00000000, 0x96e429a600000000, + 0x9874a20300000000, 0xcbc24f3600000000, 0xc552c49300000000, + 0x6dae945d00000000, 0x633e1ff800000000, 0x3088f2cd00000000, + 0x3e18796800000000, 0x2177228a00000000, 0x2fe7a92f00000000, + 0x7c51441a00000000, 0x72c1cfbf00000000, 0xda3d9f7100000000, + 0xd4ad14d400000000, 0x871bf9e100000000, 0x898b724400000000, + 0xf8c33efe00000000, 0xf653b55b00000000, 0xa5e5586e00000000, + 0xab75d3cb00000000, 0x0389830500000000, 0x0d1908a000000000, + 0x5eafe59500000000, 0x503f6e3000000000, 0x4f5035d200000000, + 0x41c0be7700000000, 0x1276534200000000, 
0x1ce6d8e700000000, + 0xb41a882900000000, 0xba8a038c00000000, 0xe93ceeb900000000, + 0xe7ac651c00000000}, + {0x0000000000000000, 0x97a61de700000000, 0x6f4b4a1500000000, + 0xf8ed57f200000000, 0xde96942a00000000, 0x493089cd00000000, + 0xb1ddde3f00000000, 0x267bc3d800000000, 0xbc2d295500000000, + 0x2b8b34b200000000, 0xd366634000000000, 0x44c07ea700000000, + 0x62bbbd7f00000000, 0xf51da09800000000, 0x0df0f76a00000000, + 0x9a56ea8d00000000, 0x785b52aa00000000, 0xeffd4f4d00000000, + 0x171018bf00000000, 0x80b6055800000000, 0xa6cdc68000000000, + 0x316bdb6700000000, 0xc9868c9500000000, 0x5e20917200000000, + 0xc4767bff00000000, 0x53d0661800000000, 0xab3d31ea00000000, + 0x3c9b2c0d00000000, 0x1ae0efd500000000, 0x8d46f23200000000, + 0x75aba5c000000000, 0xe20db82700000000, 0xb1b0d58f00000000, + 0x2616c86800000000, 0xdefb9f9a00000000, 0x495d827d00000000, + 0x6f2641a500000000, 0xf8805c4200000000, 0x006d0bb000000000, + 0x97cb165700000000, 0x0d9dfcda00000000, 0x9a3be13d00000000, + 0x62d6b6cf00000000, 0xf570ab2800000000, 0xd30b68f000000000, + 0x44ad751700000000, 0xbc4022e500000000, 0x2be63f0200000000, + 0xc9eb872500000000, 0x5e4d9ac200000000, 0xa6a0cd3000000000, + 0x3106d0d700000000, 0x177d130f00000000, 0x80db0ee800000000, + 0x7836591a00000000, 0xef9044fd00000000, 0x75c6ae7000000000, + 0xe260b39700000000, 0x1a8de46500000000, 0x8d2bf98200000000, + 0xab503a5a00000000, 0x3cf627bd00000000, 0xc41b704f00000000, + 0x53bd6da800000000, 0x2367dac400000000, 0xb4c1c72300000000, + 0x4c2c90d100000000, 0xdb8a8d3600000000, 0xfdf14eee00000000, + 0x6a57530900000000, 0x92ba04fb00000000, 0x051c191c00000000, + 0x9f4af39100000000, 0x08ecee7600000000, 0xf001b98400000000, + 0x67a7a46300000000, 0x41dc67bb00000000, 0xd67a7a5c00000000, + 0x2e972dae00000000, 0xb931304900000000, 0x5b3c886e00000000, + 0xcc9a958900000000, 0x3477c27b00000000, 0xa3d1df9c00000000, + 0x85aa1c4400000000, 0x120c01a300000000, 0xeae1565100000000, + 0x7d474bb600000000, 0xe711a13b00000000, 0x70b7bcdc00000000, + 0x885aeb2e00000000, 
0x1ffcf6c900000000, 0x3987351100000000, + 0xae2128f600000000, 0x56cc7f0400000000, 0xc16a62e300000000, + 0x92d70f4b00000000, 0x057112ac00000000, 0xfd9c455e00000000, + 0x6a3a58b900000000, 0x4c419b6100000000, 0xdbe7868600000000, + 0x230ad17400000000, 0xb4accc9300000000, 0x2efa261e00000000, + 0xb95c3bf900000000, 0x41b16c0b00000000, 0xd61771ec00000000, + 0xf06cb23400000000, 0x67caafd300000000, 0x9f27f82100000000, + 0x0881e5c600000000, 0xea8c5de100000000, 0x7d2a400600000000, + 0x85c717f400000000, 0x12610a1300000000, 0x341ac9cb00000000, + 0xa3bcd42c00000000, 0x5b5183de00000000, 0xccf79e3900000000, + 0x56a174b400000000, 0xc107695300000000, 0x39ea3ea100000000, + 0xae4c234600000000, 0x8837e09e00000000, 0x1f91fd7900000000, + 0xe77caa8b00000000, 0x70dab76c00000000, 0x07c8c55200000000, + 0x906ed8b500000000, 0x68838f4700000000, 0xff2592a000000000, + 0xd95e517800000000, 0x4ef84c9f00000000, 0xb6151b6d00000000, + 0x21b3068a00000000, 0xbbe5ec0700000000, 0x2c43f1e000000000, + 0xd4aea61200000000, 0x4308bbf500000000, 0x6573782d00000000, + 0xf2d565ca00000000, 0x0a38323800000000, 0x9d9e2fdf00000000, + 0x7f9397f800000000, 0xe8358a1f00000000, 0x10d8dded00000000, + 0x877ec00a00000000, 0xa10503d200000000, 0x36a31e3500000000, + 0xce4e49c700000000, 0x59e8542000000000, 0xc3bebead00000000, + 0x5418a34a00000000, 0xacf5f4b800000000, 0x3b53e95f00000000, + 0x1d282a8700000000, 0x8a8e376000000000, 0x7263609200000000, + 0xe5c57d7500000000, 0xb67810dd00000000, 0x21de0d3a00000000, + 0xd9335ac800000000, 0x4e95472f00000000, 0x68ee84f700000000, + 0xff48991000000000, 0x07a5cee200000000, 0x9003d30500000000, + 0x0a55398800000000, 0x9df3246f00000000, 0x651e739d00000000, + 0xf2b86e7a00000000, 0xd4c3ada200000000, 0x4365b04500000000, + 0xbb88e7b700000000, 0x2c2efa5000000000, 0xce23427700000000, + 0x59855f9000000000, 0xa168086200000000, 0x36ce158500000000, + 0x10b5d65d00000000, 0x8713cbba00000000, 0x7ffe9c4800000000, + 0xe85881af00000000, 0x720e6b2200000000, 0xe5a876c500000000, + 0x1d45213700000000, 
0x8ae33cd000000000, 0xac98ff0800000000, + 0x3b3ee2ef00000000, 0xc3d3b51d00000000, 0x5475a8fa00000000, + 0x24af1f9600000000, 0xb309027100000000, 0x4be4558300000000, + 0xdc42486400000000, 0xfa398bbc00000000, 0x6d9f965b00000000, + 0x9572c1a900000000, 0x02d4dc4e00000000, 0x988236c300000000, + 0x0f242b2400000000, 0xf7c97cd600000000, 0x606f613100000000, + 0x4614a2e900000000, 0xd1b2bf0e00000000, 0x295fe8fc00000000, + 0xbef9f51b00000000, 0x5cf44d3c00000000, 0xcb5250db00000000, + 0x33bf072900000000, 0xa4191ace00000000, 0x8262d91600000000, + 0x15c4c4f100000000, 0xed29930300000000, 0x7a8f8ee400000000, + 0xe0d9646900000000, 0x777f798e00000000, 0x8f922e7c00000000, + 0x1834339b00000000, 0x3e4ff04300000000, 0xa9e9eda400000000, + 0x5104ba5600000000, 0xc6a2a7b100000000, 0x951fca1900000000, + 0x02b9d7fe00000000, 0xfa54800c00000000, 0x6df29deb00000000, + 0x4b895e3300000000, 0xdc2f43d400000000, 0x24c2142600000000, + 0xb36409c100000000, 0x2932e34c00000000, 0xbe94feab00000000, + 0x4679a95900000000, 0xd1dfb4be00000000, 0xf7a4776600000000, + 0x60026a8100000000, 0x98ef3d7300000000, 0x0f49209400000000, + 0xed4498b300000000, 0x7ae2855400000000, 0x820fd2a600000000, + 0x15a9cf4100000000, 0x33d20c9900000000, 0xa474117e00000000, + 0x5c99468c00000000, 0xcb3f5b6b00000000, 0x5169b1e600000000, + 0xc6cfac0100000000, 0x3e22fbf300000000, 0xa984e61400000000, + 0x8fff25cc00000000, 0x1859382b00000000, 0xe0b46fd900000000, + 0x7712723e00000000}, + {0x0000000000000000, 0x411b8c6e00000000, 0x823618dd00000000, + 0xc32d94b300000000, 0x456b416100000000, 0x0470cd0f00000000, + 0xc75d59bc00000000, 0x8646d5d200000000, 0x8ad682c200000000, + 0xcbcd0eac00000000, 0x08e09a1f00000000, 0x49fb167100000000, + 0xcfbdc3a300000000, 0x8ea64fcd00000000, 0x4d8bdb7e00000000, + 0x0c90571000000000, 0x55ab745e00000000, 0x14b0f83000000000, + 0xd79d6c8300000000, 0x9686e0ed00000000, 0x10c0353f00000000, + 0x51dbb95100000000, 0x92f62de200000000, 0xd3eda18c00000000, + 0xdf7df69c00000000, 0x9e667af200000000, 0x5d4bee4100000000, + 
0x1c50622f00000000, 0x9a16b7fd00000000, 0xdb0d3b9300000000, + 0x1820af2000000000, 0x593b234e00000000, 0xaa56e9bc00000000, + 0xeb4d65d200000000, 0x2860f16100000000, 0x697b7d0f00000000, + 0xef3da8dd00000000, 0xae2624b300000000, 0x6d0bb00000000000, + 0x2c103c6e00000000, 0x20806b7e00000000, 0x619be71000000000, + 0xa2b673a300000000, 0xe3adffcd00000000, 0x65eb2a1f00000000, + 0x24f0a67100000000, 0xe7dd32c200000000, 0xa6c6beac00000000, + 0xfffd9de200000000, 0xbee6118c00000000, 0x7dcb853f00000000, + 0x3cd0095100000000, 0xba96dc8300000000, 0xfb8d50ed00000000, + 0x38a0c45e00000000, 0x79bb483000000000, 0x752b1f2000000000, + 0x3430934e00000000, 0xf71d07fd00000000, 0xb6068b9300000000, + 0x30405e4100000000, 0x715bd22f00000000, 0xb276469c00000000, + 0xf36dcaf200000000, 0x15aba3a200000000, 0x54b02fcc00000000, + 0x979dbb7f00000000, 0xd686371100000000, 0x50c0e2c300000000, + 0x11db6ead00000000, 0xd2f6fa1e00000000, 0x93ed767000000000, + 0x9f7d216000000000, 0xde66ad0e00000000, 0x1d4b39bd00000000, + 0x5c50b5d300000000, 0xda16600100000000, 0x9b0dec6f00000000, + 0x582078dc00000000, 0x193bf4b200000000, 0x4000d7fc00000000, + 0x011b5b9200000000, 0xc236cf2100000000, 0x832d434f00000000, + 0x056b969d00000000, 0x44701af300000000, 0x875d8e4000000000, + 0xc646022e00000000, 0xcad6553e00000000, 0x8bcdd95000000000, + 0x48e04de300000000, 0x09fbc18d00000000, 0x8fbd145f00000000, + 0xcea6983100000000, 0x0d8b0c8200000000, 0x4c9080ec00000000, + 0xbffd4a1e00000000, 0xfee6c67000000000, 0x3dcb52c300000000, + 0x7cd0dead00000000, 0xfa960b7f00000000, 0xbb8d871100000000, + 0x78a013a200000000, 0x39bb9fcc00000000, 0x352bc8dc00000000, + 0x743044b200000000, 0xb71dd00100000000, 0xf6065c6f00000000, + 0x704089bd00000000, 0x315b05d300000000, 0xf276916000000000, + 0xb36d1d0e00000000, 0xea563e4000000000, 0xab4db22e00000000, + 0x6860269d00000000, 0x297baaf300000000, 0xaf3d7f2100000000, + 0xee26f34f00000000, 0x2d0b67fc00000000, 0x6c10eb9200000000, + 0x6080bc8200000000, 0x219b30ec00000000, 0xe2b6a45f00000000, + 
0xa3ad283100000000, 0x25ebfde300000000, 0x64f0718d00000000, + 0xa7dde53e00000000, 0xe6c6695000000000, 0x6b50369e00000000, + 0x2a4bbaf000000000, 0xe9662e4300000000, 0xa87da22d00000000, + 0x2e3b77ff00000000, 0x6f20fb9100000000, 0xac0d6f2200000000, + 0xed16e34c00000000, 0xe186b45c00000000, 0xa09d383200000000, + 0x63b0ac8100000000, 0x22ab20ef00000000, 0xa4edf53d00000000, + 0xe5f6795300000000, 0x26dbede000000000, 0x67c0618e00000000, + 0x3efb42c000000000, 0x7fe0ceae00000000, 0xbccd5a1d00000000, + 0xfdd6d67300000000, 0x7b9003a100000000, 0x3a8b8fcf00000000, + 0xf9a61b7c00000000, 0xb8bd971200000000, 0xb42dc00200000000, + 0xf5364c6c00000000, 0x361bd8df00000000, 0x770054b100000000, + 0xf146816300000000, 0xb05d0d0d00000000, 0x737099be00000000, + 0x326b15d000000000, 0xc106df2200000000, 0x801d534c00000000, + 0x4330c7ff00000000, 0x022b4b9100000000, 0x846d9e4300000000, + 0xc576122d00000000, 0x065b869e00000000, 0x47400af000000000, + 0x4bd05de000000000, 0x0acbd18e00000000, 0xc9e6453d00000000, + 0x88fdc95300000000, 0x0ebb1c8100000000, 0x4fa090ef00000000, + 0x8c8d045c00000000, 0xcd96883200000000, 0x94adab7c00000000, + 0xd5b6271200000000, 0x169bb3a100000000, 0x57803fcf00000000, + 0xd1c6ea1d00000000, 0x90dd667300000000, 0x53f0f2c000000000, + 0x12eb7eae00000000, 0x1e7b29be00000000, 0x5f60a5d000000000, + 0x9c4d316300000000, 0xdd56bd0d00000000, 0x5b1068df00000000, + 0x1a0be4b100000000, 0xd926700200000000, 0x983dfc6c00000000, + 0x7efb953c00000000, 0x3fe0195200000000, 0xfccd8de100000000, + 0xbdd6018f00000000, 0x3b90d45d00000000, 0x7a8b583300000000, + 0xb9a6cc8000000000, 0xf8bd40ee00000000, 0xf42d17fe00000000, + 0xb5369b9000000000, 0x761b0f2300000000, 0x3700834d00000000, + 0xb146569f00000000, 0xf05ddaf100000000, 0x33704e4200000000, + 0x726bc22c00000000, 0x2b50e16200000000, 0x6a4b6d0c00000000, + 0xa966f9bf00000000, 0xe87d75d100000000, 0x6e3ba00300000000, + 0x2f202c6d00000000, 0xec0db8de00000000, 0xad1634b000000000, + 0xa18663a000000000, 0xe09defce00000000, 0x23b07b7d00000000, + 
0x62abf71300000000, 0xe4ed22c100000000, 0xa5f6aeaf00000000, + 0x66db3a1c00000000, 0x27c0b67200000000, 0xd4ad7c8000000000, + 0x95b6f0ee00000000, 0x569b645d00000000, 0x1780e83300000000, + 0x91c63de100000000, 0xd0ddb18f00000000, 0x13f0253c00000000, + 0x52eba95200000000, 0x5e7bfe4200000000, 0x1f60722c00000000, + 0xdc4de69f00000000, 0x9d566af100000000, 0x1b10bf2300000000, + 0x5a0b334d00000000, 0x9926a7fe00000000, 0xd83d2b9000000000, + 0x810608de00000000, 0xc01d84b000000000, 0x0330100300000000, + 0x422b9c6d00000000, 0xc46d49bf00000000, 0x8576c5d100000000, + 0x465b516200000000, 0x0740dd0c00000000, 0x0bd08a1c00000000, + 0x4acb067200000000, 0x89e692c100000000, 0xc8fd1eaf00000000, + 0x4ebbcb7d00000000, 0x0fa0471300000000, 0xcc8dd3a000000000, + 0x8d965fce00000000}, + {0x0000000000000000, 0x1dfdb50100000000, 0x3afa6b0300000000, + 0x2707de0200000000, 0x74f4d70600000000, 0x6909620700000000, + 0x4e0ebc0500000000, 0x53f3090400000000, 0xe8e8af0d00000000, + 0xf5151a0c00000000, 0xd212c40e00000000, 0xcfef710f00000000, + 0x9c1c780b00000000, 0x81e1cd0a00000000, 0xa6e6130800000000, + 0xbb1ba60900000000, 0xd0d15f1b00000000, 0xcd2cea1a00000000, + 0xea2b341800000000, 0xf7d6811900000000, 0xa425881d00000000, + 0xb9d83d1c00000000, 0x9edfe31e00000000, 0x8322561f00000000, + 0x3839f01600000000, 0x25c4451700000000, 0x02c39b1500000000, + 0x1f3e2e1400000000, 0x4ccd271000000000, 0x5130921100000000, + 0x76374c1300000000, 0x6bcaf91200000000, 0xa0a3bf3600000000, + 0xbd5e0a3700000000, 0x9a59d43500000000, 0x87a4613400000000, + 0xd457683000000000, 0xc9aadd3100000000, 0xeead033300000000, + 0xf350b63200000000, 0x484b103b00000000, 0x55b6a53a00000000, + 0x72b17b3800000000, 0x6f4cce3900000000, 0x3cbfc73d00000000, + 0x2142723c00000000, 0x0645ac3e00000000, 0x1bb8193f00000000, + 0x7072e02d00000000, 0x6d8f552c00000000, 0x4a888b2e00000000, + 0x57753e2f00000000, 0x0486372b00000000, 0x197b822a00000000, + 0x3e7c5c2800000000, 0x2381e92900000000, 0x989a4f2000000000, + 0x8567fa2100000000, 0xa260242300000000, 
0xbf9d912200000000, + 0xec6e982600000000, 0xf1932d2700000000, 0xd694f32500000000, + 0xcb69462400000000, 0x40477f6d00000000, 0x5dbaca6c00000000, + 0x7abd146e00000000, 0x6740a16f00000000, 0x34b3a86b00000000, + 0x294e1d6a00000000, 0x0e49c36800000000, 0x13b4766900000000, + 0xa8afd06000000000, 0xb552656100000000, 0x9255bb6300000000, + 0x8fa80e6200000000, 0xdc5b076600000000, 0xc1a6b26700000000, + 0xe6a16c6500000000, 0xfb5cd96400000000, 0x9096207600000000, + 0x8d6b957700000000, 0xaa6c4b7500000000, 0xb791fe7400000000, + 0xe462f77000000000, 0xf99f427100000000, 0xde989c7300000000, + 0xc365297200000000, 0x787e8f7b00000000, 0x65833a7a00000000, + 0x4284e47800000000, 0x5f79517900000000, 0x0c8a587d00000000, + 0x1177ed7c00000000, 0x3670337e00000000, 0x2b8d867f00000000, + 0xe0e4c05b00000000, 0xfd19755a00000000, 0xda1eab5800000000, + 0xc7e31e5900000000, 0x9410175d00000000, 0x89eda25c00000000, + 0xaeea7c5e00000000, 0xb317c95f00000000, 0x080c6f5600000000, + 0x15f1da5700000000, 0x32f6045500000000, 0x2f0bb15400000000, + 0x7cf8b85000000000, 0x61050d5100000000, 0x4602d35300000000, + 0x5bff665200000000, 0x30359f4000000000, 0x2dc82a4100000000, + 0x0acff44300000000, 0x1732414200000000, 0x44c1484600000000, + 0x593cfd4700000000, 0x7e3b234500000000, 0x63c6964400000000, + 0xd8dd304d00000000, 0xc520854c00000000, 0xe2275b4e00000000, + 0xffdaee4f00000000, 0xac29e74b00000000, 0xb1d4524a00000000, + 0x96d38c4800000000, 0x8b2e394900000000, 0x808efeda00000000, + 0x9d734bdb00000000, 0xba7495d900000000, 0xa78920d800000000, + 0xf47a29dc00000000, 0xe9879cdd00000000, 0xce8042df00000000, + 0xd37df7de00000000, 0x686651d700000000, 0x759be4d600000000, + 0x529c3ad400000000, 0x4f618fd500000000, 0x1c9286d100000000, + 0x016f33d000000000, 0x2668edd200000000, 0x3b9558d300000000, + 0x505fa1c100000000, 0x4da214c000000000, 0x6aa5cac200000000, + 0x77587fc300000000, 0x24ab76c700000000, 0x3956c3c600000000, + 0x1e511dc400000000, 0x03aca8c500000000, 0xb8b70ecc00000000, + 0xa54abbcd00000000, 0x824d65cf00000000, 
0x9fb0d0ce00000000, + 0xcc43d9ca00000000, 0xd1be6ccb00000000, 0xf6b9b2c900000000, + 0xeb4407c800000000, 0x202d41ec00000000, 0x3dd0f4ed00000000, + 0x1ad72aef00000000, 0x072a9fee00000000, 0x54d996ea00000000, + 0x492423eb00000000, 0x6e23fde900000000, 0x73de48e800000000, + 0xc8c5eee100000000, 0xd5385be000000000, 0xf23f85e200000000, + 0xefc230e300000000, 0xbc3139e700000000, 0xa1cc8ce600000000, + 0x86cb52e400000000, 0x9b36e7e500000000, 0xf0fc1ef700000000, + 0xed01abf600000000, 0xca0675f400000000, 0xd7fbc0f500000000, + 0x8408c9f100000000, 0x99f57cf000000000, 0xbef2a2f200000000, + 0xa30f17f300000000, 0x1814b1fa00000000, 0x05e904fb00000000, + 0x22eedaf900000000, 0x3f136ff800000000, 0x6ce066fc00000000, + 0x711dd3fd00000000, 0x561a0dff00000000, 0x4be7b8fe00000000, + 0xc0c981b700000000, 0xdd3434b600000000, 0xfa33eab400000000, + 0xe7ce5fb500000000, 0xb43d56b100000000, 0xa9c0e3b000000000, + 0x8ec73db200000000, 0x933a88b300000000, 0x28212eba00000000, + 0x35dc9bbb00000000, 0x12db45b900000000, 0x0f26f0b800000000, + 0x5cd5f9bc00000000, 0x41284cbd00000000, 0x662f92bf00000000, + 0x7bd227be00000000, 0x1018deac00000000, 0x0de56bad00000000, + 0x2ae2b5af00000000, 0x371f00ae00000000, 0x64ec09aa00000000, + 0x7911bcab00000000, 0x5e1662a900000000, 0x43ebd7a800000000, + 0xf8f071a100000000, 0xe50dc4a000000000, 0xc20a1aa200000000, + 0xdff7afa300000000, 0x8c04a6a700000000, 0x91f913a600000000, + 0xb6fecda400000000, 0xab0378a500000000, 0x606a3e8100000000, + 0x7d978b8000000000, 0x5a90558200000000, 0x476de08300000000, + 0x149ee98700000000, 0x09635c8600000000, 0x2e64828400000000, + 0x3399378500000000, 0x8882918c00000000, 0x957f248d00000000, + 0xb278fa8f00000000, 0xaf854f8e00000000, 0xfc76468a00000000, + 0xe18bf38b00000000, 0xc68c2d8900000000, 0xdb71988800000000, + 0xb0bb619a00000000, 0xad46d49b00000000, 0x8a410a9900000000, + 0x97bcbf9800000000, 0xc44fb69c00000000, 0xd9b2039d00000000, + 0xfeb5dd9f00000000, 0xe348689e00000000, 0x5853ce9700000000, + 0x45ae7b9600000000, 0x62a9a59400000000, 
0x7f54109500000000, + 0x2ca7199100000000, 0x315aac9000000000, 0x165d729200000000, + 0x0ba0c79300000000}, + {0x0000000000000000, 0x24d9076300000000, 0x48b20fc600000000, + 0x6c6b08a500000000, 0xd1626e5700000000, 0xf5bb693400000000, + 0x99d0619100000000, 0xbd0966f200000000, 0xa2c5dcae00000000, + 0x861cdbcd00000000, 0xea77d36800000000, 0xceaed40b00000000, + 0x73a7b2f900000000, 0x577eb59a00000000, 0x3b15bd3f00000000, + 0x1fccba5c00000000, 0x058dc88600000000, 0x2154cfe500000000, + 0x4d3fc74000000000, 0x69e6c02300000000, 0xd4efa6d100000000, + 0xf036a1b200000000, 0x9c5da91700000000, 0xb884ae7400000000, + 0xa748142800000000, 0x8391134b00000000, 0xeffa1bee00000000, + 0xcb231c8d00000000, 0x762a7a7f00000000, 0x52f37d1c00000000, + 0x3e9875b900000000, 0x1a4172da00000000, 0x4b1ce0d600000000, + 0x6fc5e7b500000000, 0x03aeef1000000000, 0x2777e87300000000, + 0x9a7e8e8100000000, 0xbea789e200000000, 0xd2cc814700000000, + 0xf615862400000000, 0xe9d93c7800000000, 0xcd003b1b00000000, + 0xa16b33be00000000, 0x85b234dd00000000, 0x38bb522f00000000, + 0x1c62554c00000000, 0x70095de900000000, 0x54d05a8a00000000, + 0x4e91285000000000, 0x6a482f3300000000, 0x0623279600000000, + 0x22fa20f500000000, 0x9ff3460700000000, 0xbb2a416400000000, + 0xd74149c100000000, 0xf3984ea200000000, 0xec54f4fe00000000, + 0xc88df39d00000000, 0xa4e6fb3800000000, 0x803ffc5b00000000, + 0x3d369aa900000000, 0x19ef9dca00000000, 0x7584956f00000000, + 0x515d920c00000000, 0xd73eb17600000000, 0xf3e7b61500000000, + 0x9f8cbeb000000000, 0xbb55b9d300000000, 0x065cdf2100000000, + 0x2285d84200000000, 0x4eeed0e700000000, 0x6a37d78400000000, + 0x75fb6dd800000000, 0x51226abb00000000, 0x3d49621e00000000, + 0x1990657d00000000, 0xa499038f00000000, 0x804004ec00000000, + 0xec2b0c4900000000, 0xc8f20b2a00000000, 0xd2b379f000000000, + 0xf66a7e9300000000, 0x9a01763600000000, 0xbed8715500000000, + 0x03d117a700000000, 0x270810c400000000, 0x4b63186100000000, + 0x6fba1f0200000000, 0x7076a55e00000000, 0x54afa23d00000000, + 0x38c4aa9800000000, 
0x1c1dadfb00000000, 0xa114cb0900000000, + 0x85cdcc6a00000000, 0xe9a6c4cf00000000, 0xcd7fc3ac00000000, + 0x9c2251a000000000, 0xb8fb56c300000000, 0xd4905e6600000000, + 0xf049590500000000, 0x4d403ff700000000, 0x6999389400000000, + 0x05f2303100000000, 0x212b375200000000, 0x3ee78d0e00000000, + 0x1a3e8a6d00000000, 0x765582c800000000, 0x528c85ab00000000, + 0xef85e35900000000, 0xcb5ce43a00000000, 0xa737ec9f00000000, + 0x83eeebfc00000000, 0x99af992600000000, 0xbd769e4500000000, + 0xd11d96e000000000, 0xf5c4918300000000, 0x48cdf77100000000, + 0x6c14f01200000000, 0x007ff8b700000000, 0x24a6ffd400000000, + 0x3b6a458800000000, 0x1fb342eb00000000, 0x73d84a4e00000000, + 0x57014d2d00000000, 0xea082bdf00000000, 0xced12cbc00000000, + 0xa2ba241900000000, 0x8663237a00000000, 0xae7d62ed00000000, + 0x8aa4658e00000000, 0xe6cf6d2b00000000, 0xc2166a4800000000, + 0x7f1f0cba00000000, 0x5bc60bd900000000, 0x37ad037c00000000, + 0x1374041f00000000, 0x0cb8be4300000000, 0x2861b92000000000, + 0x440ab18500000000, 0x60d3b6e600000000, 0xdddad01400000000, + 0xf903d77700000000, 0x9568dfd200000000, 0xb1b1d8b100000000, + 0xabf0aa6b00000000, 0x8f29ad0800000000, 0xe342a5ad00000000, + 0xc79ba2ce00000000, 0x7a92c43c00000000, 0x5e4bc35f00000000, + 0x3220cbfa00000000, 0x16f9cc9900000000, 0x093576c500000000, + 0x2dec71a600000000, 0x4187790300000000, 0x655e7e6000000000, + 0xd857189200000000, 0xfc8e1ff100000000, 0x90e5175400000000, + 0xb43c103700000000, 0xe561823b00000000, 0xc1b8855800000000, + 0xadd38dfd00000000, 0x890a8a9e00000000, 0x3403ec6c00000000, + 0x10daeb0f00000000, 0x7cb1e3aa00000000, 0x5868e4c900000000, + 0x47a45e9500000000, 0x637d59f600000000, 0x0f16515300000000, + 0x2bcf563000000000, 0x96c630c200000000, 0xb21f37a100000000, + 0xde743f0400000000, 0xfaad386700000000, 0xe0ec4abd00000000, + 0xc4354dde00000000, 0xa85e457b00000000, 0x8c87421800000000, + 0x318e24ea00000000, 0x1557238900000000, 0x793c2b2c00000000, + 0x5de52c4f00000000, 0x4229961300000000, 0x66f0917000000000, + 0x0a9b99d500000000, 
0x2e429eb600000000, 0x934bf84400000000, + 0xb792ff2700000000, 0xdbf9f78200000000, 0xff20f0e100000000, + 0x7943d39b00000000, 0x5d9ad4f800000000, 0x31f1dc5d00000000, + 0x1528db3e00000000, 0xa821bdcc00000000, 0x8cf8baaf00000000, + 0xe093b20a00000000, 0xc44ab56900000000, 0xdb860f3500000000, + 0xff5f085600000000, 0x933400f300000000, 0xb7ed079000000000, + 0x0ae4616200000000, 0x2e3d660100000000, 0x42566ea400000000, + 0x668f69c700000000, 0x7cce1b1d00000000, 0x58171c7e00000000, + 0x347c14db00000000, 0x10a513b800000000, 0xadac754a00000000, + 0x8975722900000000, 0xe51e7a8c00000000, 0xc1c77def00000000, + 0xde0bc7b300000000, 0xfad2c0d000000000, 0x96b9c87500000000, + 0xb260cf1600000000, 0x0f69a9e400000000, 0x2bb0ae8700000000, + 0x47dba62200000000, 0x6302a14100000000, 0x325f334d00000000, + 0x1686342e00000000, 0x7aed3c8b00000000, 0x5e343be800000000, + 0xe33d5d1a00000000, 0xc7e45a7900000000, 0xab8f52dc00000000, + 0x8f5655bf00000000, 0x909aefe300000000, 0xb443e88000000000, + 0xd828e02500000000, 0xfcf1e74600000000, 0x41f881b400000000, + 0x652186d700000000, 0x094a8e7200000000, 0x2d93891100000000, + 0x37d2fbcb00000000, 0x130bfca800000000, 0x7f60f40d00000000, + 0x5bb9f36e00000000, 0xe6b0959c00000000, 0xc26992ff00000000, + 0xae029a5a00000000, 0x8adb9d3900000000, 0x9517276500000000, + 0xb1ce200600000000, 0xdda528a300000000, 0xf97c2fc000000000, + 0x4475493200000000, 0x60ac4e5100000000, 0x0cc746f400000000, + 0x281e419700000000}, + {0x0000000000000000, 0x08e3603c00000000, 0x10c6c17800000000, + 0x1825a14400000000, 0x208c83f100000000, 0x286fe3cd00000000, + 0x304a428900000000, 0x38a922b500000000, 0x011e763800000000, + 0x09fd160400000000, 0x11d8b74000000000, 0x193bd77c00000000, + 0x2192f5c900000000, 0x297195f500000000, 0x315434b100000000, + 0x39b7548d00000000, 0x023cec7000000000, 0x0adf8c4c00000000, + 0x12fa2d0800000000, 0x1a194d3400000000, 0x22b06f8100000000, + 0x2a530fbd00000000, 0x3276aef900000000, 0x3a95cec500000000, + 0x03229a4800000000, 0x0bc1fa7400000000, 0x13e45b3000000000, + 
0x1b073b0c00000000, 0x23ae19b900000000, 0x2b4d798500000000, + 0x3368d8c100000000, 0x3b8bb8fd00000000, 0x0478d8e100000000, + 0x0c9bb8dd00000000, 0x14be199900000000, 0x1c5d79a500000000, + 0x24f45b1000000000, 0x2c173b2c00000000, 0x34329a6800000000, + 0x3cd1fa5400000000, 0x0566aed900000000, 0x0d85cee500000000, + 0x15a06fa100000000, 0x1d430f9d00000000, 0x25ea2d2800000000, + 0x2d094d1400000000, 0x352cec5000000000, 0x3dcf8c6c00000000, + 0x0644349100000000, 0x0ea754ad00000000, 0x1682f5e900000000, + 0x1e6195d500000000, 0x26c8b76000000000, 0x2e2bd75c00000000, + 0x360e761800000000, 0x3eed162400000000, 0x075a42a900000000, + 0x0fb9229500000000, 0x179c83d100000000, 0x1f7fe3ed00000000, + 0x27d6c15800000000, 0x2f35a16400000000, 0x3710002000000000, + 0x3ff3601c00000000, 0x49f6c11800000000, 0x4115a12400000000, + 0x5930006000000000, 0x51d3605c00000000, 0x697a42e900000000, + 0x619922d500000000, 0x79bc839100000000, 0x715fe3ad00000000, + 0x48e8b72000000000, 0x400bd71c00000000, 0x582e765800000000, + 0x50cd166400000000, 0x686434d100000000, 0x608754ed00000000, + 0x78a2f5a900000000, 0x7041959500000000, 0x4bca2d6800000000, + 0x43294d5400000000, 0x5b0cec1000000000, 0x53ef8c2c00000000, + 0x6b46ae9900000000, 0x63a5cea500000000, 0x7b806fe100000000, + 0x73630fdd00000000, 0x4ad45b5000000000, 0x42373b6c00000000, + 0x5a129a2800000000, 0x52f1fa1400000000, 0x6a58d8a100000000, + 0x62bbb89d00000000, 0x7a9e19d900000000, 0x727d79e500000000, + 0x4d8e19f900000000, 0x456d79c500000000, 0x5d48d88100000000, + 0x55abb8bd00000000, 0x6d029a0800000000, 0x65e1fa3400000000, + 0x7dc45b7000000000, 0x75273b4c00000000, 0x4c906fc100000000, + 0x44730ffd00000000, 0x5c56aeb900000000, 0x54b5ce8500000000, + 0x6c1cec3000000000, 0x64ff8c0c00000000, 0x7cda2d4800000000, + 0x74394d7400000000, 0x4fb2f58900000000, 0x475195b500000000, + 0x5f7434f100000000, 0x579754cd00000000, 0x6f3e767800000000, + 0x67dd164400000000, 0x7ff8b70000000000, 0x771bd73c00000000, + 0x4eac83b100000000, 0x464fe38d00000000, 0x5e6a42c900000000, + 
0x568922f500000000, 0x6e20004000000000, 0x66c3607c00000000, + 0x7ee6c13800000000, 0x7605a10400000000, 0x92ec833100000000, + 0x9a0fe30d00000000, 0x822a424900000000, 0x8ac9227500000000, + 0xb26000c000000000, 0xba8360fc00000000, 0xa2a6c1b800000000, + 0xaa45a18400000000, 0x93f2f50900000000, 0x9b11953500000000, + 0x8334347100000000, 0x8bd7544d00000000, 0xb37e76f800000000, + 0xbb9d16c400000000, 0xa3b8b78000000000, 0xab5bd7bc00000000, + 0x90d06f4100000000, 0x98330f7d00000000, 0x8016ae3900000000, + 0x88f5ce0500000000, 0xb05cecb000000000, 0xb8bf8c8c00000000, + 0xa09a2dc800000000, 0xa8794df400000000, 0x91ce197900000000, + 0x992d794500000000, 0x8108d80100000000, 0x89ebb83d00000000, + 0xb1429a8800000000, 0xb9a1fab400000000, 0xa1845bf000000000, + 0xa9673bcc00000000, 0x96945bd000000000, 0x9e773bec00000000, + 0x86529aa800000000, 0x8eb1fa9400000000, 0xb618d82100000000, + 0xbefbb81d00000000, 0xa6de195900000000, 0xae3d796500000000, + 0x978a2de800000000, 0x9f694dd400000000, 0x874cec9000000000, + 0x8faf8cac00000000, 0xb706ae1900000000, 0xbfe5ce2500000000, + 0xa7c06f6100000000, 0xaf230f5d00000000, 0x94a8b7a000000000, + 0x9c4bd79c00000000, 0x846e76d800000000, 0x8c8d16e400000000, + 0xb424345100000000, 0xbcc7546d00000000, 0xa4e2f52900000000, + 0xac01951500000000, 0x95b6c19800000000, 0x9d55a1a400000000, + 0x857000e000000000, 0x8d9360dc00000000, 0xb53a426900000000, + 0xbdd9225500000000, 0xa5fc831100000000, 0xad1fe32d00000000, + 0xdb1a422900000000, 0xd3f9221500000000, 0xcbdc835100000000, + 0xc33fe36d00000000, 0xfb96c1d800000000, 0xf375a1e400000000, + 0xeb5000a000000000, 0xe3b3609c00000000, 0xda04341100000000, + 0xd2e7542d00000000, 0xcac2f56900000000, 0xc221955500000000, + 0xfa88b7e000000000, 0xf26bd7dc00000000, 0xea4e769800000000, + 0xe2ad16a400000000, 0xd926ae5900000000, 0xd1c5ce6500000000, + 0xc9e06f2100000000, 0xc1030f1d00000000, 0xf9aa2da800000000, + 0xf1494d9400000000, 0xe96cecd000000000, 0xe18f8cec00000000, + 0xd838d86100000000, 0xd0dbb85d00000000, 0xc8fe191900000000, + 
0xc01d792500000000, 0xf8b45b9000000000, 0xf0573bac00000000, + 0xe8729ae800000000, 0xe091fad400000000, 0xdf629ac800000000, + 0xd781faf400000000, 0xcfa45bb000000000, 0xc7473b8c00000000, + 0xffee193900000000, 0xf70d790500000000, 0xef28d84100000000, + 0xe7cbb87d00000000, 0xde7cecf000000000, 0xd69f8ccc00000000, + 0xceba2d8800000000, 0xc6594db400000000, 0xfef06f0100000000, + 0xf6130f3d00000000, 0xee36ae7900000000, 0xe6d5ce4500000000, + 0xdd5e76b800000000, 0xd5bd168400000000, 0xcd98b7c000000000, + 0xc57bd7fc00000000, 0xfdd2f54900000000, 0xf531957500000000, + 0xed14343100000000, 0xe5f7540d00000000, 0xdc40008000000000, + 0xd4a360bc00000000, 0xcc86c1f800000000, 0xc465a1c400000000, + 0xfccc837100000000, 0xf42fe34d00000000, 0xec0a420900000000, + 0xe4e9223500000000}, + {0x0000000000000000, 0xd1e8e70e00000000, 0xa2d1cf1d00000000, + 0x7339281300000000, 0x44a39f3b00000000, 0x954b783500000000, + 0xe672502600000000, 0x379ab72800000000, 0x88463f7700000000, + 0x59aed87900000000, 0x2a97f06a00000000, 0xfb7f176400000000, + 0xcce5a04c00000000, 0x1d0d474200000000, 0x6e346f5100000000, + 0xbfdc885f00000000, 0x108d7eee00000000, 0xc16599e000000000, + 0xb25cb1f300000000, 0x63b456fd00000000, 0x542ee1d500000000, + 0x85c606db00000000, 0xf6ff2ec800000000, 0x2717c9c600000000, + 0x98cb419900000000, 0x4923a69700000000, 0x3a1a8e8400000000, + 0xebf2698a00000000, 0xdc68dea200000000, 0x0d8039ac00000000, + 0x7eb911bf00000000, 0xaf51f6b100000000, 0x611c8c0700000000, + 0xb0f46b0900000000, 0xc3cd431a00000000, 0x1225a41400000000, + 0x25bf133c00000000, 0xf457f43200000000, 0x876edc2100000000, + 0x56863b2f00000000, 0xe95ab37000000000, 0x38b2547e00000000, + 0x4b8b7c6d00000000, 0x9a639b6300000000, 0xadf92c4b00000000, + 0x7c11cb4500000000, 0x0f28e35600000000, 0xdec0045800000000, + 0x7191f2e900000000, 0xa07915e700000000, 0xd3403df400000000, + 0x02a8dafa00000000, 0x35326dd200000000, 0xe4da8adc00000000, + 0x97e3a2cf00000000, 0x460b45c100000000, 0xf9d7cd9e00000000, + 0x283f2a9000000000, 0x5b06028300000000, 
0x8aeee58d00000000, + 0xbd7452a500000000, 0x6c9cb5ab00000000, 0x1fa59db800000000, + 0xce4d7ab600000000, 0xc238180f00000000, 0x13d0ff0100000000, + 0x60e9d71200000000, 0xb101301c00000000, 0x869b873400000000, + 0x5773603a00000000, 0x244a482900000000, 0xf5a2af2700000000, + 0x4a7e277800000000, 0x9b96c07600000000, 0xe8afe86500000000, + 0x39470f6b00000000, 0x0eddb84300000000, 0xdf355f4d00000000, + 0xac0c775e00000000, 0x7de4905000000000, 0xd2b566e100000000, + 0x035d81ef00000000, 0x7064a9fc00000000, 0xa18c4ef200000000, + 0x9616f9da00000000, 0x47fe1ed400000000, 0x34c736c700000000, + 0xe52fd1c900000000, 0x5af3599600000000, 0x8b1bbe9800000000, + 0xf822968b00000000, 0x29ca718500000000, 0x1e50c6ad00000000, + 0xcfb821a300000000, 0xbc8109b000000000, 0x6d69eebe00000000, + 0xa324940800000000, 0x72cc730600000000, 0x01f55b1500000000, + 0xd01dbc1b00000000, 0xe7870b3300000000, 0x366fec3d00000000, + 0x4556c42e00000000, 0x94be232000000000, 0x2b62ab7f00000000, + 0xfa8a4c7100000000, 0x89b3646200000000, 0x585b836c00000000, + 0x6fc1344400000000, 0xbe29d34a00000000, 0xcd10fb5900000000, + 0x1cf81c5700000000, 0xb3a9eae600000000, 0x62410de800000000, + 0x117825fb00000000, 0xc090c2f500000000, 0xf70a75dd00000000, + 0x26e292d300000000, 0x55dbbac000000000, 0x84335dce00000000, + 0x3befd59100000000, 0xea07329f00000000, 0x993e1a8c00000000, + 0x48d6fd8200000000, 0x7f4c4aaa00000000, 0xaea4ada400000000, + 0xdd9d85b700000000, 0x0c7562b900000000, 0x8471301e00000000, + 0x5599d71000000000, 0x26a0ff0300000000, 0xf748180d00000000, + 0xc0d2af2500000000, 0x113a482b00000000, 0x6203603800000000, + 0xb3eb873600000000, 0x0c370f6900000000, 0xdddfe86700000000, + 0xaee6c07400000000, 0x7f0e277a00000000, 0x4894905200000000, + 0x997c775c00000000, 0xea455f4f00000000, 0x3badb84100000000, + 0x94fc4ef000000000, 0x4514a9fe00000000, 0x362d81ed00000000, + 0xe7c566e300000000, 0xd05fd1cb00000000, 0x01b736c500000000, + 0x728e1ed600000000, 0xa366f9d800000000, 0x1cba718700000000, + 0xcd52968900000000, 0xbe6bbe9a00000000, 
0x6f83599400000000, + 0x5819eebc00000000, 0x89f109b200000000, 0xfac821a100000000, + 0x2b20c6af00000000, 0xe56dbc1900000000, 0x34855b1700000000, + 0x47bc730400000000, 0x9654940a00000000, 0xa1ce232200000000, + 0x7026c42c00000000, 0x031fec3f00000000, 0xd2f70b3100000000, + 0x6d2b836e00000000, 0xbcc3646000000000, 0xcffa4c7300000000, + 0x1e12ab7d00000000, 0x29881c5500000000, 0xf860fb5b00000000, + 0x8b59d34800000000, 0x5ab1344600000000, 0xf5e0c2f700000000, + 0x240825f900000000, 0x57310dea00000000, 0x86d9eae400000000, + 0xb1435dcc00000000, 0x60abbac200000000, 0x139292d100000000, + 0xc27a75df00000000, 0x7da6fd8000000000, 0xac4e1a8e00000000, + 0xdf77329d00000000, 0x0e9fd59300000000, 0x390562bb00000000, + 0xe8ed85b500000000, 0x9bd4ada600000000, 0x4a3c4aa800000000, + 0x4649281100000000, 0x97a1cf1f00000000, 0xe498e70c00000000, + 0x3570000200000000, 0x02eab72a00000000, 0xd302502400000000, + 0xa03b783700000000, 0x71d39f3900000000, 0xce0f176600000000, + 0x1fe7f06800000000, 0x6cded87b00000000, 0xbd363f7500000000, + 0x8aac885d00000000, 0x5b446f5300000000, 0x287d474000000000, + 0xf995a04e00000000, 0x56c456ff00000000, 0x872cb1f100000000, + 0xf41599e200000000, 0x25fd7eec00000000, 0x1267c9c400000000, + 0xc38f2eca00000000, 0xb0b606d900000000, 0x615ee1d700000000, + 0xde82698800000000, 0x0f6a8e8600000000, 0x7c53a69500000000, + 0xadbb419b00000000, 0x9a21f6b300000000, 0x4bc911bd00000000, + 0x38f039ae00000000, 0xe918dea000000000, 0x2755a41600000000, + 0xf6bd431800000000, 0x85846b0b00000000, 0x546c8c0500000000, + 0x63f63b2d00000000, 0xb21edc2300000000, 0xc127f43000000000, + 0x10cf133e00000000, 0xaf139b6100000000, 0x7efb7c6f00000000, + 0x0dc2547c00000000, 0xdc2ab37200000000, 0xebb0045a00000000, + 0x3a58e35400000000, 0x4961cb4700000000, 0x98892c4900000000, + 0x37d8daf800000000, 0xe6303df600000000, 0x950915e500000000, + 0x44e1f2eb00000000, 0x737b45c300000000, 0xa293a2cd00000000, + 0xd1aa8ade00000000, 0x00426dd000000000, 0xbf9ee58f00000000, + 0x6e76028100000000, 0x1d4f2a9200000000, 
0xcca7cd9c00000000, + 0xfb3d7ab400000000, 0x2ad59dba00000000, 0x59ecb5a900000000, + 0x880452a700000000}, + {0x0000000000000000, 0xaa05daf100000000, 0x150dc53800000000, + 0xbf081fc900000000, 0x2a1a8a7100000000, 0x801f508000000000, + 0x3f174f4900000000, 0x951295b800000000, 0x543414e300000000, + 0xfe31ce1200000000, 0x4139d1db00000000, 0xeb3c0b2a00000000, + 0x7e2e9e9200000000, 0xd42b446300000000, 0x6b235baa00000000, + 0xc126815b00000000, 0xe96e591d00000000, 0x436b83ec00000000, + 0xfc639c2500000000, 0x566646d400000000, 0xc374d36c00000000, + 0x6971099d00000000, 0xd679165400000000, 0x7c7ccca500000000, + 0xbd5a4dfe00000000, 0x175f970f00000000, 0xa85788c600000000, + 0x0252523700000000, 0x9740c78f00000000, 0x3d451d7e00000000, + 0x824d02b700000000, 0x2848d84600000000, 0xd2ddb23a00000000, + 0x78d868cb00000000, 0xc7d0770200000000, 0x6dd5adf300000000, + 0xf8c7384b00000000, 0x52c2e2ba00000000, 0xedcafd7300000000, + 0x47cf278200000000, 0x86e9a6d900000000, 0x2cec7c2800000000, + 0x93e463e100000000, 0x39e1b91000000000, 0xacf32ca800000000, + 0x06f6f65900000000, 0xb9fee99000000000, 0x13fb336100000000, + 0x3bb3eb2700000000, 0x91b631d600000000, 0x2ebe2e1f00000000, + 0x84bbf4ee00000000, 0x11a9615600000000, 0xbbacbba700000000, + 0x04a4a46e00000000, 0xaea17e9f00000000, 0x6f87ffc400000000, + 0xc582253500000000, 0x7a8a3afc00000000, 0xd08fe00d00000000, + 0x459d75b500000000, 0xef98af4400000000, 0x5090b08d00000000, + 0xfa956a7c00000000, 0xa4bb657500000000, 0x0ebebf8400000000, + 0xb1b6a04d00000000, 0x1bb37abc00000000, 0x8ea1ef0400000000, + 0x24a435f500000000, 0x9bac2a3c00000000, 0x31a9f0cd00000000, + 0xf08f719600000000, 0x5a8aab6700000000, 0xe582b4ae00000000, + 0x4f876e5f00000000, 0xda95fbe700000000, 0x7090211600000000, + 0xcf983edf00000000, 0x659de42e00000000, 0x4dd53c6800000000, + 0xe7d0e69900000000, 0x58d8f95000000000, 0xf2dd23a100000000, + 0x67cfb61900000000, 0xcdca6ce800000000, 0x72c2732100000000, + 0xd8c7a9d000000000, 0x19e1288b00000000, 0xb3e4f27a00000000, + 0x0cecedb300000000, 
0xa6e9374200000000, 0x33fba2fa00000000, + 0x99fe780b00000000, 0x26f667c200000000, 0x8cf3bd3300000000, + 0x7666d74f00000000, 0xdc630dbe00000000, 0x636b127700000000, + 0xc96ec88600000000, 0x5c7c5d3e00000000, 0xf67987cf00000000, + 0x4971980600000000, 0xe37442f700000000, 0x2252c3ac00000000, + 0x8857195d00000000, 0x375f069400000000, 0x9d5adc6500000000, + 0x084849dd00000000, 0xa24d932c00000000, 0x1d458ce500000000, + 0xb740561400000000, 0x9f088e5200000000, 0x350d54a300000000, + 0x8a054b6a00000000, 0x2000919b00000000, 0xb512042300000000, + 0x1f17ded200000000, 0xa01fc11b00000000, 0x0a1a1bea00000000, + 0xcb3c9ab100000000, 0x6139404000000000, 0xde315f8900000000, + 0x7434857800000000, 0xe12610c000000000, 0x4b23ca3100000000, + 0xf42bd5f800000000, 0x5e2e0f0900000000, 0x4877cbea00000000, + 0xe272111b00000000, 0x5d7a0ed200000000, 0xf77fd42300000000, + 0x626d419b00000000, 0xc8689b6a00000000, 0x776084a300000000, + 0xdd655e5200000000, 0x1c43df0900000000, 0xb64605f800000000, + 0x094e1a3100000000, 0xa34bc0c000000000, 0x3659557800000000, + 0x9c5c8f8900000000, 0x2354904000000000, 0x89514ab100000000, + 0xa11992f700000000, 0x0b1c480600000000, 0xb41457cf00000000, + 0x1e118d3e00000000, 0x8b03188600000000, 0x2106c27700000000, + 0x9e0eddbe00000000, 0x340b074f00000000, 0xf52d861400000000, + 0x5f285ce500000000, 0xe020432c00000000, 0x4a2599dd00000000, + 0xdf370c6500000000, 0x7532d69400000000, 0xca3ac95d00000000, + 0x603f13ac00000000, 0x9aaa79d000000000, 0x30afa32100000000, + 0x8fa7bce800000000, 0x25a2661900000000, 0xb0b0f3a100000000, + 0x1ab5295000000000, 0xa5bd369900000000, 0x0fb8ec6800000000, + 0xce9e6d3300000000, 0x649bb7c200000000, 0xdb93a80b00000000, + 0x719672fa00000000, 0xe484e74200000000, 0x4e813db300000000, + 0xf189227a00000000, 0x5b8cf88b00000000, 0x73c420cd00000000, + 0xd9c1fa3c00000000, 0x66c9e5f500000000, 0xcccc3f0400000000, + 0x59deaabc00000000, 0xf3db704d00000000, 0x4cd36f8400000000, + 0xe6d6b57500000000, 0x27f0342e00000000, 0x8df5eedf00000000, + 0x32fdf11600000000, 
0x98f82be700000000, 0x0deabe5f00000000, + 0xa7ef64ae00000000, 0x18e77b6700000000, 0xb2e2a19600000000, + 0xecccae9f00000000, 0x46c9746e00000000, 0xf9c16ba700000000, + 0x53c4b15600000000, 0xc6d624ee00000000, 0x6cd3fe1f00000000, + 0xd3dbe1d600000000, 0x79de3b2700000000, 0xb8f8ba7c00000000, + 0x12fd608d00000000, 0xadf57f4400000000, 0x07f0a5b500000000, + 0x92e2300d00000000, 0x38e7eafc00000000, 0x87eff53500000000, + 0x2dea2fc400000000, 0x05a2f78200000000, 0xafa72d7300000000, + 0x10af32ba00000000, 0xbaaae84b00000000, 0x2fb87df300000000, + 0x85bda70200000000, 0x3ab5b8cb00000000, 0x90b0623a00000000, + 0x5196e36100000000, 0xfb93399000000000, 0x449b265900000000, + 0xee9efca800000000, 0x7b8c691000000000, 0xd189b3e100000000, + 0x6e81ac2800000000, 0xc48476d900000000, 0x3e111ca500000000, + 0x9414c65400000000, 0x2b1cd99d00000000, 0x8119036c00000000, + 0x140b96d400000000, 0xbe0e4c2500000000, 0x010653ec00000000, + 0xab03891d00000000, 0x6a25084600000000, 0xc020d2b700000000, + 0x7f28cd7e00000000, 0xd52d178f00000000, 0x403f823700000000, + 0xea3a58c600000000, 0x5532470f00000000, 0xff379dfe00000000, + 0xd77f45b800000000, 0x7d7a9f4900000000, 0xc272808000000000, + 0x68775a7100000000, 0xfd65cfc900000000, 0x5760153800000000, + 0xe8680af100000000, 0x426dd00000000000, 0x834b515b00000000, + 0x294e8baa00000000, 0x9646946300000000, 0x3c434e9200000000, + 0xa951db2a00000000, 0x035401db00000000, 0xbc5c1e1200000000, + 0x1659c4e300000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 
0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 
0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 
0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 
0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 
0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 
0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 
0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x85d996dd, 0x4bb55c60, 0xce6ccabd, 0x966ab9c0, + 0x13b32f1d, 0xdddfe5a0, 0x5806737d, 0x6dd3035a, 0xe80a9587, + 0x26665f3a, 0xa3bfc9e7, 0xfbb9ba9a, 0x7e602c47, 0xb00ce6fa, + 0x35d57027, 0xdaa607b4, 0x5f7f9169, 0x91135bd4, 0x14cacd09, + 0x4cccbe74, 0xc91528a9, 0x0779e214, 0x82a074c9, 0xb77504ee, + 0x32ac9233, 0xfcc0588e, 0x7919ce53, 0x211fbd2e, 0xa4c62bf3, + 0x6aaae14e, 0xef737793, 0xf54b7eb3, 0x7092e86e, 0xbefe22d3, + 0x3b27b40e, 0x6321c773, 0xe6f851ae, 0x28949b13, 0xad4d0dce, + 0x98987de9, 0x1d41eb34, 0xd32d2189, 0x56f4b754, 0x0ef2c429, + 0x8b2b52f4, 0x45479849, 0xc09e0e94, 0x2fed7907, 0xaa34efda, + 0x64582567, 0xe181b3ba, 0xb987c0c7, 0x3c5e561a, 0xf2329ca7, + 0x77eb0a7a, 0x423e7a5d, 0xc7e7ec80, 0x098b263d, 0x8c52b0e0, + 0xd454c39d, 0x518d5540, 0x9fe19ffd, 0x1a380920, 0xab918dbd, + 0x2e481b60, 0xe024d1dd, 0x65fd4700, 0x3dfb347d, 0xb822a2a0, + 0x764e681d, 0xf397fec0, 0xc6428ee7, 0x439b183a, 0x8df7d287, + 0x082e445a, 0x50283727, 0xd5f1a1fa, 0x1b9d6b47, 0x9e44fd9a, + 0x71378a09, 0xf4ee1cd4, 0x3a82d669, 0xbf5b40b4, 0xe75d33c9, + 0x6284a514, 0xace86fa9, 0x2931f974, 0x1ce48953, 0x993d1f8e, + 0x5751d533, 0xd28843ee, 0x8a8e3093, 0x0f57a64e, 0xc13b6cf3, + 0x44e2fa2e, 0x5edaf30e, 0xdb0365d3, 0x156faf6e, 0x90b639b3, + 0xc8b04ace, 0x4d69dc13, 0x830516ae, 0x06dc8073, 0x3309f054, + 0xb6d06689, 0x78bcac34, 0xfd653ae9, 0xa5634994, 0x20badf49, + 0xeed615f4, 0x6b0f8329, 0x847cf4ba, 0x01a56267, 0xcfc9a8da, + 0x4a103e07, 0x12164d7a, 0x97cfdba7, 0x59a3111a, 0xdc7a87c7, + 0xe9aff7e0, 0x6c76613d, 0xa21aab80, 0x27c33d5d, 0x7fc54e20, + 0xfa1cd8fd, 0x34701240, 0xb1a9849d, 0x17256aa0, 0x92fcfc7d, + 0x5c9036c0, 0xd949a01d, 0x814fd360, 0x049645bd, 0xcafa8f00, + 0x4f2319dd, 0x7af669fa, 
0xff2fff27, 0x3143359a, 0xb49aa347, + 0xec9cd03a, 0x694546e7, 0xa7298c5a, 0x22f01a87, 0xcd836d14, + 0x485afbc9, 0x86363174, 0x03efa7a9, 0x5be9d4d4, 0xde304209, + 0x105c88b4, 0x95851e69, 0xa0506e4e, 0x2589f893, 0xebe5322e, + 0x6e3ca4f3, 0x363ad78e, 0xb3e34153, 0x7d8f8bee, 0xf8561d33, + 0xe26e1413, 0x67b782ce, 0xa9db4873, 0x2c02deae, 0x7404add3, + 0xf1dd3b0e, 0x3fb1f1b3, 0xba68676e, 0x8fbd1749, 0x0a648194, + 0xc4084b29, 0x41d1ddf4, 0x19d7ae89, 0x9c0e3854, 0x5262f2e9, + 0xd7bb6434, 0x38c813a7, 0xbd11857a, 0x737d4fc7, 0xf6a4d91a, + 0xaea2aa67, 0x2b7b3cba, 0xe517f607, 0x60ce60da, 0x551b10fd, + 0xd0c28620, 0x1eae4c9d, 0x9b77da40, 0xc371a93d, 0x46a83fe0, + 0x88c4f55d, 0x0d1d6380, 0xbcb4e71d, 0x396d71c0, 0xf701bb7d, + 0x72d82da0, 0x2ade5edd, 0xaf07c800, 0x616b02bd, 0xe4b29460, + 0xd167e447, 0x54be729a, 0x9ad2b827, 0x1f0b2efa, 0x470d5d87, + 0xc2d4cb5a, 0x0cb801e7, 0x8961973a, 0x6612e0a9, 0xe3cb7674, + 0x2da7bcc9, 0xa87e2a14, 0xf0785969, 0x75a1cfb4, 0xbbcd0509, + 0x3e1493d4, 0x0bc1e3f3, 0x8e18752e, 0x4074bf93, 0xc5ad294e, + 0x9dab5a33, 0x1872ccee, 0xd61e0653, 0x53c7908e, 0x49ff99ae, + 0xcc260f73, 0x024ac5ce, 0x87935313, 0xdf95206e, 0x5a4cb6b3, + 0x94207c0e, 0x11f9ead3, 0x242c9af4, 0xa1f50c29, 0x6f99c694, + 0xea405049, 0xb2462334, 0x379fb5e9, 0xf9f37f54, 0x7c2ae989, + 0x93599e1a, 0x168008c7, 0xd8ecc27a, 0x5d3554a7, 0x053327da, + 0x80eab107, 0x4e867bba, 0xcb5fed67, 0xfe8a9d40, 0x7b530b9d, + 0xb53fc120, 0x30e657fd, 0x68e02480, 0xed39b25d, 0x235578e0, + 0xa68cee3d}, + {0x00000000, 0x76e10f9d, 0xadc46ee1, 0xdb25617c, 0x1b8fac19, + 0x6d6ea384, 0xb64bc2f8, 0xc0aacd65, 0x361e5933, 0x40ff56ae, + 0x9bda37d2, 0xed3b384f, 0x2d91f52a, 0x5b70fab7, 0x80559bcb, + 0xf6b49456, 0x6c3cb266, 0x1addbdfb, 0xc1f8dc87, 0xb719d31a, + 0x77b31e7f, 0x015211e2, 0xda77709e, 0xac967f03, 0x5a22eb55, + 0x2cc3e4c8, 0xf7e685b4, 0x81078a29, 0x41ad474c, 0x374c48d1, + 0xec6929ad, 0x9a882630, 0xd87864cd, 0xae996b50, 0x75bc0a2c, + 0x035d05b1, 0xc3f7c8d4, 0xb516c749, 0x6e33a635, 0x18d2a9a8, + 0xee663dfe, 0x98873263, 
0x43a2531f, 0x35435c82, 0xf5e991e7, + 0x83089e7a, 0x582dff06, 0x2eccf09b, 0xb444d6ab, 0xc2a5d936, + 0x1980b84a, 0x6f61b7d7, 0xafcb7ab2, 0xd92a752f, 0x020f1453, + 0x74ee1bce, 0x825a8f98, 0xf4bb8005, 0x2f9ee179, 0x597feee4, + 0x99d52381, 0xef342c1c, 0x34114d60, 0x42f042fd, 0xf1f7b941, + 0x8716b6dc, 0x5c33d7a0, 0x2ad2d83d, 0xea781558, 0x9c991ac5, + 0x47bc7bb9, 0x315d7424, 0xc7e9e072, 0xb108efef, 0x6a2d8e93, + 0x1ccc810e, 0xdc664c6b, 0xaa8743f6, 0x71a2228a, 0x07432d17, + 0x9dcb0b27, 0xeb2a04ba, 0x300f65c6, 0x46ee6a5b, 0x8644a73e, + 0xf0a5a8a3, 0x2b80c9df, 0x5d61c642, 0xabd55214, 0xdd345d89, + 0x06113cf5, 0x70f03368, 0xb05afe0d, 0xc6bbf190, 0x1d9e90ec, + 0x6b7f9f71, 0x298fdd8c, 0x5f6ed211, 0x844bb36d, 0xf2aabcf0, + 0x32007195, 0x44e17e08, 0x9fc41f74, 0xe92510e9, 0x1f9184bf, + 0x69708b22, 0xb255ea5e, 0xc4b4e5c3, 0x041e28a6, 0x72ff273b, + 0xa9da4647, 0xdf3b49da, 0x45b36fea, 0x33526077, 0xe877010b, + 0x9e960e96, 0x5e3cc3f3, 0x28ddcc6e, 0xf3f8ad12, 0x8519a28f, + 0x73ad36d9, 0x054c3944, 0xde695838, 0xa88857a5, 0x68229ac0, + 0x1ec3955d, 0xc5e6f421, 0xb307fbbc, 0xe2ef7383, 0x940e7c1e, + 0x4f2b1d62, 0x39ca12ff, 0xf960df9a, 0x8f81d007, 0x54a4b17b, + 0x2245bee6, 0xd4f12ab0, 0xa210252d, 0x79354451, 0x0fd44bcc, + 0xcf7e86a9, 0xb99f8934, 0x62bae848, 0x145be7d5, 0x8ed3c1e5, + 0xf832ce78, 0x2317af04, 0x55f6a099, 0x955c6dfc, 0xe3bd6261, + 0x3898031d, 0x4e790c80, 0xb8cd98d6, 0xce2c974b, 0x1509f637, + 0x63e8f9aa, 0xa34234cf, 0xd5a33b52, 0x0e865a2e, 0x786755b3, + 0x3a97174e, 0x4c7618d3, 0x975379af, 0xe1b27632, 0x2118bb57, + 0x57f9b4ca, 0x8cdcd5b6, 0xfa3dda2b, 0x0c894e7d, 0x7a6841e0, + 0xa14d209c, 0xd7ac2f01, 0x1706e264, 0x61e7edf9, 0xbac28c85, + 0xcc238318, 0x56aba528, 0x204aaab5, 0xfb6fcbc9, 0x8d8ec454, + 0x4d240931, 0x3bc506ac, 0xe0e067d0, 0x9601684d, 0x60b5fc1b, + 0x1654f386, 0xcd7192fa, 0xbb909d67, 0x7b3a5002, 0x0ddb5f9f, + 0xd6fe3ee3, 0xa01f317e, 0x1318cac2, 0x65f9c55f, 0xbedca423, + 0xc83dabbe, 0x089766db, 0x7e766946, 0xa553083a, 0xd3b207a7, + 0x250693f1, 0x53e79c6c, 0x88c2fd10, 
0xfe23f28d, 0x3e893fe8, + 0x48683075, 0x934d5109, 0xe5ac5e94, 0x7f2478a4, 0x09c57739, + 0xd2e01645, 0xa40119d8, 0x64abd4bd, 0x124adb20, 0xc96fba5c, + 0xbf8eb5c1, 0x493a2197, 0x3fdb2e0a, 0xe4fe4f76, 0x921f40eb, + 0x52b58d8e, 0x24548213, 0xff71e36f, 0x8990ecf2, 0xcb60ae0f, + 0xbd81a192, 0x66a4c0ee, 0x1045cf73, 0xd0ef0216, 0xa60e0d8b, + 0x7d2b6cf7, 0x0bca636a, 0xfd7ef73c, 0x8b9ff8a1, 0x50ba99dd, + 0x265b9640, 0xe6f15b25, 0x901054b8, 0x4b3535c4, 0x3dd43a59, + 0xa75c1c69, 0xd1bd13f4, 0x0a987288, 0x7c797d15, 0xbcd3b070, + 0xca32bfed, 0x1117de91, 0x67f6d10c, 0x9142455a, 0xe7a34ac7, + 0x3c862bbb, 0x4a672426, 0x8acde943, 0xfc2ce6de, 0x270987a2, + 0x51e8883f}, + {0x00000000, 0xe8dbfbb9, 0x91b186a8, 0x796a7d11, 0x63657c8a, + 0x8bbe8733, 0xf2d4fa22, 0x1a0f019b, 0x87cc89cf, 0x6f177276, + 0x167d0f67, 0xfea6f4de, 0xe4a9f545, 0x0c720efc, 0x751873ed, + 0x9dc38854, 0x4f9f6244, 0xa74499fd, 0xde2ee4ec, 0x36f51f55, + 0x2cfa1ece, 0xc421e577, 0xbd4b9866, 0x559063df, 0xc853eb8b, + 0x20881032, 0x59e26d23, 0xb139969a, 0xab369701, 0x43ed6cb8, + 0x3a8711a9, 0xd25cea10, 0x9e3ec588, 0x76e53e31, 0x0f8f4320, + 0xe754b899, 0xfd5bb902, 0x158042bb, 0x6cea3faa, 0x8431c413, + 0x19f24c47, 0xf129b7fe, 0x8843caef, 0x60983156, 0x7a9730cd, + 0x924ccb74, 0xeb26b665, 0x03fd4ddc, 0xd1a1a7cc, 0x397a5c75, + 0x40102164, 0xa8cbdadd, 0xb2c4db46, 0x5a1f20ff, 0x23755dee, + 0xcbaea657, 0x566d2e03, 0xbeb6d5ba, 0xc7dca8ab, 0x2f075312, + 0x35085289, 0xddd3a930, 0xa4b9d421, 0x4c622f98, 0x7d7bfbca, + 0x95a00073, 0xecca7d62, 0x041186db, 0x1e1e8740, 0xf6c57cf9, + 0x8faf01e8, 0x6774fa51, 0xfab77205, 0x126c89bc, 0x6b06f4ad, + 0x83dd0f14, 0x99d20e8f, 0x7109f536, 0x08638827, 0xe0b8739e, + 0x32e4998e, 0xda3f6237, 0xa3551f26, 0x4b8ee49f, 0x5181e504, + 0xb95a1ebd, 0xc03063ac, 0x28eb9815, 0xb5281041, 0x5df3ebf8, + 0x249996e9, 0xcc426d50, 0xd64d6ccb, 0x3e969772, 0x47fcea63, + 0xaf2711da, 0xe3453e42, 0x0b9ec5fb, 0x72f4b8ea, 0x9a2f4353, + 0x802042c8, 0x68fbb971, 0x1191c460, 0xf94a3fd9, 0x6489b78d, + 0x8c524c34, 0xf5383125, 0x1de3ca9c, 
0x07eccb07, 0xef3730be, + 0x965d4daf, 0x7e86b616, 0xacda5c06, 0x4401a7bf, 0x3d6bdaae, + 0xd5b02117, 0xcfbf208c, 0x2764db35, 0x5e0ea624, 0xb6d55d9d, + 0x2b16d5c9, 0xc3cd2e70, 0xbaa75361, 0x527ca8d8, 0x4873a943, + 0xa0a852fa, 0xd9c22feb, 0x3119d452, 0xbbf0874e, 0x532b7cf7, + 0x2a4101e6, 0xc29afa5f, 0xd895fbc4, 0x304e007d, 0x49247d6c, + 0xa1ff86d5, 0x3c3c0e81, 0xd4e7f538, 0xad8d8829, 0x45567390, + 0x5f59720b, 0xb78289b2, 0xcee8f4a3, 0x26330f1a, 0xf46fe50a, + 0x1cb41eb3, 0x65de63a2, 0x8d05981b, 0x970a9980, 0x7fd16239, + 0x06bb1f28, 0xee60e491, 0x73a36cc5, 0x9b78977c, 0xe212ea6d, + 0x0ac911d4, 0x10c6104f, 0xf81debf6, 0x817796e7, 0x69ac6d5e, + 0x25ce42c6, 0xcd15b97f, 0xb47fc46e, 0x5ca43fd7, 0x46ab3e4c, + 0xae70c5f5, 0xd71ab8e4, 0x3fc1435d, 0xa202cb09, 0x4ad930b0, + 0x33b34da1, 0xdb68b618, 0xc167b783, 0x29bc4c3a, 0x50d6312b, + 0xb80dca92, 0x6a512082, 0x828adb3b, 0xfbe0a62a, 0x133b5d93, + 0x09345c08, 0xe1efa7b1, 0x9885daa0, 0x705e2119, 0xed9da94d, + 0x054652f4, 0x7c2c2fe5, 0x94f7d45c, 0x8ef8d5c7, 0x66232e7e, + 0x1f49536f, 0xf792a8d6, 0xc68b7c84, 0x2e50873d, 0x573afa2c, + 0xbfe10195, 0xa5ee000e, 0x4d35fbb7, 0x345f86a6, 0xdc847d1f, + 0x4147f54b, 0xa99c0ef2, 0xd0f673e3, 0x382d885a, 0x222289c1, + 0xcaf97278, 0xb3930f69, 0x5b48f4d0, 0x89141ec0, 0x61cfe579, + 0x18a59868, 0xf07e63d1, 0xea71624a, 0x02aa99f3, 0x7bc0e4e2, + 0x931b1f5b, 0x0ed8970f, 0xe6036cb6, 0x9f6911a7, 0x77b2ea1e, + 0x6dbdeb85, 0x8566103c, 0xfc0c6d2d, 0x14d79694, 0x58b5b90c, + 0xb06e42b5, 0xc9043fa4, 0x21dfc41d, 0x3bd0c586, 0xd30b3e3f, + 0xaa61432e, 0x42bab897, 0xdf7930c3, 0x37a2cb7a, 0x4ec8b66b, + 0xa6134dd2, 0xbc1c4c49, 0x54c7b7f0, 0x2dadcae1, 0xc5763158, + 0x172adb48, 0xfff120f1, 0x869b5de0, 0x6e40a659, 0x744fa7c2, + 0x9c945c7b, 0xe5fe216a, 0x0d25dad3, 0x90e65287, 0x783da93e, + 0x0157d42f, 0xe98c2f96, 0xf3832e0d, 0x1b58d5b4, 0x6232a8a5, + 0x8ae9531c}, + {0x00000000, 0x919168ae, 0x6325a087, 0xf2b4c829, 0x874c31d4, + 0x16dd597a, 0xe4699153, 0x75f8f9fd, 0x4f9f1373, 0xde0e7bdd, + 0x2cbab3f4, 0xbd2bdb5a, 0xc8d322a7, 
0x59424a09, 0xabf68220, + 0x3a67ea8e, 0x9e3e27e6, 0x0faf4f48, 0xfd1b8761, 0x6c8aefcf, + 0x19721632, 0x88e37e9c, 0x7a57b6b5, 0xebc6de1b, 0xd1a13495, + 0x40305c3b, 0xb2849412, 0x2315fcbc, 0x56ed0541, 0xc77c6def, + 0x35c8a5c6, 0xa459cd68, 0x7d7b3f17, 0xecea57b9, 0x1e5e9f90, + 0x8fcff73e, 0xfa370ec3, 0x6ba6666d, 0x9912ae44, 0x0883c6ea, + 0x32e42c64, 0xa37544ca, 0x51c18ce3, 0xc050e44d, 0xb5a81db0, + 0x2439751e, 0xd68dbd37, 0x471cd599, 0xe34518f1, 0x72d4705f, + 0x8060b876, 0x11f1d0d8, 0x64092925, 0xf598418b, 0x072c89a2, + 0x96bde10c, 0xacda0b82, 0x3d4b632c, 0xcfffab05, 0x5e6ec3ab, + 0x2b963a56, 0xba0752f8, 0x48b39ad1, 0xd922f27f, 0xfaf67e2e, + 0x6b671680, 0x99d3dea9, 0x0842b607, 0x7dba4ffa, 0xec2b2754, + 0x1e9fef7d, 0x8f0e87d3, 0xb5696d5d, 0x24f805f3, 0xd64ccdda, + 0x47dda574, 0x32255c89, 0xa3b43427, 0x5100fc0e, 0xc09194a0, + 0x64c859c8, 0xf5593166, 0x07edf94f, 0x967c91e1, 0xe384681c, + 0x721500b2, 0x80a1c89b, 0x1130a035, 0x2b574abb, 0xbac62215, + 0x4872ea3c, 0xd9e38292, 0xac1b7b6f, 0x3d8a13c1, 0xcf3edbe8, + 0x5eafb346, 0x878d4139, 0x161c2997, 0xe4a8e1be, 0x75398910, + 0x00c170ed, 0x91501843, 0x63e4d06a, 0xf275b8c4, 0xc812524a, + 0x59833ae4, 0xab37f2cd, 0x3aa69a63, 0x4f5e639e, 0xdecf0b30, + 0x2c7bc319, 0xbdeaabb7, 0x19b366df, 0x88220e71, 0x7a96c658, + 0xeb07aef6, 0x9eff570b, 0x0f6e3fa5, 0xfddaf78c, 0x6c4b9f22, + 0x562c75ac, 0xc7bd1d02, 0x3509d52b, 0xa498bd85, 0xd1604478, + 0x40f12cd6, 0xb245e4ff, 0x23d48c51, 0xf4edfd5c, 0x657c95f2, + 0x97c85ddb, 0x06593575, 0x73a1cc88, 0xe230a426, 0x10846c0f, + 0x811504a1, 0xbb72ee2f, 0x2ae38681, 0xd8574ea8, 0x49c62606, + 0x3c3edffb, 0xadafb755, 0x5f1b7f7c, 0xce8a17d2, 0x6ad3daba, + 0xfb42b214, 0x09f67a3d, 0x98671293, 0xed9feb6e, 0x7c0e83c0, + 0x8eba4be9, 0x1f2b2347, 0x254cc9c9, 0xb4dda167, 0x4669694e, + 0xd7f801e0, 0xa200f81d, 0x339190b3, 0xc125589a, 0x50b43034, + 0x8996c24b, 0x1807aae5, 0xeab362cc, 0x7b220a62, 0x0edaf39f, + 0x9f4b9b31, 0x6dff5318, 0xfc6e3bb6, 0xc609d138, 0x5798b996, + 0xa52c71bf, 0x34bd1911, 0x4145e0ec, 0xd0d48842, 
0x2260406b, + 0xb3f128c5, 0x17a8e5ad, 0x86398d03, 0x748d452a, 0xe51c2d84, + 0x90e4d479, 0x0175bcd7, 0xf3c174fe, 0x62501c50, 0x5837f6de, + 0xc9a69e70, 0x3b125659, 0xaa833ef7, 0xdf7bc70a, 0x4eeaafa4, + 0xbc5e678d, 0x2dcf0f23, 0x0e1b8372, 0x9f8aebdc, 0x6d3e23f5, + 0xfcaf4b5b, 0x8957b2a6, 0x18c6da08, 0xea721221, 0x7be37a8f, + 0x41849001, 0xd015f8af, 0x22a13086, 0xb3305828, 0xc6c8a1d5, + 0x5759c97b, 0xa5ed0152, 0x347c69fc, 0x9025a494, 0x01b4cc3a, + 0xf3000413, 0x62916cbd, 0x17699540, 0x86f8fdee, 0x744c35c7, + 0xe5dd5d69, 0xdfbab7e7, 0x4e2bdf49, 0xbc9f1760, 0x2d0e7fce, + 0x58f68633, 0xc967ee9d, 0x3bd326b4, 0xaa424e1a, 0x7360bc65, + 0xe2f1d4cb, 0x10451ce2, 0x81d4744c, 0xf42c8db1, 0x65bde51f, + 0x97092d36, 0x06984598, 0x3cffaf16, 0xad6ec7b8, 0x5fda0f91, + 0xce4b673f, 0xbbb39ec2, 0x2a22f66c, 0xd8963e45, 0x490756eb, + 0xed5e9b83, 0x7ccff32d, 0x8e7b3b04, 0x1fea53aa, 0x6a12aa57, + 0xfb83c2f9, 0x09370ad0, 0x98a6627e, 0xa2c188f0, 0x3350e05e, + 0xc1e42877, 0x507540d9, 0x258db924, 0xb41cd18a, 0x46a819a3, + 0xd739710d}}; + +#endif + +#endif + +#if N == 5 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xaf449247, 0x85f822cf, 0x2abcb088, 0xd08143df, + 0x7fc5d198, 0x55796110, 0xfa3df357, 0x7a7381ff, 0xd53713b8, + 0xff8ba330, 0x50cf3177, 0xaaf2c220, 0x05b65067, 0x2f0ae0ef, + 0x804e72a8, 0xf4e703fe, 0x5ba391b9, 0x711f2131, 0xde5bb376, + 0x24664021, 0x8b22d266, 0xa19e62ee, 0x0edaf0a9, 0x8e948201, + 0x21d01046, 0x0b6ca0ce, 0xa4283289, 0x5e15c1de, 0xf1515399, + 0xdbede311, 0x74a97156, 0x32bf01bd, 0x9dfb93fa, 0xb7472372, + 0x1803b135, 0xe23e4262, 0x4d7ad025, 0x67c660ad, 0xc882f2ea, + 0x48cc8042, 0xe7881205, 0xcd34a28d, 0x627030ca, 0x984dc39d, + 0x370951da, 0x1db5e152, 0xb2f17315, 0xc6580243, 0x691c9004, + 0x43a0208c, 0xece4b2cb, 0x16d9419c, 0xb99dd3db, 0x93216353, + 0x3c65f114, 0xbc2b83bc, 0x136f11fb, 0x39d3a173, 0x96973334, + 0x6caac063, 0xc3ee5224, 0xe952e2ac, 0x461670eb, 0x657e037a, + 0xca3a913d, 0xe08621b5, 0x4fc2b3f2, 0xb5ff40a5, 0x1abbd2e2, + 
0x3007626a, 0x9f43f02d, 0x1f0d8285, 0xb04910c2, 0x9af5a04a, + 0x35b1320d, 0xcf8cc15a, 0x60c8531d, 0x4a74e395, 0xe53071d2, + 0x91990084, 0x3edd92c3, 0x1461224b, 0xbb25b00c, 0x4118435b, + 0xee5cd11c, 0xc4e06194, 0x6ba4f3d3, 0xebea817b, 0x44ae133c, + 0x6e12a3b4, 0xc15631f3, 0x3b6bc2a4, 0x942f50e3, 0xbe93e06b, + 0x11d7722c, 0x57c102c7, 0xf8859080, 0xd2392008, 0x7d7db24f, + 0x87404118, 0x2804d35f, 0x02b863d7, 0xadfcf190, 0x2db28338, + 0x82f6117f, 0xa84aa1f7, 0x070e33b0, 0xfd33c0e7, 0x527752a0, + 0x78cbe228, 0xd78f706f, 0xa3260139, 0x0c62937e, 0x26de23f6, + 0x899ab1b1, 0x73a742e6, 0xdce3d0a1, 0xf65f6029, 0x591bf26e, + 0xd95580c6, 0x76111281, 0x5cada209, 0xf3e9304e, 0x09d4c319, + 0xa690515e, 0x8c2ce1d6, 0x23687391, 0xcafc06f4, 0x65b894b3, + 0x4f04243b, 0xe040b67c, 0x1a7d452b, 0xb539d76c, 0x9f8567e4, + 0x30c1f5a3, 0xb08f870b, 0x1fcb154c, 0x3577a5c4, 0x9a333783, + 0x600ec4d4, 0xcf4a5693, 0xe5f6e61b, 0x4ab2745c, 0x3e1b050a, + 0x915f974d, 0xbbe327c5, 0x14a7b582, 0xee9a46d5, 0x41ded492, + 0x6b62641a, 0xc426f65d, 0x446884f5, 0xeb2c16b2, 0xc190a63a, + 0x6ed4347d, 0x94e9c72a, 0x3bad556d, 0x1111e5e5, 0xbe5577a2, + 0xf8430749, 0x5707950e, 0x7dbb2586, 0xd2ffb7c1, 0x28c24496, + 0x8786d6d1, 0xad3a6659, 0x027ef41e, 0x823086b6, 0x2d7414f1, + 0x07c8a479, 0xa88c363e, 0x52b1c569, 0xfdf5572e, 0xd749e7a6, + 0x780d75e1, 0x0ca404b7, 0xa3e096f0, 0x895c2678, 0x2618b43f, + 0xdc254768, 0x7361d52f, 0x59dd65a7, 0xf699f7e0, 0x76d78548, + 0xd993170f, 0xf32fa787, 0x5c6b35c0, 0xa656c697, 0x091254d0, + 0x23aee458, 0x8cea761f, 0xaf82058e, 0x00c697c9, 0x2a7a2741, + 0x853eb506, 0x7f034651, 0xd047d416, 0xfafb649e, 0x55bff6d9, + 0xd5f18471, 0x7ab51636, 0x5009a6be, 0xff4d34f9, 0x0570c7ae, + 0xaa3455e9, 0x8088e561, 0x2fcc7726, 0x5b650670, 0xf4219437, + 0xde9d24bf, 0x71d9b6f8, 0x8be445af, 0x24a0d7e8, 0x0e1c6760, + 0xa158f527, 0x2116878f, 0x8e5215c8, 0xa4eea540, 0x0baa3707, + 0xf197c450, 0x5ed35617, 0x746fe69f, 0xdb2b74d8, 0x9d3d0433, + 0x32799674, 0x18c526fc, 0xb781b4bb, 0x4dbc47ec, 0xe2f8d5ab, + 0xc8446523, 
0x6700f764, 0xe74e85cc, 0x480a178b, 0x62b6a703, + 0xcdf23544, 0x37cfc613, 0x988b5454, 0xb237e4dc, 0x1d73769b, + 0x69da07cd, 0xc69e958a, 0xec222502, 0x4366b745, 0xb95b4412, + 0x161fd655, 0x3ca366dd, 0x93e7f49a, 0x13a98632, 0xbced1475, + 0x9651a4fd, 0x391536ba, 0xc328c5ed, 0x6c6c57aa, 0x46d0e722, + 0xe9947565}, + {0x00000000, 0x4e890ba9, 0x9d121752, 0xd39b1cfb, 0xe15528e5, + 0xafdc234c, 0x7c473fb7, 0x32ce341e, 0x19db578b, 0x57525c22, + 0x84c940d9, 0xca404b70, 0xf88e7f6e, 0xb60774c7, 0x659c683c, + 0x2b156395, 0x33b6af16, 0x7d3fa4bf, 0xaea4b844, 0xe02db3ed, + 0xd2e387f3, 0x9c6a8c5a, 0x4ff190a1, 0x01789b08, 0x2a6df89d, + 0x64e4f334, 0xb77fefcf, 0xf9f6e466, 0xcb38d078, 0x85b1dbd1, + 0x562ac72a, 0x18a3cc83, 0x676d5e2c, 0x29e45585, 0xfa7f497e, + 0xb4f642d7, 0x863876c9, 0xc8b17d60, 0x1b2a619b, 0x55a36a32, + 0x7eb609a7, 0x303f020e, 0xe3a41ef5, 0xad2d155c, 0x9fe32142, + 0xd16a2aeb, 0x02f13610, 0x4c783db9, 0x54dbf13a, 0x1a52fa93, + 0xc9c9e668, 0x8740edc1, 0xb58ed9df, 0xfb07d276, 0x289cce8d, + 0x6615c524, 0x4d00a6b1, 0x0389ad18, 0xd012b1e3, 0x9e9bba4a, + 0xac558e54, 0xe2dc85fd, 0x31479906, 0x7fce92af, 0xcedabc58, + 0x8053b7f1, 0x53c8ab0a, 0x1d41a0a3, 0x2f8f94bd, 0x61069f14, + 0xb29d83ef, 0xfc148846, 0xd701ebd3, 0x9988e07a, 0x4a13fc81, + 0x049af728, 0x3654c336, 0x78ddc89f, 0xab46d464, 0xe5cfdfcd, + 0xfd6c134e, 0xb3e518e7, 0x607e041c, 0x2ef70fb5, 0x1c393bab, + 0x52b03002, 0x812b2cf9, 0xcfa22750, 0xe4b744c5, 0xaa3e4f6c, + 0x79a55397, 0x372c583e, 0x05e26c20, 0x4b6b6789, 0x98f07b72, + 0xd67970db, 0xa9b7e274, 0xe73ee9dd, 0x34a5f526, 0x7a2cfe8f, + 0x48e2ca91, 0x066bc138, 0xd5f0ddc3, 0x9b79d66a, 0xb06cb5ff, + 0xfee5be56, 0x2d7ea2ad, 0x63f7a904, 0x51399d1a, 0x1fb096b3, + 0xcc2b8a48, 0x82a281e1, 0x9a014d62, 0xd48846cb, 0x07135a30, + 0x499a5199, 0x7b546587, 0x35dd6e2e, 0xe64672d5, 0xa8cf797c, + 0x83da1ae9, 0xcd531140, 0x1ec80dbb, 0x50410612, 0x628f320c, + 0x2c0639a5, 0xff9d255e, 0xb1142ef7, 0x46c47ef1, 0x084d7558, + 0xdbd669a3, 0x955f620a, 0xa7915614, 0xe9185dbd, 0x3a834146, + 0x740a4aef, 
0x5f1f297a, 0x119622d3, 0xc20d3e28, 0x8c843581, + 0xbe4a019f, 0xf0c30a36, 0x235816cd, 0x6dd11d64, 0x7572d1e7, + 0x3bfbda4e, 0xe860c6b5, 0xa6e9cd1c, 0x9427f902, 0xdaaef2ab, + 0x0935ee50, 0x47bce5f9, 0x6ca9866c, 0x22208dc5, 0xf1bb913e, + 0xbf329a97, 0x8dfcae89, 0xc375a520, 0x10eeb9db, 0x5e67b272, + 0x21a920dd, 0x6f202b74, 0xbcbb378f, 0xf2323c26, 0xc0fc0838, + 0x8e750391, 0x5dee1f6a, 0x136714c3, 0x38727756, 0x76fb7cff, + 0xa5606004, 0xebe96bad, 0xd9275fb3, 0x97ae541a, 0x443548e1, + 0x0abc4348, 0x121f8fcb, 0x5c968462, 0x8f0d9899, 0xc1849330, + 0xf34aa72e, 0xbdc3ac87, 0x6e58b07c, 0x20d1bbd5, 0x0bc4d840, + 0x454dd3e9, 0x96d6cf12, 0xd85fc4bb, 0xea91f0a5, 0xa418fb0c, + 0x7783e7f7, 0x390aec5e, 0x881ec2a9, 0xc697c900, 0x150cd5fb, + 0x5b85de52, 0x694bea4c, 0x27c2e1e5, 0xf459fd1e, 0xbad0f6b7, + 0x91c59522, 0xdf4c9e8b, 0x0cd78270, 0x425e89d9, 0x7090bdc7, + 0x3e19b66e, 0xed82aa95, 0xa30ba13c, 0xbba86dbf, 0xf5216616, + 0x26ba7aed, 0x68337144, 0x5afd455a, 0x14744ef3, 0xc7ef5208, + 0x896659a1, 0xa2733a34, 0xecfa319d, 0x3f612d66, 0x71e826cf, + 0x432612d1, 0x0daf1978, 0xde340583, 0x90bd0e2a, 0xef739c85, + 0xa1fa972c, 0x72618bd7, 0x3ce8807e, 0x0e26b460, 0x40afbfc9, + 0x9334a332, 0xddbda89b, 0xf6a8cb0e, 0xb821c0a7, 0x6bbadc5c, + 0x2533d7f5, 0x17fde3eb, 0x5974e842, 0x8aeff4b9, 0xc466ff10, + 0xdcc53393, 0x924c383a, 0x41d724c1, 0x0f5e2f68, 0x3d901b76, + 0x731910df, 0xa0820c24, 0xee0b078d, 0xc51e6418, 0x8b976fb1, + 0x580c734a, 0x168578e3, 0x244b4cfd, 0x6ac24754, 0xb9595baf, + 0xf7d05006}, + {0x00000000, 0x8d88fde2, 0xc060fd85, 0x4de80067, 0x5bb0fd4b, + 0xd63800a9, 0x9bd000ce, 0x1658fd2c, 0xb761fa96, 0x3ae90774, + 0x77010713, 0xfa89faf1, 0xecd107dd, 0x6159fa3f, 0x2cb1fa58, + 0xa13907ba, 0xb5b2f36d, 0x383a0e8f, 0x75d20ee8, 0xf85af30a, + 0xee020e26, 0x638af3c4, 0x2e62f3a3, 0xa3ea0e41, 0x02d309fb, + 0x8f5bf419, 0xc2b3f47e, 0x4f3b099c, 0x5963f4b0, 0xd4eb0952, + 0x99030935, 0x148bf4d7, 0xb014e09b, 0x3d9c1d79, 0x70741d1e, + 0xfdfce0fc, 0xeba41dd0, 0x662ce032, 0x2bc4e055, 0xa64c1db7, + 0x07751a0d, 
0x8afde7ef, 0xc715e788, 0x4a9d1a6a, 0x5cc5e746, + 0xd14d1aa4, 0x9ca51ac3, 0x112de721, 0x05a613f6, 0x882eee14, + 0xc5c6ee73, 0x484e1391, 0x5e16eebd, 0xd39e135f, 0x9e761338, + 0x13feeeda, 0xb2c7e960, 0x3f4f1482, 0x72a714e5, 0xff2fe907, + 0xe977142b, 0x64ffe9c9, 0x2917e9ae, 0xa49f144c, 0xbb58c777, + 0x36d03a95, 0x7b383af2, 0xf6b0c710, 0xe0e83a3c, 0x6d60c7de, + 0x2088c7b9, 0xad003a5b, 0x0c393de1, 0x81b1c003, 0xcc59c064, + 0x41d13d86, 0x5789c0aa, 0xda013d48, 0x97e93d2f, 0x1a61c0cd, + 0x0eea341a, 0x8362c9f8, 0xce8ac99f, 0x4302347d, 0x555ac951, + 0xd8d234b3, 0x953a34d4, 0x18b2c936, 0xb98bce8c, 0x3403336e, + 0x79eb3309, 0xf463ceeb, 0xe23b33c7, 0x6fb3ce25, 0x225bce42, + 0xafd333a0, 0x0b4c27ec, 0x86c4da0e, 0xcb2cda69, 0x46a4278b, + 0x50fcdaa7, 0xdd742745, 0x909c2722, 0x1d14dac0, 0xbc2ddd7a, + 0x31a52098, 0x7c4d20ff, 0xf1c5dd1d, 0xe79d2031, 0x6a15ddd3, + 0x27fdddb4, 0xaa752056, 0xbefed481, 0x33762963, 0x7e9e2904, + 0xf316d4e6, 0xe54e29ca, 0x68c6d428, 0x252ed44f, 0xa8a629ad, + 0x099f2e17, 0x8417d3f5, 0xc9ffd392, 0x44772e70, 0x522fd35c, + 0xdfa72ebe, 0x924f2ed9, 0x1fc7d33b, 0xadc088af, 0x2048754d, + 0x6da0752a, 0xe02888c8, 0xf67075e4, 0x7bf88806, 0x36108861, + 0xbb987583, 0x1aa17239, 0x97298fdb, 0xdac18fbc, 0x5749725e, + 0x41118f72, 0xcc997290, 0x817172f7, 0x0cf98f15, 0x18727bc2, + 0x95fa8620, 0xd8128647, 0x559a7ba5, 0x43c28689, 0xce4a7b6b, + 0x83a27b0c, 0x0e2a86ee, 0xaf138154, 0x229b7cb6, 0x6f737cd1, + 0xe2fb8133, 0xf4a37c1f, 0x792b81fd, 0x34c3819a, 0xb94b7c78, + 0x1dd46834, 0x905c95d6, 0xddb495b1, 0x503c6853, 0x4664957f, + 0xcbec689d, 0x860468fa, 0x0b8c9518, 0xaab592a2, 0x273d6f40, + 0x6ad56f27, 0xe75d92c5, 0xf1056fe9, 0x7c8d920b, 0x3165926c, + 0xbced6f8e, 0xa8669b59, 0x25ee66bb, 0x680666dc, 0xe58e9b3e, + 0xf3d66612, 0x7e5e9bf0, 0x33b69b97, 0xbe3e6675, 0x1f0761cf, + 0x928f9c2d, 0xdf679c4a, 0x52ef61a8, 0x44b79c84, 0xc93f6166, + 0x84d76101, 0x095f9ce3, 0x16984fd8, 0x9b10b23a, 0xd6f8b25d, + 0x5b704fbf, 0x4d28b293, 0xc0a04f71, 0x8d484f16, 0x00c0b2f4, + 0xa1f9b54e, 0x2c7148ac, 
0x619948cb, 0xec11b529, 0xfa494805, + 0x77c1b5e7, 0x3a29b580, 0xb7a14862, 0xa32abcb5, 0x2ea24157, + 0x634a4130, 0xeec2bcd2, 0xf89a41fe, 0x7512bc1c, 0x38fabc7b, + 0xb5724199, 0x144b4623, 0x99c3bbc1, 0xd42bbba6, 0x59a34644, + 0x4ffbbb68, 0xc273468a, 0x8f9b46ed, 0x0213bb0f, 0xa68caf43, + 0x2b0452a1, 0x66ec52c6, 0xeb64af24, 0xfd3c5208, 0x70b4afea, + 0x3d5caf8d, 0xb0d4526f, 0x11ed55d5, 0x9c65a837, 0xd18da850, + 0x5c0555b2, 0x4a5da89e, 0xc7d5557c, 0x8a3d551b, 0x07b5a8f9, + 0x133e5c2e, 0x9eb6a1cc, 0xd35ea1ab, 0x5ed65c49, 0x488ea165, + 0xc5065c87, 0x88ee5ce0, 0x0566a102, 0xa45fa6b8, 0x29d75b5a, + 0x643f5b3d, 0xe9b7a6df, 0xffef5bf3, 0x7267a611, 0x3f8fa676, + 0xb2075b94}, + {0x00000000, 0x80f0171f, 0xda91287f, 0x5a613f60, 0x6e5356bf, + 0xeea341a0, 0xb4c27ec0, 0x343269df, 0xdca6ad7e, 0x5c56ba61, + 0x06378501, 0x86c7921e, 0xb2f5fbc1, 0x3205ecde, 0x6864d3be, + 0xe894c4a1, 0x623c5cbd, 0xe2cc4ba2, 0xb8ad74c2, 0x385d63dd, + 0x0c6f0a02, 0x8c9f1d1d, 0xd6fe227d, 0x560e3562, 0xbe9af1c3, + 0x3e6ae6dc, 0x640bd9bc, 0xe4fbcea3, 0xd0c9a77c, 0x5039b063, + 0x0a588f03, 0x8aa8981c, 0xc478b97a, 0x4488ae65, 0x1ee99105, + 0x9e19861a, 0xaa2befc5, 0x2adbf8da, 0x70bac7ba, 0xf04ad0a5, + 0x18de1404, 0x982e031b, 0xc24f3c7b, 0x42bf2b64, 0x768d42bb, + 0xf67d55a4, 0xac1c6ac4, 0x2cec7ddb, 0xa644e5c7, 0x26b4f2d8, + 0x7cd5cdb8, 0xfc25daa7, 0xc817b378, 0x48e7a467, 0x12869b07, + 0x92768c18, 0x7ae248b9, 0xfa125fa6, 0xa07360c6, 0x208377d9, + 0x14b11e06, 0x94410919, 0xce203679, 0x4ed02166, 0x538074b5, + 0xd37063aa, 0x89115cca, 0x09e14bd5, 0x3dd3220a, 0xbd233515, + 0xe7420a75, 0x67b21d6a, 0x8f26d9cb, 0x0fd6ced4, 0x55b7f1b4, + 0xd547e6ab, 0xe1758f74, 0x6185986b, 0x3be4a70b, 0xbb14b014, + 0x31bc2808, 0xb14c3f17, 0xeb2d0077, 0x6bdd1768, 0x5fef7eb7, + 0xdf1f69a8, 0x857e56c8, 0x058e41d7, 0xed1a8576, 0x6dea9269, + 0x378bad09, 0xb77bba16, 0x8349d3c9, 0x03b9c4d6, 0x59d8fbb6, + 0xd928eca9, 0x97f8cdcf, 0x1708dad0, 0x4d69e5b0, 0xcd99f2af, + 0xf9ab9b70, 0x795b8c6f, 0x233ab30f, 0xa3caa410, 0x4b5e60b1, + 0xcbae77ae, 0x91cf48ce, 
0x113f5fd1, 0x250d360e, 0xa5fd2111, + 0xff9c1e71, 0x7f6c096e, 0xf5c49172, 0x7534866d, 0x2f55b90d, + 0xafa5ae12, 0x9b97c7cd, 0x1b67d0d2, 0x4106efb2, 0xc1f6f8ad, + 0x29623c0c, 0xa9922b13, 0xf3f31473, 0x7303036c, 0x47316ab3, + 0xc7c17dac, 0x9da042cc, 0x1d5055d3, 0xa700e96a, 0x27f0fe75, + 0x7d91c115, 0xfd61d60a, 0xc953bfd5, 0x49a3a8ca, 0x13c297aa, + 0x933280b5, 0x7ba64414, 0xfb56530b, 0xa1376c6b, 0x21c77b74, + 0x15f512ab, 0x950505b4, 0xcf643ad4, 0x4f942dcb, 0xc53cb5d7, + 0x45cca2c8, 0x1fad9da8, 0x9f5d8ab7, 0xab6fe368, 0x2b9ff477, + 0x71fecb17, 0xf10edc08, 0x199a18a9, 0x996a0fb6, 0xc30b30d6, + 0x43fb27c9, 0x77c94e16, 0xf7395909, 0xad586669, 0x2da87176, + 0x63785010, 0xe388470f, 0xb9e9786f, 0x39196f70, 0x0d2b06af, + 0x8ddb11b0, 0xd7ba2ed0, 0x574a39cf, 0xbfdefd6e, 0x3f2eea71, + 0x654fd511, 0xe5bfc20e, 0xd18dabd1, 0x517dbcce, 0x0b1c83ae, + 0x8bec94b1, 0x01440cad, 0x81b41bb2, 0xdbd524d2, 0x5b2533cd, + 0x6f175a12, 0xefe74d0d, 0xb586726d, 0x35766572, 0xdde2a1d3, + 0x5d12b6cc, 0x077389ac, 0x87839eb3, 0xb3b1f76c, 0x3341e073, + 0x6920df13, 0xe9d0c80c, 0xf4809ddf, 0x74708ac0, 0x2e11b5a0, + 0xaee1a2bf, 0x9ad3cb60, 0x1a23dc7f, 0x4042e31f, 0xc0b2f400, + 0x282630a1, 0xa8d627be, 0xf2b718de, 0x72470fc1, 0x4675661e, + 0xc6857101, 0x9ce44e61, 0x1c14597e, 0x96bcc162, 0x164cd67d, + 0x4c2de91d, 0xccddfe02, 0xf8ef97dd, 0x781f80c2, 0x227ebfa2, + 0xa28ea8bd, 0x4a1a6c1c, 0xcaea7b03, 0x908b4463, 0x107b537c, + 0x24493aa3, 0xa4b92dbc, 0xfed812dc, 0x7e2805c3, 0x30f824a5, + 0xb00833ba, 0xea690cda, 0x6a991bc5, 0x5eab721a, 0xde5b6505, + 0x843a5a65, 0x04ca4d7a, 0xec5e89db, 0x6cae9ec4, 0x36cfa1a4, + 0xb63fb6bb, 0x820ddf64, 0x02fdc87b, 0x589cf71b, 0xd86ce004, + 0x52c47818, 0xd2346f07, 0x88555067, 0x08a54778, 0x3c972ea7, + 0xbc6739b8, 0xe60606d8, 0x66f611c7, 0x8e62d566, 0x0e92c279, + 0x54f3fd19, 0xd403ea06, 0xe03183d9, 0x60c194c6, 0x3aa0aba6, + 0xba50bcb9}, + {0x00000000, 0x9570d495, 0xf190af6b, 0x64e07bfe, 0x38505897, + 0xad208c02, 0xc9c0f7fc, 0x5cb02369, 0x70a0b12e, 0xe5d065bb, + 0x81301e45, 0x1440cad0, 
0x48f0e9b9, 0xdd803d2c, 0xb96046d2, + 0x2c109247, 0xe141625c, 0x7431b6c9, 0x10d1cd37, 0x85a119a2, + 0xd9113acb, 0x4c61ee5e, 0x288195a0, 0xbdf14135, 0x91e1d372, + 0x049107e7, 0x60717c19, 0xf501a88c, 0xa9b18be5, 0x3cc15f70, + 0x5821248e, 0xcd51f01b, 0x19f3c2f9, 0x8c83166c, 0xe8636d92, + 0x7d13b907, 0x21a39a6e, 0xb4d34efb, 0xd0333505, 0x4543e190, + 0x695373d7, 0xfc23a742, 0x98c3dcbc, 0x0db30829, 0x51032b40, + 0xc473ffd5, 0xa093842b, 0x35e350be, 0xf8b2a0a5, 0x6dc27430, + 0x09220fce, 0x9c52db5b, 0xc0e2f832, 0x55922ca7, 0x31725759, + 0xa40283cc, 0x8812118b, 0x1d62c51e, 0x7982bee0, 0xecf26a75, + 0xb042491c, 0x25329d89, 0x41d2e677, 0xd4a232e2, 0x33e785f2, + 0xa6975167, 0xc2772a99, 0x5707fe0c, 0x0bb7dd65, 0x9ec709f0, + 0xfa27720e, 0x6f57a69b, 0x434734dc, 0xd637e049, 0xb2d79bb7, + 0x27a74f22, 0x7b176c4b, 0xee67b8de, 0x8a87c320, 0x1ff717b5, + 0xd2a6e7ae, 0x47d6333b, 0x233648c5, 0xb6469c50, 0xeaf6bf39, + 0x7f866bac, 0x1b661052, 0x8e16c4c7, 0xa2065680, 0x37768215, + 0x5396f9eb, 0xc6e62d7e, 0x9a560e17, 0x0f26da82, 0x6bc6a17c, + 0xfeb675e9, 0x2a14470b, 0xbf64939e, 0xdb84e860, 0x4ef43cf5, + 0x12441f9c, 0x8734cb09, 0xe3d4b0f7, 0x76a46462, 0x5ab4f625, + 0xcfc422b0, 0xab24594e, 0x3e548ddb, 0x62e4aeb2, 0xf7947a27, + 0x937401d9, 0x0604d54c, 0xcb552557, 0x5e25f1c2, 0x3ac58a3c, + 0xafb55ea9, 0xf3057dc0, 0x6675a955, 0x0295d2ab, 0x97e5063e, + 0xbbf59479, 0x2e8540ec, 0x4a653b12, 0xdf15ef87, 0x83a5ccee, + 0x16d5187b, 0x72356385, 0xe745b710, 0x67cf0be4, 0xf2bfdf71, + 0x965fa48f, 0x032f701a, 0x5f9f5373, 0xcaef87e6, 0xae0ffc18, + 0x3b7f288d, 0x176fbaca, 0x821f6e5f, 0xe6ff15a1, 0x738fc134, + 0x2f3fe25d, 0xba4f36c8, 0xdeaf4d36, 0x4bdf99a3, 0x868e69b8, + 0x13febd2d, 0x771ec6d3, 0xe26e1246, 0xbede312f, 0x2baee5ba, + 0x4f4e9e44, 0xda3e4ad1, 0xf62ed896, 0x635e0c03, 0x07be77fd, + 0x92cea368, 0xce7e8001, 0x5b0e5494, 0x3fee2f6a, 0xaa9efbff, + 0x7e3cc91d, 0xeb4c1d88, 0x8fac6676, 0x1adcb2e3, 0x466c918a, + 0xd31c451f, 0xb7fc3ee1, 0x228cea74, 0x0e9c7833, 0x9becaca6, + 0xff0cd758, 0x6a7c03cd, 0x36cc20a4, 
0xa3bcf431, 0xc75c8fcf, + 0x522c5b5a, 0x9f7dab41, 0x0a0d7fd4, 0x6eed042a, 0xfb9dd0bf, + 0xa72df3d6, 0x325d2743, 0x56bd5cbd, 0xc3cd8828, 0xefdd1a6f, + 0x7aadcefa, 0x1e4db504, 0x8b3d6191, 0xd78d42f8, 0x42fd966d, + 0x261ded93, 0xb36d3906, 0x54288e16, 0xc1585a83, 0xa5b8217d, + 0x30c8f5e8, 0x6c78d681, 0xf9080214, 0x9de879ea, 0x0898ad7f, + 0x24883f38, 0xb1f8ebad, 0xd5189053, 0x406844c6, 0x1cd867af, + 0x89a8b33a, 0xed48c8c4, 0x78381c51, 0xb569ec4a, 0x201938df, + 0x44f94321, 0xd18997b4, 0x8d39b4dd, 0x18496048, 0x7ca91bb6, + 0xe9d9cf23, 0xc5c95d64, 0x50b989f1, 0x3459f20f, 0xa129269a, + 0xfd9905f3, 0x68e9d166, 0x0c09aa98, 0x99797e0d, 0x4ddb4cef, + 0xd8ab987a, 0xbc4be384, 0x293b3711, 0x758b1478, 0xe0fbc0ed, + 0x841bbb13, 0x116b6f86, 0x3d7bfdc1, 0xa80b2954, 0xcceb52aa, + 0x599b863f, 0x052ba556, 0x905b71c3, 0xf4bb0a3d, 0x61cbdea8, + 0xac9a2eb3, 0x39eafa26, 0x5d0a81d8, 0xc87a554d, 0x94ca7624, + 0x01baa2b1, 0x655ad94f, 0xf02a0dda, 0xdc3a9f9d, 0x494a4b08, + 0x2daa30f6, 0xb8dae463, 0xe46ac70a, 0x711a139f, 0x15fa6861, + 0x808abcf4}, + {0x00000000, 0xcf9e17c8, 0x444d29d1, 0x8bd33e19, 0x889a53a2, + 0x4704446a, 0xccd77a73, 0x03496dbb, 0xca45a105, 0x05dbb6cd, + 0x8e0888d4, 0x41969f1c, 0x42dff2a7, 0x8d41e56f, 0x0692db76, + 0xc90cccbe, 0x4ffa444b, 0x80645383, 0x0bb76d9a, 0xc4297a52, + 0xc76017e9, 0x08fe0021, 0x832d3e38, 0x4cb329f0, 0x85bfe54e, + 0x4a21f286, 0xc1f2cc9f, 0x0e6cdb57, 0x0d25b6ec, 0xc2bba124, + 0x49689f3d, 0x86f688f5, 0x9ff48896, 0x506a9f5e, 0xdbb9a147, + 0x1427b68f, 0x176edb34, 0xd8f0ccfc, 0x5323f2e5, 0x9cbde52d, + 0x55b12993, 0x9a2f3e5b, 0x11fc0042, 0xde62178a, 0xdd2b7a31, + 0x12b56df9, 0x996653e0, 0x56f84428, 0xd00eccdd, 0x1f90db15, + 0x9443e50c, 0x5bddf2c4, 0x58949f7f, 0x970a88b7, 0x1cd9b6ae, + 0xd347a166, 0x1a4b6dd8, 0xd5d57a10, 0x5e064409, 0x919853c1, + 0x92d13e7a, 0x5d4f29b2, 0xd69c17ab, 0x19020063, 0xe498176d, + 0x2b0600a5, 0xa0d53ebc, 0x6f4b2974, 0x6c0244cf, 0xa39c5307, + 0x284f6d1e, 0xe7d17ad6, 0x2eddb668, 0xe143a1a0, 0x6a909fb9, + 0xa50e8871, 0xa647e5ca, 0x69d9f202, 
0xe20acc1b, 0x2d94dbd3, + 0xab625326, 0x64fc44ee, 0xef2f7af7, 0x20b16d3f, 0x23f80084, + 0xec66174c, 0x67b52955, 0xa82b3e9d, 0x6127f223, 0xaeb9e5eb, + 0x256adbf2, 0xeaf4cc3a, 0xe9bda181, 0x2623b649, 0xadf08850, + 0x626e9f98, 0x7b6c9ffb, 0xb4f28833, 0x3f21b62a, 0xf0bfa1e2, + 0xf3f6cc59, 0x3c68db91, 0xb7bbe588, 0x7825f240, 0xb1293efe, + 0x7eb72936, 0xf564172f, 0x3afa00e7, 0x39b36d5c, 0xf62d7a94, + 0x7dfe448d, 0xb2605345, 0x3496dbb0, 0xfb08cc78, 0x70dbf261, + 0xbf45e5a9, 0xbc0c8812, 0x73929fda, 0xf841a1c3, 0x37dfb60b, + 0xfed37ab5, 0x314d6d7d, 0xba9e5364, 0x750044ac, 0x76492917, + 0xb9d73edf, 0x320400c6, 0xfd9a170e, 0x1241289b, 0xdddf3f53, + 0x560c014a, 0x99921682, 0x9adb7b39, 0x55456cf1, 0xde9652e8, + 0x11084520, 0xd804899e, 0x179a9e56, 0x9c49a04f, 0x53d7b787, + 0x509eda3c, 0x9f00cdf4, 0x14d3f3ed, 0xdb4de425, 0x5dbb6cd0, + 0x92257b18, 0x19f64501, 0xd66852c9, 0xd5213f72, 0x1abf28ba, + 0x916c16a3, 0x5ef2016b, 0x97fecdd5, 0x5860da1d, 0xd3b3e404, + 0x1c2df3cc, 0x1f649e77, 0xd0fa89bf, 0x5b29b7a6, 0x94b7a06e, + 0x8db5a00d, 0x422bb7c5, 0xc9f889dc, 0x06669e14, 0x052ff3af, + 0xcab1e467, 0x4162da7e, 0x8efccdb6, 0x47f00108, 0x886e16c0, + 0x03bd28d9, 0xcc233f11, 0xcf6a52aa, 0x00f44562, 0x8b277b7b, + 0x44b96cb3, 0xc24fe446, 0x0dd1f38e, 0x8602cd97, 0x499cda5f, + 0x4ad5b7e4, 0x854ba02c, 0x0e989e35, 0xc10689fd, 0x080a4543, + 0xc794528b, 0x4c476c92, 0x83d97b5a, 0x809016e1, 0x4f0e0129, + 0xc4dd3f30, 0x0b4328f8, 0xf6d93ff6, 0x3947283e, 0xb2941627, + 0x7d0a01ef, 0x7e436c54, 0xb1dd7b9c, 0x3a0e4585, 0xf590524d, + 0x3c9c9ef3, 0xf302893b, 0x78d1b722, 0xb74fa0ea, 0xb406cd51, + 0x7b98da99, 0xf04be480, 0x3fd5f348, 0xb9237bbd, 0x76bd6c75, + 0xfd6e526c, 0x32f045a4, 0x31b9281f, 0xfe273fd7, 0x75f401ce, + 0xba6a1606, 0x7366dab8, 0xbcf8cd70, 0x372bf369, 0xf8b5e4a1, + 0xfbfc891a, 0x34629ed2, 0xbfb1a0cb, 0x702fb703, 0x692db760, + 0xa6b3a0a8, 0x2d609eb1, 0xe2fe8979, 0xe1b7e4c2, 0x2e29f30a, + 0xa5facd13, 0x6a64dadb, 0xa3681665, 0x6cf601ad, 0xe7253fb4, + 0x28bb287c, 0x2bf245c7, 0xe46c520f, 0x6fbf6c16, 
0xa0217bde, + 0x26d7f32b, 0xe949e4e3, 0x629adafa, 0xad04cd32, 0xae4da089, + 0x61d3b741, 0xea008958, 0x259e9e90, 0xec92522e, 0x230c45e6, + 0xa8df7bff, 0x67416c37, 0x6408018c, 0xab961644, 0x2045285d, + 0xefdb3f95}, + {0x00000000, 0x24825136, 0x4904a26c, 0x6d86f35a, 0x920944d8, + 0xb68b15ee, 0xdb0de6b4, 0xff8fb782, 0xff638ff1, 0xdbe1dec7, + 0xb6672d9d, 0x92e57cab, 0x6d6acb29, 0x49e89a1f, 0x246e6945, + 0x00ec3873, 0x25b619a3, 0x01344895, 0x6cb2bbcf, 0x4830eaf9, + 0xb7bf5d7b, 0x933d0c4d, 0xfebbff17, 0xda39ae21, 0xdad59652, + 0xfe57c764, 0x93d1343e, 0xb7536508, 0x48dcd28a, 0x6c5e83bc, + 0x01d870e6, 0x255a21d0, 0x4b6c3346, 0x6fee6270, 0x0268912a, + 0x26eac01c, 0xd965779e, 0xfde726a8, 0x9061d5f2, 0xb4e384c4, + 0xb40fbcb7, 0x908ded81, 0xfd0b1edb, 0xd9894fed, 0x2606f86f, + 0x0284a959, 0x6f025a03, 0x4b800b35, 0x6eda2ae5, 0x4a587bd3, + 0x27de8889, 0x035cd9bf, 0xfcd36e3d, 0xd8513f0b, 0xb5d7cc51, + 0x91559d67, 0x91b9a514, 0xb53bf422, 0xd8bd0778, 0xfc3f564e, + 0x03b0e1cc, 0x2732b0fa, 0x4ab443a0, 0x6e361296, 0x96d8668c, + 0xb25a37ba, 0xdfdcc4e0, 0xfb5e95d6, 0x04d12254, 0x20537362, + 0x4dd58038, 0x6957d10e, 0x69bbe97d, 0x4d39b84b, 0x20bf4b11, + 0x043d1a27, 0xfbb2ada5, 0xdf30fc93, 0xb2b60fc9, 0x96345eff, + 0xb36e7f2f, 0x97ec2e19, 0xfa6add43, 0xdee88c75, 0x21673bf7, + 0x05e56ac1, 0x6863999b, 0x4ce1c8ad, 0x4c0df0de, 0x688fa1e8, + 0x050952b2, 0x218b0384, 0xde04b406, 0xfa86e530, 0x9700166a, + 0xb382475c, 0xddb455ca, 0xf93604fc, 0x94b0f7a6, 0xb032a690, + 0x4fbd1112, 0x6b3f4024, 0x06b9b37e, 0x223be248, 0x22d7da3b, + 0x06558b0d, 0x6bd37857, 0x4f512961, 0xb0de9ee3, 0x945ccfd5, + 0xf9da3c8f, 0xdd586db9, 0xf8024c69, 0xdc801d5f, 0xb106ee05, + 0x9584bf33, 0x6a0b08b1, 0x4e895987, 0x230faadd, 0x078dfbeb, + 0x0761c398, 0x23e392ae, 0x4e6561f4, 0x6ae730c2, 0x95688740, + 0xb1ead676, 0xdc6c252c, 0xf8ee741a, 0xf6c1cb59, 0xd2439a6f, + 0xbfc56935, 0x9b473803, 0x64c88f81, 0x404adeb7, 0x2dcc2ded, + 0x094e7cdb, 0x09a244a8, 0x2d20159e, 0x40a6e6c4, 0x6424b7f2, + 0x9bab0070, 0xbf295146, 0xd2afa21c, 0xf62df32a, 
0xd377d2fa, + 0xf7f583cc, 0x9a737096, 0xbef121a0, 0x417e9622, 0x65fcc714, + 0x087a344e, 0x2cf86578, 0x2c145d0b, 0x08960c3d, 0x6510ff67, + 0x4192ae51, 0xbe1d19d3, 0x9a9f48e5, 0xf719bbbf, 0xd39bea89, + 0xbdadf81f, 0x992fa929, 0xf4a95a73, 0xd02b0b45, 0x2fa4bcc7, + 0x0b26edf1, 0x66a01eab, 0x42224f9d, 0x42ce77ee, 0x664c26d8, + 0x0bcad582, 0x2f4884b4, 0xd0c73336, 0xf4456200, 0x99c3915a, + 0xbd41c06c, 0x981be1bc, 0xbc99b08a, 0xd11f43d0, 0xf59d12e6, + 0x0a12a564, 0x2e90f452, 0x43160708, 0x6794563e, 0x67786e4d, + 0x43fa3f7b, 0x2e7ccc21, 0x0afe9d17, 0xf5712a95, 0xd1f37ba3, + 0xbc7588f9, 0x98f7d9cf, 0x6019add5, 0x449bfce3, 0x291d0fb9, + 0x0d9f5e8f, 0xf210e90d, 0xd692b83b, 0xbb144b61, 0x9f961a57, + 0x9f7a2224, 0xbbf87312, 0xd67e8048, 0xf2fcd17e, 0x0d7366fc, + 0x29f137ca, 0x4477c490, 0x60f595a6, 0x45afb476, 0x612de540, + 0x0cab161a, 0x2829472c, 0xd7a6f0ae, 0xf324a198, 0x9ea252c2, + 0xba2003f4, 0xbacc3b87, 0x9e4e6ab1, 0xf3c899eb, 0xd74ac8dd, + 0x28c57f5f, 0x0c472e69, 0x61c1dd33, 0x45438c05, 0x2b759e93, + 0x0ff7cfa5, 0x62713cff, 0x46f36dc9, 0xb97cda4b, 0x9dfe8b7d, + 0xf0787827, 0xd4fa2911, 0xd4161162, 0xf0944054, 0x9d12b30e, + 0xb990e238, 0x461f55ba, 0x629d048c, 0x0f1bf7d6, 0x2b99a6e0, + 0x0ec38730, 0x2a41d606, 0x47c7255c, 0x6345746a, 0x9ccac3e8, + 0xb84892de, 0xd5ce6184, 0xf14c30b2, 0xf1a008c1, 0xd52259f7, + 0xb8a4aaad, 0x9c26fb9b, 0x63a94c19, 0x472b1d2f, 0x2aadee75, + 0x0e2fbf43}, + {0x00000000, 0x36f290f3, 0x6de521e6, 0x5b17b115, 0xdbca43cc, + 0xed38d33f, 0xb62f622a, 0x80ddf2d9, 0x6ce581d9, 0x5a17112a, + 0x0100a03f, 0x37f230cc, 0xb72fc215, 0x81dd52e6, 0xdacae3f3, + 0xec387300, 0xd9cb03b2, 0xef399341, 0xb42e2254, 0x82dcb2a7, + 0x0201407e, 0x34f3d08d, 0x6fe46198, 0x5916f16b, 0xb52e826b, + 0x83dc1298, 0xd8cba38d, 0xee39337e, 0x6ee4c1a7, 0x58165154, + 0x0301e041, 0x35f370b2, 0x68e70125, 0x5e1591d6, 0x050220c3, + 0x33f0b030, 0xb32d42e9, 0x85dfd21a, 0xdec8630f, 0xe83af3fc, + 0x040280fc, 0x32f0100f, 0x69e7a11a, 0x5f1531e9, 0xdfc8c330, + 0xe93a53c3, 0xb22de2d6, 0x84df7225, 0xb12c0297, 
0x87de9264, + 0xdcc92371, 0xea3bb382, 0x6ae6415b, 0x5c14d1a8, 0x070360bd, + 0x31f1f04e, 0xddc9834e, 0xeb3b13bd, 0xb02ca2a8, 0x86de325b, + 0x0603c082, 0x30f15071, 0x6be6e164, 0x5d147197, 0xd1ce024a, + 0xe73c92b9, 0xbc2b23ac, 0x8ad9b35f, 0x0a044186, 0x3cf6d175, + 0x67e16060, 0x5113f093, 0xbd2b8393, 0x8bd91360, 0xd0cea275, + 0xe63c3286, 0x66e1c05f, 0x501350ac, 0x0b04e1b9, 0x3df6714a, + 0x080501f8, 0x3ef7910b, 0x65e0201e, 0x5312b0ed, 0xd3cf4234, + 0xe53dd2c7, 0xbe2a63d2, 0x88d8f321, 0x64e08021, 0x521210d2, + 0x0905a1c7, 0x3ff73134, 0xbf2ac3ed, 0x89d8531e, 0xd2cfe20b, + 0xe43d72f8, 0xb929036f, 0x8fdb939c, 0xd4cc2289, 0xe23eb27a, + 0x62e340a3, 0x5411d050, 0x0f066145, 0x39f4f1b6, 0xd5cc82b6, + 0xe33e1245, 0xb829a350, 0x8edb33a3, 0x0e06c17a, 0x38f45189, + 0x63e3e09c, 0x5511706f, 0x60e200dd, 0x5610902e, 0x0d07213b, + 0x3bf5b1c8, 0xbb284311, 0x8ddad3e2, 0xd6cd62f7, 0xe03ff204, + 0x0c078104, 0x3af511f7, 0x61e2a0e2, 0x57103011, 0xd7cdc2c8, + 0xe13f523b, 0xba28e32e, 0x8cda73dd, 0x78ed02d5, 0x4e1f9226, + 0x15082333, 0x23fab3c0, 0xa3274119, 0x95d5d1ea, 0xcec260ff, + 0xf830f00c, 0x1408830c, 0x22fa13ff, 0x79eda2ea, 0x4f1f3219, + 0xcfc2c0c0, 0xf9305033, 0xa227e126, 0x94d571d5, 0xa1260167, + 0x97d49194, 0xccc32081, 0xfa31b072, 0x7aec42ab, 0x4c1ed258, + 0x1709634d, 0x21fbf3be, 0xcdc380be, 0xfb31104d, 0xa026a158, + 0x96d431ab, 0x1609c372, 0x20fb5381, 0x7bece294, 0x4d1e7267, + 0x100a03f0, 0x26f89303, 0x7def2216, 0x4b1db2e5, 0xcbc0403c, + 0xfd32d0cf, 0xa62561da, 0x90d7f129, 0x7cef8229, 0x4a1d12da, + 0x110aa3cf, 0x27f8333c, 0xa725c1e5, 0x91d75116, 0xcac0e003, + 0xfc3270f0, 0xc9c10042, 0xff3390b1, 0xa42421a4, 0x92d6b157, + 0x120b438e, 0x24f9d37d, 0x7fee6268, 0x491cf29b, 0xa524819b, + 0x93d61168, 0xc8c1a07d, 0xfe33308e, 0x7eeec257, 0x481c52a4, + 0x130be3b1, 0x25f97342, 0xa923009f, 0x9fd1906c, 0xc4c62179, + 0xf234b18a, 0x72e94353, 0x441bd3a0, 0x1f0c62b5, 0x29fef246, + 0xc5c68146, 0xf33411b5, 0xa823a0a0, 0x9ed13053, 0x1e0cc28a, + 0x28fe5279, 0x73e9e36c, 0x451b739f, 0x70e8032d, 0x461a93de, + 
0x1d0d22cb, 0x2bffb238, 0xab2240e1, 0x9dd0d012, 0xc6c76107, + 0xf035f1f4, 0x1c0d82f4, 0x2aff1207, 0x71e8a312, 0x471a33e1, + 0xc7c7c138, 0xf13551cb, 0xaa22e0de, 0x9cd0702d, 0xc1c401ba, + 0xf7369149, 0xac21205c, 0x9ad3b0af, 0x1a0e4276, 0x2cfcd285, + 0x77eb6390, 0x4119f363, 0xad218063, 0x9bd31090, 0xc0c4a185, + 0xf6363176, 0x76ebc3af, 0x4019535c, 0x1b0ee249, 0x2dfc72ba, + 0x180f0208, 0x2efd92fb, 0x75ea23ee, 0x4318b31d, 0xc3c541c4, + 0xf537d137, 0xae206022, 0x98d2f0d1, 0x74ea83d1, 0x42181322, + 0x190fa237, 0x2ffd32c4, 0xaf20c01d, 0x99d250ee, 0xc2c5e1fb, + 0xf4377108}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xf390f23600000000, 0xe621e56d00000000, + 0x15b1175b00000000, 0xcc43cadb00000000, 0x3fd338ed00000000, + 0x2a622fb600000000, 0xd9f2dd8000000000, 0xd981e56c00000000, + 0x2a11175a00000000, 0x3fa0000100000000, 0xcc30f23700000000, + 0x15c22fb700000000, 0xe652dd8100000000, 0xf3e3cada00000000, + 0x007338ec00000000, 0xb203cbd900000000, 0x419339ef00000000, + 0x54222eb400000000, 0xa7b2dc8200000000, 0x7e40010200000000, + 0x8dd0f33400000000, 0x9861e46f00000000, 0x6bf1165900000000, + 0x6b822eb500000000, 0x9812dc8300000000, 0x8da3cbd800000000, + 0x7e3339ee00000000, 0xa7c1e46e00000000, 0x5451165800000000, + 0x41e0010300000000, 0xb270f33500000000, 0x2501e76800000000, + 0xd691155e00000000, 0xc320020500000000, 0x30b0f03300000000, + 0xe9422db300000000, 0x1ad2df8500000000, 0x0f63c8de00000000, + 0xfcf33ae800000000, 0xfc80020400000000, 0x0f10f03200000000, + 0x1aa1e76900000000, 0xe931155f00000000, 0x30c3c8df00000000, + 0xc3533ae900000000, 0xd6e22db200000000, 0x2572df8400000000, + 0x97022cb100000000, 0x6492de8700000000, 0x7123c9dc00000000, + 0x82b33bea00000000, 0x5b41e66a00000000, 0xa8d1145c00000000, + 0xbd60030700000000, 0x4ef0f13100000000, 0x4e83c9dd00000000, + 0xbd133beb00000000, 0xa8a22cb000000000, 0x5b32de8600000000, + 0x82c0030600000000, 0x7150f13000000000, 0x64e1e66b00000000, + 0x9771145d00000000, 0x4a02ced100000000, 0xb9923ce700000000, + 
0xac232bbc00000000, 0x5fb3d98a00000000, 0x8641040a00000000, + 0x75d1f63c00000000, 0x6060e16700000000, 0x93f0135100000000, + 0x93832bbd00000000, 0x6013d98b00000000, 0x75a2ced000000000, + 0x86323ce600000000, 0x5fc0e16600000000, 0xac50135000000000, + 0xb9e1040b00000000, 0x4a71f63d00000000, 0xf801050800000000, + 0x0b91f73e00000000, 0x1e20e06500000000, 0xedb0125300000000, + 0x3442cfd300000000, 0xc7d23de500000000, 0xd2632abe00000000, + 0x21f3d88800000000, 0x2180e06400000000, 0xd210125200000000, + 0xc7a1050900000000, 0x3431f73f00000000, 0xedc32abf00000000, + 0x1e53d88900000000, 0x0be2cfd200000000, 0xf8723de400000000, + 0x6f0329b900000000, 0x9c93db8f00000000, 0x8922ccd400000000, + 0x7ab23ee200000000, 0xa340e36200000000, 0x50d0115400000000, + 0x4561060f00000000, 0xb6f1f43900000000, 0xb682ccd500000000, + 0x45123ee300000000, 0x50a329b800000000, 0xa333db8e00000000, + 0x7ac1060e00000000, 0x8951f43800000000, 0x9ce0e36300000000, + 0x6f70115500000000, 0xdd00e26000000000, 0x2e90105600000000, + 0x3b21070d00000000, 0xc8b1f53b00000000, 0x114328bb00000000, + 0xe2d3da8d00000000, 0xf762cdd600000000, 0x04f23fe000000000, + 0x0481070c00000000, 0xf711f53a00000000, 0xe2a0e26100000000, + 0x1130105700000000, 0xc8c2cdd700000000, 0x3b523fe100000000, + 0x2ee328ba00000000, 0xdd73da8c00000000, 0xd502ed7800000000, + 0x26921f4e00000000, 0x3323081500000000, 0xc0b3fa2300000000, + 0x194127a300000000, 0xead1d59500000000, 0xff60c2ce00000000, + 0x0cf030f800000000, 0x0c83081400000000, 0xff13fa2200000000, + 0xeaa2ed7900000000, 0x19321f4f00000000, 0xc0c0c2cf00000000, + 0x335030f900000000, 0x26e127a200000000, 0xd571d59400000000, + 0x670126a100000000, 0x9491d49700000000, 0x8120c3cc00000000, + 0x72b031fa00000000, 0xab42ec7a00000000, 0x58d21e4c00000000, + 0x4d63091700000000, 0xbef3fb2100000000, 0xbe80c3cd00000000, + 0x4d1031fb00000000, 0x58a126a000000000, 0xab31d49600000000, + 0x72c3091600000000, 0x8153fb2000000000, 0x94e2ec7b00000000, + 0x67721e4d00000000, 0xf0030a1000000000, 0x0393f82600000000, + 
0x1622ef7d00000000, 0xe5b21d4b00000000, 0x3c40c0cb00000000, + 0xcfd032fd00000000, 0xda6125a600000000, 0x29f1d79000000000, + 0x2982ef7c00000000, 0xda121d4a00000000, 0xcfa30a1100000000, + 0x3c33f82700000000, 0xe5c125a700000000, 0x1651d79100000000, + 0x03e0c0ca00000000, 0xf07032fc00000000, 0x4200c1c900000000, + 0xb19033ff00000000, 0xa42124a400000000, 0x57b1d69200000000, + 0x8e430b1200000000, 0x7dd3f92400000000, 0x6862ee7f00000000, + 0x9bf21c4900000000, 0x9b8124a500000000, 0x6811d69300000000, + 0x7da0c1c800000000, 0x8e3033fe00000000, 0x57c2ee7e00000000, + 0xa4521c4800000000, 0xb1e30b1300000000, 0x4273f92500000000, + 0x9f0023a900000000, 0x6c90d19f00000000, 0x7921c6c400000000, + 0x8ab134f200000000, 0x5343e97200000000, 0xa0d31b4400000000, + 0xb5620c1f00000000, 0x46f2fe2900000000, 0x4681c6c500000000, + 0xb51134f300000000, 0xa0a023a800000000, 0x5330d19e00000000, + 0x8ac20c1e00000000, 0x7952fe2800000000, 0x6ce3e97300000000, + 0x9f731b4500000000, 0x2d03e87000000000, 0xde931a4600000000, + 0xcb220d1d00000000, 0x38b2ff2b00000000, 0xe14022ab00000000, + 0x12d0d09d00000000, 0x0761c7c600000000, 0xf4f135f000000000, + 0xf4820d1c00000000, 0x0712ff2a00000000, 0x12a3e87100000000, + 0xe1331a4700000000, 0x38c1c7c700000000, 0xcb5135f100000000, + 0xdee022aa00000000, 0x2d70d09c00000000, 0xba01c4c100000000, + 0x499136f700000000, 0x5c2021ac00000000, 0xafb0d39a00000000, + 0x76420e1a00000000, 0x85d2fc2c00000000, 0x9063eb7700000000, + 0x63f3194100000000, 0x638021ad00000000, 0x9010d39b00000000, + 0x85a1c4c000000000, 0x763136f600000000, 0xafc3eb7600000000, + 0x5c53194000000000, 0x49e20e1b00000000, 0xba72fc2d00000000, + 0x08020f1800000000, 0xfb92fd2e00000000, 0xee23ea7500000000, + 0x1db3184300000000, 0xc441c5c300000000, 0x37d137f500000000, + 0x226020ae00000000, 0xd1f0d29800000000, 0xd183ea7400000000, + 0x2213184200000000, 0x37a20f1900000000, 0xc432fd2f00000000, + 0x1dc020af00000000, 0xee50d29900000000, 0xfbe1c5c200000000, + 0x087137f400000000}, + {0x0000000000000000, 0x3651822400000000, 
0x6ca2044900000000, + 0x5af3866d00000000, 0xd844099200000000, 0xee158bb600000000, + 0xb4e60ddb00000000, 0x82b78fff00000000, 0xf18f63ff00000000, + 0xc7dee1db00000000, 0x9d2d67b600000000, 0xab7ce59200000000, + 0x29cb6a6d00000000, 0x1f9ae84900000000, 0x45696e2400000000, + 0x7338ec0000000000, 0xa319b62500000000, 0x9548340100000000, + 0xcfbbb26c00000000, 0xf9ea304800000000, 0x7b5dbfb700000000, + 0x4d0c3d9300000000, 0x17ffbbfe00000000, 0x21ae39da00000000, + 0x5296d5da00000000, 0x64c757fe00000000, 0x3e34d19300000000, + 0x086553b700000000, 0x8ad2dc4800000000, 0xbc835e6c00000000, + 0xe670d80100000000, 0xd0215a2500000000, 0x46336c4b00000000, + 0x7062ee6f00000000, 0x2a91680200000000, 0x1cc0ea2600000000, + 0x9e7765d900000000, 0xa826e7fd00000000, 0xf2d5619000000000, + 0xc484e3b400000000, 0xb7bc0fb400000000, 0x81ed8d9000000000, + 0xdb1e0bfd00000000, 0xed4f89d900000000, 0x6ff8062600000000, + 0x59a9840200000000, 0x035a026f00000000, 0x350b804b00000000, + 0xe52ada6e00000000, 0xd37b584a00000000, 0x8988de2700000000, + 0xbfd95c0300000000, 0x3d6ed3fc00000000, 0x0b3f51d800000000, + 0x51ccd7b500000000, 0x679d559100000000, 0x14a5b99100000000, + 0x22f43bb500000000, 0x7807bdd800000000, 0x4e563ffc00000000, + 0xcce1b00300000000, 0xfab0322700000000, 0xa043b44a00000000, + 0x9612366e00000000, 0x8c66d89600000000, 0xba375ab200000000, + 0xe0c4dcdf00000000, 0xd6955efb00000000, 0x5422d10400000000, + 0x6273532000000000, 0x3880d54d00000000, 0x0ed1576900000000, + 0x7de9bb6900000000, 0x4bb8394d00000000, 0x114bbf2000000000, + 0x271a3d0400000000, 0xa5adb2fb00000000, 0x93fc30df00000000, + 0xc90fb6b200000000, 0xff5e349600000000, 0x2f7f6eb300000000, + 0x192eec9700000000, 0x43dd6afa00000000, 0x758ce8de00000000, + 0xf73b672100000000, 0xc16ae50500000000, 0x9b99636800000000, + 0xadc8e14c00000000, 0xdef00d4c00000000, 0xe8a18f6800000000, + 0xb252090500000000, 0x84038b2100000000, 0x06b404de00000000, + 0x30e586fa00000000, 0x6a16009700000000, 0x5c4782b300000000, + 0xca55b4dd00000000, 0xfc0436f900000000, 
0xa6f7b09400000000, + 0x90a632b000000000, 0x1211bd4f00000000, 0x24403f6b00000000, + 0x7eb3b90600000000, 0x48e23b2200000000, 0x3bdad72200000000, + 0x0d8b550600000000, 0x5778d36b00000000, 0x6129514f00000000, + 0xe39edeb000000000, 0xd5cf5c9400000000, 0x8f3cdaf900000000, + 0xb96d58dd00000000, 0x694c02f800000000, 0x5f1d80dc00000000, + 0x05ee06b100000000, 0x33bf849500000000, 0xb1080b6a00000000, + 0x8759894e00000000, 0xddaa0f2300000000, 0xebfb8d0700000000, + 0x98c3610700000000, 0xae92e32300000000, 0xf461654e00000000, + 0xc230e76a00000000, 0x4087689500000000, 0x76d6eab100000000, + 0x2c256cdc00000000, 0x1a74eef800000000, 0x59cbc1f600000000, + 0x6f9a43d200000000, 0x3569c5bf00000000, 0x0338479b00000000, + 0x818fc86400000000, 0xb7de4a4000000000, 0xed2dcc2d00000000, + 0xdb7c4e0900000000, 0xa844a20900000000, 0x9e15202d00000000, + 0xc4e6a64000000000, 0xf2b7246400000000, 0x7000ab9b00000000, + 0x465129bf00000000, 0x1ca2afd200000000, 0x2af32df600000000, + 0xfad277d300000000, 0xcc83f5f700000000, 0x9670739a00000000, + 0xa021f1be00000000, 0x22967e4100000000, 0x14c7fc6500000000, + 0x4e347a0800000000, 0x7865f82c00000000, 0x0b5d142c00000000, + 0x3d0c960800000000, 0x67ff106500000000, 0x51ae924100000000, + 0xd3191dbe00000000, 0xe5489f9a00000000, 0xbfbb19f700000000, + 0x89ea9bd300000000, 0x1ff8adbd00000000, 0x29a92f9900000000, + 0x735aa9f400000000, 0x450b2bd000000000, 0xc7bca42f00000000, + 0xf1ed260b00000000, 0xab1ea06600000000, 0x9d4f224200000000, + 0xee77ce4200000000, 0xd8264c6600000000, 0x82d5ca0b00000000, + 0xb484482f00000000, 0x3633c7d000000000, 0x006245f400000000, + 0x5a91c39900000000, 0x6cc041bd00000000, 0xbce11b9800000000, + 0x8ab099bc00000000, 0xd0431fd100000000, 0xe6129df500000000, + 0x64a5120a00000000, 0x52f4902e00000000, 0x0807164300000000, + 0x3e56946700000000, 0x4d6e786700000000, 0x7b3ffa4300000000, + 0x21cc7c2e00000000, 0x179dfe0a00000000, 0x952a71f500000000, + 0xa37bf3d100000000, 0xf98875bc00000000, 0xcfd9f79800000000, + 0xd5ad196000000000, 0xe3fc9b4400000000, 
0xb90f1d2900000000, + 0x8f5e9f0d00000000, 0x0de910f200000000, 0x3bb892d600000000, + 0x614b14bb00000000, 0x571a969f00000000, 0x24227a9f00000000, + 0x1273f8bb00000000, 0x48807ed600000000, 0x7ed1fcf200000000, + 0xfc66730d00000000, 0xca37f12900000000, 0x90c4774400000000, + 0xa695f56000000000, 0x76b4af4500000000, 0x40e52d6100000000, + 0x1a16ab0c00000000, 0x2c47292800000000, 0xaef0a6d700000000, + 0x98a124f300000000, 0xc252a29e00000000, 0xf40320ba00000000, + 0x873bccba00000000, 0xb16a4e9e00000000, 0xeb99c8f300000000, + 0xddc84ad700000000, 0x5f7fc52800000000, 0x692e470c00000000, + 0x33ddc16100000000, 0x058c434500000000, 0x939e752b00000000, + 0xa5cff70f00000000, 0xff3c716200000000, 0xc96df34600000000, + 0x4bda7cb900000000, 0x7d8bfe9d00000000, 0x277878f000000000, + 0x1129fad400000000, 0x621116d400000000, 0x544094f000000000, + 0x0eb3129d00000000, 0x38e290b900000000, 0xba551f4600000000, + 0x8c049d6200000000, 0xd6f71b0f00000000, 0xe0a6992b00000000, + 0x3087c30e00000000, 0x06d6412a00000000, 0x5c25c74700000000, + 0x6a74456300000000, 0xe8c3ca9c00000000, 0xde9248b800000000, + 0x8461ced500000000, 0xb2304cf100000000, 0xc108a0f100000000, + 0xf75922d500000000, 0xadaaa4b800000000, 0x9bfb269c00000000, + 0x194ca96300000000, 0x2f1d2b4700000000, 0x75eead2a00000000, + 0x43bf2f0e00000000}, + {0x0000000000000000, 0xc8179ecf00000000, 0xd1294d4400000000, + 0x193ed38b00000000, 0xa2539a8800000000, 0x6a44044700000000, + 0x737ad7cc00000000, 0xbb6d490300000000, 0x05a145ca00000000, + 0xcdb6db0500000000, 0xd488088e00000000, 0x1c9f964100000000, + 0xa7f2df4200000000, 0x6fe5418d00000000, 0x76db920600000000, + 0xbecc0cc900000000, 0x4b44fa4f00000000, 0x8353648000000000, + 0x9a6db70b00000000, 0x527a29c400000000, 0xe91760c700000000, + 0x2100fe0800000000, 0x383e2d8300000000, 0xf029b34c00000000, + 0x4ee5bf8500000000, 0x86f2214a00000000, 0x9fccf2c100000000, + 0x57db6c0e00000000, 0xecb6250d00000000, 0x24a1bbc200000000, + 0x3d9f684900000000, 0xf588f68600000000, 0x9688f49f00000000, + 0x5e9f6a5000000000, 
0x47a1b9db00000000, 0x8fb6271400000000, + 0x34db6e1700000000, 0xfcccf0d800000000, 0xe5f2235300000000, + 0x2de5bd9c00000000, 0x9329b15500000000, 0x5b3e2f9a00000000, + 0x4200fc1100000000, 0x8a1762de00000000, 0x317a2bdd00000000, + 0xf96db51200000000, 0xe053669900000000, 0x2844f85600000000, + 0xddcc0ed000000000, 0x15db901f00000000, 0x0ce5439400000000, + 0xc4f2dd5b00000000, 0x7f9f945800000000, 0xb7880a9700000000, + 0xaeb6d91c00000000, 0x66a147d300000000, 0xd86d4b1a00000000, + 0x107ad5d500000000, 0x0944065e00000000, 0xc153989100000000, + 0x7a3ed19200000000, 0xb2294f5d00000000, 0xab179cd600000000, + 0x6300021900000000, 0x6d1798e400000000, 0xa500062b00000000, + 0xbc3ed5a000000000, 0x74294b6f00000000, 0xcf44026c00000000, + 0x07539ca300000000, 0x1e6d4f2800000000, 0xd67ad1e700000000, + 0x68b6dd2e00000000, 0xa0a143e100000000, 0xb99f906a00000000, + 0x71880ea500000000, 0xcae547a600000000, 0x02f2d96900000000, + 0x1bcc0ae200000000, 0xd3db942d00000000, 0x265362ab00000000, + 0xee44fc6400000000, 0xf77a2fef00000000, 0x3f6db12000000000, + 0x8400f82300000000, 0x4c1766ec00000000, 0x5529b56700000000, + 0x9d3e2ba800000000, 0x23f2276100000000, 0xebe5b9ae00000000, + 0xf2db6a2500000000, 0x3accf4ea00000000, 0x81a1bde900000000, + 0x49b6232600000000, 0x5088f0ad00000000, 0x989f6e6200000000, + 0xfb9f6c7b00000000, 0x3388f2b400000000, 0x2ab6213f00000000, + 0xe2a1bff000000000, 0x59ccf6f300000000, 0x91db683c00000000, + 0x88e5bbb700000000, 0x40f2257800000000, 0xfe3e29b100000000, + 0x3629b77e00000000, 0x2f1764f500000000, 0xe700fa3a00000000, + 0x5c6db33900000000, 0x947a2df600000000, 0x8d44fe7d00000000, + 0x455360b200000000, 0xb0db963400000000, 0x78cc08fb00000000, + 0x61f2db7000000000, 0xa9e545bf00000000, 0x12880cbc00000000, + 0xda9f927300000000, 0xc3a141f800000000, 0x0bb6df3700000000, + 0xb57ad3fe00000000, 0x7d6d4d3100000000, 0x64539eba00000000, + 0xac44007500000000, 0x1729497600000000, 0xdf3ed7b900000000, + 0xc600043200000000, 0x0e179afd00000000, 0x9b28411200000000, + 0x533fdfdd00000000, 
0x4a010c5600000000, 0x8216929900000000, + 0x397bdb9a00000000, 0xf16c455500000000, 0xe85296de00000000, + 0x2045081100000000, 0x9e8904d800000000, 0x569e9a1700000000, + 0x4fa0499c00000000, 0x87b7d75300000000, 0x3cda9e5000000000, + 0xf4cd009f00000000, 0xedf3d31400000000, 0x25e44ddb00000000, + 0xd06cbb5d00000000, 0x187b259200000000, 0x0145f61900000000, + 0xc95268d600000000, 0x723f21d500000000, 0xba28bf1a00000000, + 0xa3166c9100000000, 0x6b01f25e00000000, 0xd5cdfe9700000000, + 0x1dda605800000000, 0x04e4b3d300000000, 0xccf32d1c00000000, + 0x779e641f00000000, 0xbf89fad000000000, 0xa6b7295b00000000, + 0x6ea0b79400000000, 0x0da0b58d00000000, 0xc5b72b4200000000, + 0xdc89f8c900000000, 0x149e660600000000, 0xaff32f0500000000, + 0x67e4b1ca00000000, 0x7eda624100000000, 0xb6cdfc8e00000000, + 0x0801f04700000000, 0xc0166e8800000000, 0xd928bd0300000000, + 0x113f23cc00000000, 0xaa526acf00000000, 0x6245f40000000000, + 0x7b7b278b00000000, 0xb36cb94400000000, 0x46e44fc200000000, + 0x8ef3d10d00000000, 0x97cd028600000000, 0x5fda9c4900000000, + 0xe4b7d54a00000000, 0x2ca04b8500000000, 0x359e980e00000000, + 0xfd8906c100000000, 0x43450a0800000000, 0x8b5294c700000000, + 0x926c474c00000000, 0x5a7bd98300000000, 0xe116908000000000, + 0x29010e4f00000000, 0x303fddc400000000, 0xf828430b00000000, + 0xf63fd9f600000000, 0x3e28473900000000, 0x271694b200000000, + 0xef010a7d00000000, 0x546c437e00000000, 0x9c7bddb100000000, + 0x85450e3a00000000, 0x4d5290f500000000, 0xf39e9c3c00000000, + 0x3b8902f300000000, 0x22b7d17800000000, 0xeaa04fb700000000, + 0x51cd06b400000000, 0x99da987b00000000, 0x80e44bf000000000, + 0x48f3d53f00000000, 0xbd7b23b900000000, 0x756cbd7600000000, + 0x6c526efd00000000, 0xa445f03200000000, 0x1f28b93100000000, + 0xd73f27fe00000000, 0xce01f47500000000, 0x06166aba00000000, + 0xb8da667300000000, 0x70cdf8bc00000000, 0x69f32b3700000000, + 0xa1e4b5f800000000, 0x1a89fcfb00000000, 0xd29e623400000000, + 0xcba0b1bf00000000, 0x03b72f7000000000, 0x60b72d6900000000, + 0xa8a0b3a600000000, 
0xb19e602d00000000, 0x7989fee200000000, + 0xc2e4b7e100000000, 0x0af3292e00000000, 0x13cdfaa500000000, + 0xdbda646a00000000, 0x651668a300000000, 0xad01f66c00000000, + 0xb43f25e700000000, 0x7c28bb2800000000, 0xc745f22b00000000, + 0x0f526ce400000000, 0x166cbf6f00000000, 0xde7b21a000000000, + 0x2bf3d72600000000, 0xe3e449e900000000, 0xfada9a6200000000, + 0x32cd04ad00000000, 0x89a04dae00000000, 0x41b7d36100000000, + 0x588900ea00000000, 0x909e9e2500000000, 0x2e5292ec00000000, + 0xe6450c2300000000, 0xff7bdfa800000000, 0x376c416700000000, + 0x8c01086400000000, 0x441696ab00000000, 0x5d28452000000000, + 0x953fdbef00000000}, + {0x0000000000000000, 0x95d4709500000000, 0x6baf90f100000000, + 0xfe7be06400000000, 0x9758503800000000, 0x028c20ad00000000, + 0xfcf7c0c900000000, 0x6923b05c00000000, 0x2eb1a07000000000, + 0xbb65d0e500000000, 0x451e308100000000, 0xd0ca401400000000, + 0xb9e9f04800000000, 0x2c3d80dd00000000, 0xd24660b900000000, + 0x4792102c00000000, 0x5c6241e100000000, 0xc9b6317400000000, + 0x37cdd11000000000, 0xa219a18500000000, 0xcb3a11d900000000, + 0x5eee614c00000000, 0xa095812800000000, 0x3541f1bd00000000, + 0x72d3e19100000000, 0xe707910400000000, 0x197c716000000000, + 0x8ca801f500000000, 0xe58bb1a900000000, 0x705fc13c00000000, + 0x8e24215800000000, 0x1bf051cd00000000, 0xf9c2f31900000000, + 0x6c16838c00000000, 0x926d63e800000000, 0x07b9137d00000000, + 0x6e9aa32100000000, 0xfb4ed3b400000000, 0x053533d000000000, + 0x90e1434500000000, 0xd773536900000000, 0x42a723fc00000000, + 0xbcdcc39800000000, 0x2908b30d00000000, 0x402b035100000000, + 0xd5ff73c400000000, 0x2b8493a000000000, 0xbe50e33500000000, + 0xa5a0b2f800000000, 0x3074c26d00000000, 0xce0f220900000000, + 0x5bdb529c00000000, 0x32f8e2c000000000, 0xa72c925500000000, + 0x5957723100000000, 0xcc8302a400000000, 0x8b11128800000000, + 0x1ec5621d00000000, 0xe0be827900000000, 0x756af2ec00000000, + 0x1c4942b000000000, 0x899d322500000000, 0x77e6d24100000000, + 0xe232a2d400000000, 0xf285e73300000000, 0x675197a600000000, + 
0x992a77c200000000, 0x0cfe075700000000, 0x65ddb70b00000000, + 0xf009c79e00000000, 0x0e7227fa00000000, 0x9ba6576f00000000, + 0xdc34474300000000, 0x49e037d600000000, 0xb79bd7b200000000, + 0x224fa72700000000, 0x4b6c177b00000000, 0xdeb867ee00000000, + 0x20c3878a00000000, 0xb517f71f00000000, 0xaee7a6d200000000, + 0x3b33d64700000000, 0xc548362300000000, 0x509c46b600000000, + 0x39bff6ea00000000, 0xac6b867f00000000, 0x5210661b00000000, + 0xc7c4168e00000000, 0x805606a200000000, 0x1582763700000000, + 0xebf9965300000000, 0x7e2de6c600000000, 0x170e569a00000000, + 0x82da260f00000000, 0x7ca1c66b00000000, 0xe975b6fe00000000, + 0x0b47142a00000000, 0x9e9364bf00000000, 0x60e884db00000000, + 0xf53cf44e00000000, 0x9c1f441200000000, 0x09cb348700000000, + 0xf7b0d4e300000000, 0x6264a47600000000, 0x25f6b45a00000000, + 0xb022c4cf00000000, 0x4e5924ab00000000, 0xdb8d543e00000000, + 0xb2aee46200000000, 0x277a94f700000000, 0xd901749300000000, + 0x4cd5040600000000, 0x572555cb00000000, 0xc2f1255e00000000, + 0x3c8ac53a00000000, 0xa95eb5af00000000, 0xc07d05f300000000, + 0x55a9756600000000, 0xabd2950200000000, 0x3e06e59700000000, + 0x7994f5bb00000000, 0xec40852e00000000, 0x123b654a00000000, + 0x87ef15df00000000, 0xeecca58300000000, 0x7b18d51600000000, + 0x8563357200000000, 0x10b745e700000000, 0xe40bcf6700000000, + 0x71dfbff200000000, 0x8fa45f9600000000, 0x1a702f0300000000, + 0x73539f5f00000000, 0xe687efca00000000, 0x18fc0fae00000000, + 0x8d287f3b00000000, 0xcaba6f1700000000, 0x5f6e1f8200000000, + 0xa115ffe600000000, 0x34c18f7300000000, 0x5de23f2f00000000, + 0xc8364fba00000000, 0x364dafde00000000, 0xa399df4b00000000, + 0xb8698e8600000000, 0x2dbdfe1300000000, 0xd3c61e7700000000, + 0x46126ee200000000, 0x2f31debe00000000, 0xbae5ae2b00000000, + 0x449e4e4f00000000, 0xd14a3eda00000000, 0x96d82ef600000000, + 0x030c5e6300000000, 0xfd77be0700000000, 0x68a3ce9200000000, + 0x01807ece00000000, 0x94540e5b00000000, 0x6a2fee3f00000000, + 0xfffb9eaa00000000, 0x1dc93c7e00000000, 0x881d4ceb00000000, + 
0x7666ac8f00000000, 0xe3b2dc1a00000000, 0x8a916c4600000000, + 0x1f451cd300000000, 0xe13efcb700000000, 0x74ea8c2200000000, + 0x33789c0e00000000, 0xa6acec9b00000000, 0x58d70cff00000000, + 0xcd037c6a00000000, 0xa420cc3600000000, 0x31f4bca300000000, + 0xcf8f5cc700000000, 0x5a5b2c5200000000, 0x41ab7d9f00000000, + 0xd47f0d0a00000000, 0x2a04ed6e00000000, 0xbfd09dfb00000000, + 0xd6f32da700000000, 0x43275d3200000000, 0xbd5cbd5600000000, + 0x2888cdc300000000, 0x6f1addef00000000, 0xfacead7a00000000, + 0x04b54d1e00000000, 0x91613d8b00000000, 0xf8428dd700000000, + 0x6d96fd4200000000, 0x93ed1d2600000000, 0x06396db300000000, + 0x168e285400000000, 0x835a58c100000000, 0x7d21b8a500000000, + 0xe8f5c83000000000, 0x81d6786c00000000, 0x140208f900000000, + 0xea79e89d00000000, 0x7fad980800000000, 0x383f882400000000, + 0xadebf8b100000000, 0x539018d500000000, 0xc644684000000000, + 0xaf67d81c00000000, 0x3ab3a88900000000, 0xc4c848ed00000000, + 0x511c387800000000, 0x4aec69b500000000, 0xdf38192000000000, + 0x2143f94400000000, 0xb49789d100000000, 0xddb4398d00000000, + 0x4860491800000000, 0xb61ba97c00000000, 0x23cfd9e900000000, + 0x645dc9c500000000, 0xf189b95000000000, 0x0ff2593400000000, + 0x9a2629a100000000, 0xf30599fd00000000, 0x66d1e96800000000, + 0x98aa090c00000000, 0x0d7e799900000000, 0xef4cdb4d00000000, + 0x7a98abd800000000, 0x84e34bbc00000000, 0x11373b2900000000, + 0x78148b7500000000, 0xedc0fbe000000000, 0x13bb1b8400000000, + 0x866f6b1100000000, 0xc1fd7b3d00000000, 0x54290ba800000000, + 0xaa52ebcc00000000, 0x3f869b5900000000, 0x56a52b0500000000, + 0xc3715b9000000000, 0x3d0abbf400000000, 0xa8decb6100000000, + 0xb32e9aac00000000, 0x26faea3900000000, 0xd8810a5d00000000, + 0x4d557ac800000000, 0x2476ca9400000000, 0xb1a2ba0100000000, + 0x4fd95a6500000000, 0xda0d2af000000000, 0x9d9f3adc00000000, + 0x084b4a4900000000, 0xf630aa2d00000000, 0x63e4dab800000000, + 0x0ac76ae400000000, 0x9f131a7100000000, 0x6168fa1500000000, + 0xf4bc8a8000000000}, + {0x0000000000000000, 0x1f17f08000000000, 
0x7f2891da00000000, + 0x603f615a00000000, 0xbf56536e00000000, 0xa041a3ee00000000, + 0xc07ec2b400000000, 0xdf69323400000000, 0x7eada6dc00000000, + 0x61ba565c00000000, 0x0185370600000000, 0x1e92c78600000000, + 0xc1fbf5b200000000, 0xdeec053200000000, 0xbed3646800000000, + 0xa1c494e800000000, 0xbd5c3c6200000000, 0xa24bcce200000000, + 0xc274adb800000000, 0xdd635d3800000000, 0x020a6f0c00000000, + 0x1d1d9f8c00000000, 0x7d22fed600000000, 0x62350e5600000000, + 0xc3f19abe00000000, 0xdce66a3e00000000, 0xbcd90b6400000000, + 0xa3cefbe400000000, 0x7ca7c9d000000000, 0x63b0395000000000, + 0x038f580a00000000, 0x1c98a88a00000000, 0x7ab978c400000000, + 0x65ae884400000000, 0x0591e91e00000000, 0x1a86199e00000000, + 0xc5ef2baa00000000, 0xdaf8db2a00000000, 0xbac7ba7000000000, + 0xa5d04af000000000, 0x0414de1800000000, 0x1b032e9800000000, + 0x7b3c4fc200000000, 0x642bbf4200000000, 0xbb428d7600000000, + 0xa4557df600000000, 0xc46a1cac00000000, 0xdb7dec2c00000000, + 0xc7e544a600000000, 0xd8f2b42600000000, 0xb8cdd57c00000000, + 0xa7da25fc00000000, 0x78b317c800000000, 0x67a4e74800000000, + 0x079b861200000000, 0x188c769200000000, 0xb948e27a00000000, + 0xa65f12fa00000000, 0xc66073a000000000, 0xd977832000000000, + 0x061eb11400000000, 0x1909419400000000, 0x793620ce00000000, + 0x6621d04e00000000, 0xb574805300000000, 0xaa6370d300000000, + 0xca5c118900000000, 0xd54be10900000000, 0x0a22d33d00000000, + 0x153523bd00000000, 0x750a42e700000000, 0x6a1db26700000000, + 0xcbd9268f00000000, 0xd4ced60f00000000, 0xb4f1b75500000000, + 0xabe647d500000000, 0x748f75e100000000, 0x6b98856100000000, + 0x0ba7e43b00000000, 0x14b014bb00000000, 0x0828bc3100000000, + 0x173f4cb100000000, 0x77002deb00000000, 0x6817dd6b00000000, + 0xb77eef5f00000000, 0xa8691fdf00000000, 0xc8567e8500000000, + 0xd7418e0500000000, 0x76851aed00000000, 0x6992ea6d00000000, + 0x09ad8b3700000000, 0x16ba7bb700000000, 0xc9d3498300000000, + 0xd6c4b90300000000, 0xb6fbd85900000000, 0xa9ec28d900000000, + 0xcfcdf89700000000, 0xd0da081700000000, 
0xb0e5694d00000000, + 0xaff299cd00000000, 0x709babf900000000, 0x6f8c5b7900000000, + 0x0fb33a2300000000, 0x10a4caa300000000, 0xb1605e4b00000000, + 0xae77aecb00000000, 0xce48cf9100000000, 0xd15f3f1100000000, + 0x0e360d2500000000, 0x1121fda500000000, 0x711e9cff00000000, + 0x6e096c7f00000000, 0x7291c4f500000000, 0x6d86347500000000, + 0x0db9552f00000000, 0x12aea5af00000000, 0xcdc7979b00000000, + 0xd2d0671b00000000, 0xb2ef064100000000, 0xadf8f6c100000000, + 0x0c3c622900000000, 0x132b92a900000000, 0x7314f3f300000000, + 0x6c03037300000000, 0xb36a314700000000, 0xac7dc1c700000000, + 0xcc42a09d00000000, 0xd355501d00000000, 0x6ae900a700000000, + 0x75fef02700000000, 0x15c1917d00000000, 0x0ad661fd00000000, + 0xd5bf53c900000000, 0xcaa8a34900000000, 0xaa97c21300000000, + 0xb580329300000000, 0x1444a67b00000000, 0x0b5356fb00000000, + 0x6b6c37a100000000, 0x747bc72100000000, 0xab12f51500000000, + 0xb405059500000000, 0xd43a64cf00000000, 0xcb2d944f00000000, + 0xd7b53cc500000000, 0xc8a2cc4500000000, 0xa89dad1f00000000, + 0xb78a5d9f00000000, 0x68e36fab00000000, 0x77f49f2b00000000, + 0x17cbfe7100000000, 0x08dc0ef100000000, 0xa9189a1900000000, + 0xb60f6a9900000000, 0xd6300bc300000000, 0xc927fb4300000000, + 0x164ec97700000000, 0x095939f700000000, 0x696658ad00000000, + 0x7671a82d00000000, 0x1050786300000000, 0x0f4788e300000000, + 0x6f78e9b900000000, 0x706f193900000000, 0xaf062b0d00000000, + 0xb011db8d00000000, 0xd02ebad700000000, 0xcf394a5700000000, + 0x6efddebf00000000, 0x71ea2e3f00000000, 0x11d54f6500000000, + 0x0ec2bfe500000000, 0xd1ab8dd100000000, 0xcebc7d5100000000, + 0xae831c0b00000000, 0xb194ec8b00000000, 0xad0c440100000000, + 0xb21bb48100000000, 0xd224d5db00000000, 0xcd33255b00000000, + 0x125a176f00000000, 0x0d4de7ef00000000, 0x6d7286b500000000, + 0x7265763500000000, 0xd3a1e2dd00000000, 0xccb6125d00000000, + 0xac89730700000000, 0xb39e838700000000, 0x6cf7b1b300000000, + 0x73e0413300000000, 0x13df206900000000, 0x0cc8d0e900000000, + 0xdf9d80f400000000, 0xc08a707400000000, 
0xa0b5112e00000000, + 0xbfa2e1ae00000000, 0x60cbd39a00000000, 0x7fdc231a00000000, + 0x1fe3424000000000, 0x00f4b2c000000000, 0xa130262800000000, + 0xbe27d6a800000000, 0xde18b7f200000000, 0xc10f477200000000, + 0x1e66754600000000, 0x017185c600000000, 0x614ee49c00000000, + 0x7e59141c00000000, 0x62c1bc9600000000, 0x7dd64c1600000000, + 0x1de92d4c00000000, 0x02feddcc00000000, 0xdd97eff800000000, + 0xc2801f7800000000, 0xa2bf7e2200000000, 0xbda88ea200000000, + 0x1c6c1a4a00000000, 0x037beaca00000000, 0x63448b9000000000, + 0x7c537b1000000000, 0xa33a492400000000, 0xbc2db9a400000000, + 0xdc12d8fe00000000, 0xc305287e00000000, 0xa524f83000000000, + 0xba3308b000000000, 0xda0c69ea00000000, 0xc51b996a00000000, + 0x1a72ab5e00000000, 0x05655bde00000000, 0x655a3a8400000000, + 0x7a4dca0400000000, 0xdb895eec00000000, 0xc49eae6c00000000, + 0xa4a1cf3600000000, 0xbbb63fb600000000, 0x64df0d8200000000, + 0x7bc8fd0200000000, 0x1bf79c5800000000, 0x04e06cd800000000, + 0x1878c45200000000, 0x076f34d200000000, 0x6750558800000000, + 0x7847a50800000000, 0xa72e973c00000000, 0xb83967bc00000000, + 0xd80606e600000000, 0xc711f66600000000, 0x66d5628e00000000, + 0x79c2920e00000000, 0x19fdf35400000000, 0x06ea03d400000000, + 0xd98331e000000000, 0xc694c16000000000, 0xa6aba03a00000000, + 0xb9bc50ba00000000}, + {0x0000000000000000, 0xe2fd888d00000000, 0x85fd60c000000000, + 0x6700e84d00000000, 0x4bfdb05b00000000, 0xa90038d600000000, + 0xce00d09b00000000, 0x2cfd581600000000, 0x96fa61b700000000, + 0x7407e93a00000000, 0x1307017700000000, 0xf1fa89fa00000000, + 0xdd07d1ec00000000, 0x3ffa596100000000, 0x58fab12c00000000, + 0xba0739a100000000, 0x6df3b2b500000000, 0x8f0e3a3800000000, + 0xe80ed27500000000, 0x0af35af800000000, 0x260e02ee00000000, + 0xc4f38a6300000000, 0xa3f3622e00000000, 0x410eeaa300000000, + 0xfb09d30200000000, 0x19f45b8f00000000, 0x7ef4b3c200000000, + 0x9c093b4f00000000, 0xb0f4635900000000, 0x5209ebd400000000, + 0x3509039900000000, 0xd7f48b1400000000, 0x9be014b000000000, + 0x791d9c3d00000000, 
0x1e1d747000000000, 0xfce0fcfd00000000, + 0xd01da4eb00000000, 0x32e02c6600000000, 0x55e0c42b00000000, + 0xb71d4ca600000000, 0x0d1a750700000000, 0xefe7fd8a00000000, + 0x88e715c700000000, 0x6a1a9d4a00000000, 0x46e7c55c00000000, + 0xa41a4dd100000000, 0xc31aa59c00000000, 0x21e72d1100000000, + 0xf613a60500000000, 0x14ee2e8800000000, 0x73eec6c500000000, + 0x91134e4800000000, 0xbdee165e00000000, 0x5f139ed300000000, + 0x3813769e00000000, 0xdaeefe1300000000, 0x60e9c7b200000000, + 0x82144f3f00000000, 0xe514a77200000000, 0x07e92fff00000000, + 0x2b1477e900000000, 0xc9e9ff6400000000, 0xaee9172900000000, + 0x4c149fa400000000, 0x77c758bb00000000, 0x953ad03600000000, + 0xf23a387b00000000, 0x10c7b0f600000000, 0x3c3ae8e000000000, + 0xdec7606d00000000, 0xb9c7882000000000, 0x5b3a00ad00000000, + 0xe13d390c00000000, 0x03c0b18100000000, 0x64c059cc00000000, + 0x863dd14100000000, 0xaac0895700000000, 0x483d01da00000000, + 0x2f3de99700000000, 0xcdc0611a00000000, 0x1a34ea0e00000000, + 0xf8c9628300000000, 0x9fc98ace00000000, 0x7d34024300000000, + 0x51c95a5500000000, 0xb334d2d800000000, 0xd4343a9500000000, + 0x36c9b21800000000, 0x8cce8bb900000000, 0x6e33033400000000, + 0x0933eb7900000000, 0xebce63f400000000, 0xc7333be200000000, + 0x25ceb36f00000000, 0x42ce5b2200000000, 0xa033d3af00000000, + 0xec274c0b00000000, 0x0edac48600000000, 0x69da2ccb00000000, + 0x8b27a44600000000, 0xa7dafc5000000000, 0x452774dd00000000, + 0x22279c9000000000, 0xc0da141d00000000, 0x7add2dbc00000000, + 0x9820a53100000000, 0xff204d7c00000000, 0x1dddc5f100000000, + 0x31209de700000000, 0xd3dd156a00000000, 0xb4ddfd2700000000, + 0x562075aa00000000, 0x81d4febe00000000, 0x6329763300000000, + 0x04299e7e00000000, 0xe6d416f300000000, 0xca294ee500000000, + 0x28d4c66800000000, 0x4fd42e2500000000, 0xad29a6a800000000, + 0x172e9f0900000000, 0xf5d3178400000000, 0x92d3ffc900000000, + 0x702e774400000000, 0x5cd32f5200000000, 0xbe2ea7df00000000, + 0xd92e4f9200000000, 0x3bd3c71f00000000, 0xaf88c0ad00000000, + 0x4d75482000000000, 
0x2a75a06d00000000, 0xc88828e000000000, + 0xe47570f600000000, 0x0688f87b00000000, 0x6188103600000000, + 0x837598bb00000000, 0x3972a11a00000000, 0xdb8f299700000000, + 0xbc8fc1da00000000, 0x5e72495700000000, 0x728f114100000000, + 0x907299cc00000000, 0xf772718100000000, 0x158ff90c00000000, + 0xc27b721800000000, 0x2086fa9500000000, 0x478612d800000000, + 0xa57b9a5500000000, 0x8986c24300000000, 0x6b7b4ace00000000, + 0x0c7ba28300000000, 0xee862a0e00000000, 0x548113af00000000, + 0xb67c9b2200000000, 0xd17c736f00000000, 0x3381fbe200000000, + 0x1f7ca3f400000000, 0xfd812b7900000000, 0x9a81c33400000000, + 0x787c4bb900000000, 0x3468d41d00000000, 0xd6955c9000000000, + 0xb195b4dd00000000, 0x53683c5000000000, 0x7f95644600000000, + 0x9d68eccb00000000, 0xfa68048600000000, 0x18958c0b00000000, + 0xa292b5aa00000000, 0x406f3d2700000000, 0x276fd56a00000000, + 0xc5925de700000000, 0xe96f05f100000000, 0x0b928d7c00000000, + 0x6c92653100000000, 0x8e6fedbc00000000, 0x599b66a800000000, + 0xbb66ee2500000000, 0xdc66066800000000, 0x3e9b8ee500000000, + 0x1266d6f300000000, 0xf09b5e7e00000000, 0x979bb63300000000, + 0x75663ebe00000000, 0xcf61071f00000000, 0x2d9c8f9200000000, + 0x4a9c67df00000000, 0xa861ef5200000000, 0x849cb74400000000, + 0x66613fc900000000, 0x0161d78400000000, 0xe39c5f0900000000, + 0xd84f981600000000, 0x3ab2109b00000000, 0x5db2f8d600000000, + 0xbf4f705b00000000, 0x93b2284d00000000, 0x714fa0c000000000, + 0x164f488d00000000, 0xf4b2c00000000000, 0x4eb5f9a100000000, + 0xac48712c00000000, 0xcb48996100000000, 0x29b511ec00000000, + 0x054849fa00000000, 0xe7b5c17700000000, 0x80b5293a00000000, + 0x6248a1b700000000, 0xb5bc2aa300000000, 0x5741a22e00000000, + 0x30414a6300000000, 0xd2bcc2ee00000000, 0xfe419af800000000, + 0x1cbc127500000000, 0x7bbcfa3800000000, 0x994172b500000000, + 0x23464b1400000000, 0xc1bbc39900000000, 0xa6bb2bd400000000, + 0x4446a35900000000, 0x68bbfb4f00000000, 0x8a4673c200000000, + 0xed469b8f00000000, 0x0fbb130200000000, 0x43af8ca600000000, + 0xa152042b00000000, 
0xc652ec6600000000, 0x24af64eb00000000, + 0x08523cfd00000000, 0xeaafb47000000000, 0x8daf5c3d00000000, + 0x6f52d4b000000000, 0xd555ed1100000000, 0x37a8659c00000000, + 0x50a88dd100000000, 0xb255055c00000000, 0x9ea85d4a00000000, + 0x7c55d5c700000000, 0x1b553d8a00000000, 0xf9a8b50700000000, + 0x2e5c3e1300000000, 0xcca1b69e00000000, 0xaba15ed300000000, + 0x495cd65e00000000, 0x65a18e4800000000, 0x875c06c500000000, + 0xe05cee8800000000, 0x02a1660500000000, 0xb8a65fa400000000, + 0x5a5bd72900000000, 0x3d5b3f6400000000, 0xdfa6b7e900000000, + 0xf35befff00000000, 0x11a6677200000000, 0x76a68f3f00000000, + 0x945b07b200000000}, + {0x0000000000000000, 0xa90b894e00000000, 0x5217129d00000000, + 0xfb1c9bd300000000, 0xe52855e100000000, 0x4c23dcaf00000000, + 0xb73f477c00000000, 0x1e34ce3200000000, 0x8b57db1900000000, + 0x225c525700000000, 0xd940c98400000000, 0x704b40ca00000000, + 0x6e7f8ef800000000, 0xc77407b600000000, 0x3c689c6500000000, + 0x9563152b00000000, 0x16afb63300000000, 0xbfa43f7d00000000, + 0x44b8a4ae00000000, 0xedb32de000000000, 0xf387e3d200000000, + 0x5a8c6a9c00000000, 0xa190f14f00000000, 0x089b780100000000, + 0x9df86d2a00000000, 0x34f3e46400000000, 0xcfef7fb700000000, + 0x66e4f6f900000000, 0x78d038cb00000000, 0xd1dbb18500000000, + 0x2ac72a5600000000, 0x83cca31800000000, 0x2c5e6d6700000000, + 0x8555e42900000000, 0x7e497ffa00000000, 0xd742f6b400000000, + 0xc976388600000000, 0x607db1c800000000, 0x9b612a1b00000000, + 0x326aa35500000000, 0xa709b67e00000000, 0x0e023f3000000000, + 0xf51ea4e300000000, 0x5c152dad00000000, 0x4221e39f00000000, + 0xeb2a6ad100000000, 0x1036f10200000000, 0xb93d784c00000000, + 0x3af1db5400000000, 0x93fa521a00000000, 0x68e6c9c900000000, + 0xc1ed408700000000, 0xdfd98eb500000000, 0x76d207fb00000000, + 0x8dce9c2800000000, 0x24c5156600000000, 0xb1a6004d00000000, + 0x18ad890300000000, 0xe3b112d000000000, 0x4aba9b9e00000000, + 0x548e55ac00000000, 0xfd85dce200000000, 0x0699473100000000, + 0xaf92ce7f00000000, 0x58bcdace00000000, 0xf1b7538000000000, + 
0x0aabc85300000000, 0xa3a0411d00000000, 0xbd948f2f00000000, + 0x149f066100000000, 0xef839db200000000, 0x468814fc00000000, + 0xd3eb01d700000000, 0x7ae0889900000000, 0x81fc134a00000000, + 0x28f79a0400000000, 0x36c3543600000000, 0x9fc8dd7800000000, + 0x64d446ab00000000, 0xcddfcfe500000000, 0x4e136cfd00000000, + 0xe718e5b300000000, 0x1c047e6000000000, 0xb50ff72e00000000, + 0xab3b391c00000000, 0x0230b05200000000, 0xf92c2b8100000000, + 0x5027a2cf00000000, 0xc544b7e400000000, 0x6c4f3eaa00000000, + 0x9753a57900000000, 0x3e582c3700000000, 0x206ce20500000000, + 0x89676b4b00000000, 0x727bf09800000000, 0xdb7079d600000000, + 0x74e2b7a900000000, 0xdde93ee700000000, 0x26f5a53400000000, + 0x8ffe2c7a00000000, 0x91cae24800000000, 0x38c16b0600000000, + 0xc3ddf0d500000000, 0x6ad6799b00000000, 0xffb56cb000000000, + 0x56bee5fe00000000, 0xada27e2d00000000, 0x04a9f76300000000, + 0x1a9d395100000000, 0xb396b01f00000000, 0x488a2bcc00000000, + 0xe181a28200000000, 0x624d019a00000000, 0xcb4688d400000000, + 0x305a130700000000, 0x99519a4900000000, 0x8765547b00000000, + 0x2e6edd3500000000, 0xd57246e600000000, 0x7c79cfa800000000, + 0xe91ada8300000000, 0x401153cd00000000, 0xbb0dc81e00000000, + 0x1206415000000000, 0x0c328f6200000000, 0xa539062c00000000, + 0x5e259dff00000000, 0xf72e14b100000000, 0xf17ec44600000000, + 0x58754d0800000000, 0xa369d6db00000000, 0x0a625f9500000000, + 0x145691a700000000, 0xbd5d18e900000000, 0x4641833a00000000, + 0xef4a0a7400000000, 0x7a291f5f00000000, 0xd322961100000000, + 0x283e0dc200000000, 0x8135848c00000000, 0x9f014abe00000000, + 0x360ac3f000000000, 0xcd16582300000000, 0x641dd16d00000000, + 0xe7d1727500000000, 0x4edafb3b00000000, 0xb5c660e800000000, + 0x1ccde9a600000000, 0x02f9279400000000, 0xabf2aeda00000000, + 0x50ee350900000000, 0xf9e5bc4700000000, 0x6c86a96c00000000, + 0xc58d202200000000, 0x3e91bbf100000000, 0x979a32bf00000000, + 0x89aefc8d00000000, 0x20a575c300000000, 0xdbb9ee1000000000, + 0x72b2675e00000000, 0xdd20a92100000000, 0x742b206f00000000, + 
0x8f37bbbc00000000, 0x263c32f200000000, 0x3808fcc000000000, + 0x9103758e00000000, 0x6a1fee5d00000000, 0xc314671300000000, + 0x5677723800000000, 0xff7cfb7600000000, 0x046060a500000000, + 0xad6be9eb00000000, 0xb35f27d900000000, 0x1a54ae9700000000, + 0xe148354400000000, 0x4843bc0a00000000, 0xcb8f1f1200000000, + 0x6284965c00000000, 0x99980d8f00000000, 0x309384c100000000, + 0x2ea74af300000000, 0x87acc3bd00000000, 0x7cb0586e00000000, + 0xd5bbd12000000000, 0x40d8c40b00000000, 0xe9d34d4500000000, + 0x12cfd69600000000, 0xbbc45fd800000000, 0xa5f091ea00000000, + 0x0cfb18a400000000, 0xf7e7837700000000, 0x5eec0a3900000000, + 0xa9c21e8800000000, 0x00c997c600000000, 0xfbd50c1500000000, + 0x52de855b00000000, 0x4cea4b6900000000, 0xe5e1c22700000000, + 0x1efd59f400000000, 0xb7f6d0ba00000000, 0x2295c59100000000, + 0x8b9e4cdf00000000, 0x7082d70c00000000, 0xd9895e4200000000, + 0xc7bd907000000000, 0x6eb6193e00000000, 0x95aa82ed00000000, + 0x3ca10ba300000000, 0xbf6da8bb00000000, 0x166621f500000000, + 0xed7aba2600000000, 0x4471336800000000, 0x5a45fd5a00000000, + 0xf34e741400000000, 0x0852efc700000000, 0xa159668900000000, + 0x343a73a200000000, 0x9d31faec00000000, 0x662d613f00000000, + 0xcf26e87100000000, 0xd112264300000000, 0x7819af0d00000000, + 0x830534de00000000, 0x2a0ebd9000000000, 0x859c73ef00000000, + 0x2c97faa100000000, 0xd78b617200000000, 0x7e80e83c00000000, + 0x60b4260e00000000, 0xc9bfaf4000000000, 0x32a3349300000000, + 0x9ba8bddd00000000, 0x0ecba8f600000000, 0xa7c021b800000000, + 0x5cdcba6b00000000, 0xf5d7332500000000, 0xebe3fd1700000000, + 0x42e8745900000000, 0xb9f4ef8a00000000, 0x10ff66c400000000, + 0x9333c5dc00000000, 0x3a384c9200000000, 0xc124d74100000000, + 0x682f5e0f00000000, 0x761b903d00000000, 0xdf10197300000000, + 0x240c82a000000000, 0x8d070bee00000000, 0x18641ec500000000, + 0xb16f978b00000000, 0x4a730c5800000000, 0xe378851600000000, + 0xfd4c4b2400000000, 0x5447c26a00000000, 0xaf5b59b900000000, + 0x0650d0f700000000}, + {0x0000000000000000, 0x479244af00000000, 
0xcf22f88500000000, + 0x88b0bc2a00000000, 0xdf4381d000000000, 0x98d1c57f00000000, + 0x1061795500000000, 0x57f33dfa00000000, 0xff81737a00000000, + 0xb81337d500000000, 0x30a38bff00000000, 0x7731cf5000000000, + 0x20c2f2aa00000000, 0x6750b60500000000, 0xefe00a2f00000000, + 0xa8724e8000000000, 0xfe03e7f400000000, 0xb991a35b00000000, + 0x31211f7100000000, 0x76b35bde00000000, 0x2140662400000000, + 0x66d2228b00000000, 0xee629ea100000000, 0xa9f0da0e00000000, + 0x0182948e00000000, 0x4610d02100000000, 0xcea06c0b00000000, + 0x893228a400000000, 0xdec1155e00000000, 0x995351f100000000, + 0x11e3eddb00000000, 0x5671a97400000000, 0xbd01bf3200000000, + 0xfa93fb9d00000000, 0x722347b700000000, 0x35b1031800000000, + 0x62423ee200000000, 0x25d07a4d00000000, 0xad60c66700000000, + 0xeaf282c800000000, 0x4280cc4800000000, 0x051288e700000000, + 0x8da234cd00000000, 0xca30706200000000, 0x9dc34d9800000000, + 0xda51093700000000, 0x52e1b51d00000000, 0x1573f1b200000000, + 0x430258c600000000, 0x04901c6900000000, 0x8c20a04300000000, + 0xcbb2e4ec00000000, 0x9c41d91600000000, 0xdbd39db900000000, + 0x5363219300000000, 0x14f1653c00000000, 0xbc832bbc00000000, + 0xfb116f1300000000, 0x73a1d33900000000, 0x3433979600000000, + 0x63c0aa6c00000000, 0x2452eec300000000, 0xace252e900000000, + 0xeb70164600000000, 0x7a037e6500000000, 0x3d913aca00000000, + 0xb52186e000000000, 0xf2b3c24f00000000, 0xa540ffb500000000, + 0xe2d2bb1a00000000, 0x6a62073000000000, 0x2df0439f00000000, + 0x85820d1f00000000, 0xc21049b000000000, 0x4aa0f59a00000000, + 0x0d32b13500000000, 0x5ac18ccf00000000, 0x1d53c86000000000, + 0x95e3744a00000000, 0xd27130e500000000, 0x8400999100000000, + 0xc392dd3e00000000, 0x4b22611400000000, 0x0cb025bb00000000, + 0x5b43184100000000, 0x1cd15cee00000000, 0x9461e0c400000000, + 0xd3f3a46b00000000, 0x7b81eaeb00000000, 0x3c13ae4400000000, + 0xb4a3126e00000000, 0xf33156c100000000, 0xa4c26b3b00000000, + 0xe3502f9400000000, 0x6be093be00000000, 0x2c72d71100000000, + 0xc702c15700000000, 0x809085f800000000, 
0x082039d200000000, + 0x4fb27d7d00000000, 0x1841408700000000, 0x5fd3042800000000, + 0xd763b80200000000, 0x90f1fcad00000000, 0x3883b22d00000000, + 0x7f11f68200000000, 0xf7a14aa800000000, 0xb0330e0700000000, + 0xe7c033fd00000000, 0xa052775200000000, 0x28e2cb7800000000, + 0x6f708fd700000000, 0x390126a300000000, 0x7e93620c00000000, + 0xf623de2600000000, 0xb1b19a8900000000, 0xe642a77300000000, + 0xa1d0e3dc00000000, 0x29605ff600000000, 0x6ef21b5900000000, + 0xc68055d900000000, 0x8112117600000000, 0x09a2ad5c00000000, + 0x4e30e9f300000000, 0x19c3d40900000000, 0x5e5190a600000000, + 0xd6e12c8c00000000, 0x9173682300000000, 0xf406fcca00000000, + 0xb394b86500000000, 0x3b24044f00000000, 0x7cb640e000000000, + 0x2b457d1a00000000, 0x6cd739b500000000, 0xe467859f00000000, + 0xa3f5c13000000000, 0x0b878fb000000000, 0x4c15cb1f00000000, + 0xc4a5773500000000, 0x8337339a00000000, 0xd4c40e6000000000, + 0x93564acf00000000, 0x1be6f6e500000000, 0x5c74b24a00000000, + 0x0a051b3e00000000, 0x4d975f9100000000, 0xc527e3bb00000000, + 0x82b5a71400000000, 0xd5469aee00000000, 0x92d4de4100000000, + 0x1a64626b00000000, 0x5df626c400000000, 0xf584684400000000, + 0xb2162ceb00000000, 0x3aa690c100000000, 0x7d34d46e00000000, + 0x2ac7e99400000000, 0x6d55ad3b00000000, 0xe5e5111100000000, + 0xa27755be00000000, 0x490743f800000000, 0x0e95075700000000, + 0x8625bb7d00000000, 0xc1b7ffd200000000, 0x9644c22800000000, + 0xd1d6868700000000, 0x59663aad00000000, 0x1ef47e0200000000, + 0xb686308200000000, 0xf114742d00000000, 0x79a4c80700000000, + 0x3e368ca800000000, 0x69c5b15200000000, 0x2e57f5fd00000000, + 0xa6e749d700000000, 0xe1750d7800000000, 0xb704a40c00000000, + 0xf096e0a300000000, 0x78265c8900000000, 0x3fb4182600000000, + 0x684725dc00000000, 0x2fd5617300000000, 0xa765dd5900000000, + 0xe0f799f600000000, 0x4885d77600000000, 0x0f1793d900000000, + 0x87a72ff300000000, 0xc0356b5c00000000, 0x97c656a600000000, + 0xd054120900000000, 0x58e4ae2300000000, 0x1f76ea8c00000000, + 0x8e0582af00000000, 0xc997c60000000000, 
0x41277a2a00000000, + 0x06b53e8500000000, 0x5146037f00000000, 0x16d447d000000000, + 0x9e64fbfa00000000, 0xd9f6bf5500000000, 0x7184f1d500000000, + 0x3616b57a00000000, 0xbea6095000000000, 0xf9344dff00000000, + 0xaec7700500000000, 0xe95534aa00000000, 0x61e5888000000000, + 0x2677cc2f00000000, 0x7006655b00000000, 0x379421f400000000, + 0xbf249dde00000000, 0xf8b6d97100000000, 0xaf45e48b00000000, + 0xe8d7a02400000000, 0x60671c0e00000000, 0x27f558a100000000, + 0x8f87162100000000, 0xc815528e00000000, 0x40a5eea400000000, + 0x0737aa0b00000000, 0x50c497f100000000, 0x1756d35e00000000, + 0x9fe66f7400000000, 0xd8742bdb00000000, 0x33043d9d00000000, + 0x7496793200000000, 0xfc26c51800000000, 0xbbb481b700000000, + 0xec47bc4d00000000, 0xabd5f8e200000000, 0x236544c800000000, + 0x64f7006700000000, 0xcc854ee700000000, 0x8b170a4800000000, + 0x03a7b66200000000, 0x4435f2cd00000000, 0x13c6cf3700000000, + 0x54548b9800000000, 0xdce437b200000000, 0x9b76731d00000000, + 0xcd07da6900000000, 0x8a959ec600000000, 0x022522ec00000000, + 0x45b7664300000000, 0x12445bb900000000, 0x55d61f1600000000, + 0xdd66a33c00000000, 0x9af4e79300000000, 0x3286a91300000000, + 0x7514edbc00000000, 0xfda4519600000000, 0xba36153900000000, + 0xedc528c300000000, 0xaa576c6c00000000, 0x22e7d04600000000, + 0x657594e900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 
0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 
0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 
0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 
0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 
0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 
0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 
0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43cba687, 0xc7903cd4, 0x845b9a53, 0xcf270873, + 0x8cecaef4, 0x08b734a7, 0x4b7c9220, 0x9e4f10e6, 0xdd84b661, + 0x59df2c32, 0x1a148ab5, 0x51681895, 0x12a3be12, 0x96f82441, + 0xd53382c6, 0x7d995117, 0x3e52f790, 0xba096dc3, 0xf9c2cb44, + 0xb2be5964, 0xf175ffe3, 0x752e65b0, 0x36e5c337, 0xe3d641f1, + 0xa01de776, 0x24467d25, 0x678ddba2, 0x2cf14982, 0x6f3aef05, + 0xeb617556, 0xa8aad3d1, 0xfa32a32e, 0xb9f905a9, 0x3da29ffa, + 0x7e69397d, 0x3515ab5d, 0x76de0dda, 0xf2859789, 0xb14e310e, + 0x647db3c8, 0x27b6154f, 0xa3ed8f1c, 0xe026299b, 0xab5abbbb, + 0xe8911d3c, 0x6cca876f, 0x2f0121e8, 0x87abf239, 0xc46054be, + 0x403bceed, 0x03f0686a, 0x488cfa4a, 0x0b475ccd, 0x8f1cc69e, + 0xccd76019, 0x19e4e2df, 0x5a2f4458, 0xde74de0b, 0x9dbf788c, + 0xd6c3eaac, 0x95084c2b, 0x1153d678, 0x529870ff, 0xf465465d, + 0xb7aee0da, 0x33f57a89, 0x703edc0e, 0x3b424e2e, 0x7889e8a9, + 0xfcd272fa, 0xbf19d47d, 0x6a2a56bb, 0x29e1f03c, 0xadba6a6f, + 0xee71cce8, 0xa50d5ec8, 0xe6c6f84f, 0x629d621c, 0x2156c49b, + 0x89fc174a, 0xca37b1cd, 0x4e6c2b9e, 0x0da78d19, 0x46db1f39, + 0x0510b9be, 0x814b23ed, 0xc280856a, 0x17b307ac, 0x5478a12b, + 0xd0233b78, 0x93e89dff, 0xd8940fdf, 0x9b5fa958, 0x1f04330b, + 0x5ccf958c, 0x0e57e573, 0x4d9c43f4, 0xc9c7d9a7, 0x8a0c7f20, + 0xc170ed00, 0x82bb4b87, 0x06e0d1d4, 0x452b7753, 0x9018f595, + 0xd3d35312, 0x5788c941, 0x14436fc6, 0x5f3ffde6, 0x1cf45b61, + 0x98afc132, 0xdb6467b5, 0x73ceb464, 0x300512e3, 0xb45e88b0, + 0xf7952e37, 0xbce9bc17, 0xff221a90, 0x7b7980c3, 0x38b22644, + 0xed81a482, 0xae4a0205, 0x2a119856, 0x69da3ed1, 0x22a6acf1, + 0x616d0a76, 0xe5369025, 0xa6fd36a2, 0xe8cb8cba, 0xab002a3d, + 0x2f5bb06e, 0x6c9016e9, 0x27ec84c9, 0x6427224e, 0xe07cb81d, + 0xa3b71e9a, 0x76849c5c, 0x354f3adb, 0xb114a088, 0xf2df060f, + 0xb9a3942f, 0xfa6832a8, 0x7e33a8fb, 0x3df80e7c, 0x9552ddad, + 0xd6997b2a, 0x52c2e179, 0x110947fe, 0x5a75d5de, 
0x19be7359, + 0x9de5e90a, 0xde2e4f8d, 0x0b1dcd4b, 0x48d66bcc, 0xcc8df19f, + 0x8f465718, 0xc43ac538, 0x87f163bf, 0x03aaf9ec, 0x40615f6b, + 0x12f92f94, 0x51328913, 0xd5691340, 0x96a2b5c7, 0xddde27e7, + 0x9e158160, 0x1a4e1b33, 0x5985bdb4, 0x8cb63f72, 0xcf7d99f5, + 0x4b2603a6, 0x08eda521, 0x43913701, 0x005a9186, 0x84010bd5, + 0xc7caad52, 0x6f607e83, 0x2cabd804, 0xa8f04257, 0xeb3be4d0, + 0xa04776f0, 0xe38cd077, 0x67d74a24, 0x241ceca3, 0xf12f6e65, + 0xb2e4c8e2, 0x36bf52b1, 0x7574f436, 0x3e086616, 0x7dc3c091, + 0xf9985ac2, 0xba53fc45, 0x1caecae7, 0x5f656c60, 0xdb3ef633, + 0x98f550b4, 0xd389c294, 0x90426413, 0x1419fe40, 0x57d258c7, + 0x82e1da01, 0xc12a7c86, 0x4571e6d5, 0x06ba4052, 0x4dc6d272, + 0x0e0d74f5, 0x8a56eea6, 0xc99d4821, 0x61379bf0, 0x22fc3d77, + 0xa6a7a724, 0xe56c01a3, 0xae109383, 0xeddb3504, 0x6980af57, + 0x2a4b09d0, 0xff788b16, 0xbcb32d91, 0x38e8b7c2, 0x7b231145, + 0x305f8365, 0x739425e2, 0xf7cfbfb1, 0xb4041936, 0xe69c69c9, + 0xa557cf4e, 0x210c551d, 0x62c7f39a, 0x29bb61ba, 0x6a70c73d, + 0xee2b5d6e, 0xade0fbe9, 0x78d3792f, 0x3b18dfa8, 0xbf4345fb, + 0xfc88e37c, 0xb7f4715c, 0xf43fd7db, 0x70644d88, 0x33afeb0f, + 0x9b0538de, 0xd8ce9e59, 0x5c95040a, 0x1f5ea28d, 0x542230ad, + 0x17e9962a, 0x93b20c79, 0xd079aafe, 0x054a2838, 0x46818ebf, + 0xc2da14ec, 0x8111b26b, 0xca6d204b, 0x89a686cc, 0x0dfd1c9f, + 0x4e36ba18}, + {0x00000000, 0xe1b652ef, 0x836bd405, 0x62dd86ea, 0x06d7a80b, + 0xe761fae4, 0x85bc7c0e, 0x640a2ee1, 0x0cae5117, 0xed1803f8, + 0x8fc58512, 0x6e73d7fd, 0x0a79f91c, 0xebcfabf3, 0x89122d19, + 0x68a47ff6, 0x185ca32e, 0xf9eaf1c1, 0x9b37772b, 0x7a8125c4, + 0x1e8b0b25, 0xff3d59ca, 0x9de0df20, 0x7c568dcf, 0x14f2f239, + 0xf544a0d6, 0x9799263c, 0x762f74d3, 0x12255a32, 0xf39308dd, + 0x914e8e37, 0x70f8dcd8, 0x30b8465d, 0xd10e14b2, 0xb3d39258, + 0x5265c0b7, 0x366fee56, 0xd7d9bcb9, 0xb5043a53, 0x54b268bc, + 0x3c16174a, 0xdda045a5, 0xbf7dc34f, 0x5ecb91a0, 0x3ac1bf41, + 0xdb77edae, 0xb9aa6b44, 0x581c39ab, 0x28e4e573, 0xc952b79c, + 0xab8f3176, 0x4a396399, 0x2e334d78, 0xcf851f97, 
0xad58997d, + 0x4ceecb92, 0x244ab464, 0xc5fce68b, 0xa7216061, 0x4697328e, + 0x229d1c6f, 0xc32b4e80, 0xa1f6c86a, 0x40409a85, 0x60708dba, + 0x81c6df55, 0xe31b59bf, 0x02ad0b50, 0x66a725b1, 0x8711775e, + 0xe5ccf1b4, 0x047aa35b, 0x6cdedcad, 0x8d688e42, 0xefb508a8, + 0x0e035a47, 0x6a0974a6, 0x8bbf2649, 0xe962a0a3, 0x08d4f24c, + 0x782c2e94, 0x999a7c7b, 0xfb47fa91, 0x1af1a87e, 0x7efb869f, + 0x9f4dd470, 0xfd90529a, 0x1c260075, 0x74827f83, 0x95342d6c, + 0xf7e9ab86, 0x165ff969, 0x7255d788, 0x93e38567, 0xf13e038d, + 0x10885162, 0x50c8cbe7, 0xb17e9908, 0xd3a31fe2, 0x32154d0d, + 0x561f63ec, 0xb7a93103, 0xd574b7e9, 0x34c2e506, 0x5c669af0, + 0xbdd0c81f, 0xdf0d4ef5, 0x3ebb1c1a, 0x5ab132fb, 0xbb076014, + 0xd9dae6fe, 0x386cb411, 0x489468c9, 0xa9223a26, 0xcbffbccc, + 0x2a49ee23, 0x4e43c0c2, 0xaff5922d, 0xcd2814c7, 0x2c9e4628, + 0x443a39de, 0xa58c6b31, 0xc751eddb, 0x26e7bf34, 0x42ed91d5, + 0xa35bc33a, 0xc18645d0, 0x2030173f, 0x81e66bae, 0x60503941, + 0x028dbfab, 0xe33bed44, 0x8731c3a5, 0x6687914a, 0x045a17a0, + 0xe5ec454f, 0x8d483ab9, 0x6cfe6856, 0x0e23eebc, 0xef95bc53, + 0x8b9f92b2, 0x6a29c05d, 0x08f446b7, 0xe9421458, 0x99bac880, + 0x780c9a6f, 0x1ad11c85, 0xfb674e6a, 0x9f6d608b, 0x7edb3264, + 0x1c06b48e, 0xfdb0e661, 0x95149997, 0x74a2cb78, 0x167f4d92, + 0xf7c91f7d, 0x93c3319c, 0x72756373, 0x10a8e599, 0xf11eb776, + 0xb15e2df3, 0x50e87f1c, 0x3235f9f6, 0xd383ab19, 0xb78985f8, + 0x563fd717, 0x34e251fd, 0xd5540312, 0xbdf07ce4, 0x5c462e0b, + 0x3e9ba8e1, 0xdf2dfa0e, 0xbb27d4ef, 0x5a918600, 0x384c00ea, + 0xd9fa5205, 0xa9028edd, 0x48b4dc32, 0x2a695ad8, 0xcbdf0837, + 0xafd526d6, 0x4e637439, 0x2cbef2d3, 0xcd08a03c, 0xa5acdfca, + 0x441a8d25, 0x26c70bcf, 0xc7715920, 0xa37b77c1, 0x42cd252e, + 0x2010a3c4, 0xc1a6f12b, 0xe196e614, 0x0020b4fb, 0x62fd3211, + 0x834b60fe, 0xe7414e1f, 0x06f71cf0, 0x642a9a1a, 0x859cc8f5, + 0xed38b703, 0x0c8ee5ec, 0x6e536306, 0x8fe531e9, 0xebef1f08, + 0x0a594de7, 0x6884cb0d, 0x893299e2, 0xf9ca453a, 0x187c17d5, + 0x7aa1913f, 0x9b17c3d0, 0xff1ded31, 0x1eabbfde, 0x7c763934, + 
0x9dc06bdb, 0xf564142d, 0x14d246c2, 0x760fc028, 0x97b992c7, + 0xf3b3bc26, 0x1205eec9, 0x70d86823, 0x916e3acc, 0xd12ea049, + 0x3098f2a6, 0x5245744c, 0xb3f326a3, 0xd7f90842, 0x364f5aad, + 0x5492dc47, 0xb5248ea8, 0xdd80f15e, 0x3c36a3b1, 0x5eeb255b, + 0xbf5d77b4, 0xdb575955, 0x3ae10bba, 0x583c8d50, 0xb98adfbf, + 0xc9720367, 0x28c45188, 0x4a19d762, 0xabaf858d, 0xcfa5ab6c, + 0x2e13f983, 0x4cce7f69, 0xad782d86, 0xc5dc5270, 0x246a009f, + 0x46b78675, 0xa701d49a, 0xc30bfa7b, 0x22bda894, 0x40602e7e, + 0xa1d67c91}, + {0x00000000, 0x5880e2d7, 0xf106b474, 0xa98656a3, 0xe20d68e9, + 0xba8d8a3e, 0x130bdc9d, 0x4b8b3e4a, 0x851da109, 0xdd9d43de, + 0x741b157d, 0x2c9bf7aa, 0x6710c9e0, 0x3f902b37, 0x96167d94, + 0xce969f43, 0x0a3b4213, 0x52bba0c4, 0xfb3df667, 0xa3bd14b0, + 0xe8362afa, 0xb0b6c82d, 0x19309e8e, 0x41b07c59, 0x8f26e31a, + 0xd7a601cd, 0x7e20576e, 0x26a0b5b9, 0x6d2b8bf3, 0x35ab6924, + 0x9c2d3f87, 0xc4addd50, 0x14768426, 0x4cf666f1, 0xe5703052, + 0xbdf0d285, 0xf67beccf, 0xaefb0e18, 0x077d58bb, 0x5ffdba6c, + 0x916b252f, 0xc9ebc7f8, 0x606d915b, 0x38ed738c, 0x73664dc6, + 0x2be6af11, 0x8260f9b2, 0xdae01b65, 0x1e4dc635, 0x46cd24e2, + 0xef4b7241, 0xb7cb9096, 0xfc40aedc, 0xa4c04c0b, 0x0d461aa8, + 0x55c6f87f, 0x9b50673c, 0xc3d085eb, 0x6a56d348, 0x32d6319f, + 0x795d0fd5, 0x21dded02, 0x885bbba1, 0xd0db5976, 0x28ec084d, + 0x706cea9a, 0xd9eabc39, 0x816a5eee, 0xcae160a4, 0x92618273, + 0x3be7d4d0, 0x63673607, 0xadf1a944, 0xf5714b93, 0x5cf71d30, + 0x0477ffe7, 0x4ffcc1ad, 0x177c237a, 0xbefa75d9, 0xe67a970e, + 0x22d74a5e, 0x7a57a889, 0xd3d1fe2a, 0x8b511cfd, 0xc0da22b7, + 0x985ac060, 0x31dc96c3, 0x695c7414, 0xa7caeb57, 0xff4a0980, + 0x56cc5f23, 0x0e4cbdf4, 0x45c783be, 0x1d476169, 0xb4c137ca, + 0xec41d51d, 0x3c9a8c6b, 0x641a6ebc, 0xcd9c381f, 0x951cdac8, + 0xde97e482, 0x86170655, 0x2f9150f6, 0x7711b221, 0xb9872d62, + 0xe107cfb5, 0x48819916, 0x10017bc1, 0x5b8a458b, 0x030aa75c, + 0xaa8cf1ff, 0xf20c1328, 0x36a1ce78, 0x6e212caf, 0xc7a77a0c, + 0x9f2798db, 0xd4aca691, 0x8c2c4446, 0x25aa12e5, 0x7d2af032, + 
0xb3bc6f71, 0xeb3c8da6, 0x42badb05, 0x1a3a39d2, 0x51b10798, + 0x0931e54f, 0xa0b7b3ec, 0xf837513b, 0x50d8119a, 0x0858f34d, + 0xa1dea5ee, 0xf95e4739, 0xb2d57973, 0xea559ba4, 0x43d3cd07, + 0x1b532fd0, 0xd5c5b093, 0x8d455244, 0x24c304e7, 0x7c43e630, + 0x37c8d87a, 0x6f483aad, 0xc6ce6c0e, 0x9e4e8ed9, 0x5ae35389, + 0x0263b15e, 0xabe5e7fd, 0xf365052a, 0xb8ee3b60, 0xe06ed9b7, + 0x49e88f14, 0x11686dc3, 0xdffef280, 0x877e1057, 0x2ef846f4, + 0x7678a423, 0x3df39a69, 0x657378be, 0xccf52e1d, 0x9475ccca, + 0x44ae95bc, 0x1c2e776b, 0xb5a821c8, 0xed28c31f, 0xa6a3fd55, + 0xfe231f82, 0x57a54921, 0x0f25abf6, 0xc1b334b5, 0x9933d662, + 0x30b580c1, 0x68356216, 0x23be5c5c, 0x7b3ebe8b, 0xd2b8e828, + 0x8a380aff, 0x4e95d7af, 0x16153578, 0xbf9363db, 0xe713810c, + 0xac98bf46, 0xf4185d91, 0x5d9e0b32, 0x051ee9e5, 0xcb8876a6, + 0x93089471, 0x3a8ec2d2, 0x620e2005, 0x29851e4f, 0x7105fc98, + 0xd883aa3b, 0x800348ec, 0x783419d7, 0x20b4fb00, 0x8932ada3, + 0xd1b24f74, 0x9a39713e, 0xc2b993e9, 0x6b3fc54a, 0x33bf279d, + 0xfd29b8de, 0xa5a95a09, 0x0c2f0caa, 0x54afee7d, 0x1f24d037, + 0x47a432e0, 0xee226443, 0xb6a28694, 0x720f5bc4, 0x2a8fb913, + 0x8309efb0, 0xdb890d67, 0x9002332d, 0xc882d1fa, 0x61048759, + 0x3984658e, 0xf712facd, 0xaf92181a, 0x06144eb9, 0x5e94ac6e, + 0x151f9224, 0x4d9f70f3, 0xe4192650, 0xbc99c487, 0x6c429df1, + 0x34c27f26, 0x9d442985, 0xc5c4cb52, 0x8e4ff518, 0xd6cf17cf, + 0x7f49416c, 0x27c9a3bb, 0xe95f3cf8, 0xb1dfde2f, 0x1859888c, + 0x40d96a5b, 0x0b525411, 0x53d2b6c6, 0xfa54e065, 0xa2d402b2, + 0x6679dfe2, 0x3ef93d35, 0x977f6b96, 0xcfff8941, 0x8474b70b, + 0xdcf455dc, 0x7572037f, 0x2df2e1a8, 0xe3647eeb, 0xbbe49c3c, + 0x1262ca9f, 0x4ae22848, 0x01691602, 0x59e9f4d5, 0xf06fa276, + 0xa8ef40a1}, + {0x00000000, 0x463b6765, 0x8c76ceca, 0xca4da9af, 0x59ebed4e, + 0x1fd08a2b, 0xd59d2384, 0x93a644e1, 0xb2d6db9d, 0xf4edbcf8, + 0x3ea01557, 0x789b7232, 0xeb3d36d3, 0xad0651b6, 0x674bf819, + 0x21709f7c, 0x25abc6e0, 0x6390a185, 0xa9dd082a, 0xefe66f4f, + 0x7c402bae, 0x3a7b4ccb, 0xf036e564, 0xb60d8201, 0x977d1d7d, + 
0xd1467a18, 0x1b0bd3b7, 0x5d30b4d2, 0xce96f033, 0x88ad9756, + 0x42e03ef9, 0x04db599c, 0x0b50fc1a, 0x4d6b9b7f, 0x872632d0, + 0xc11d55b5, 0x52bb1154, 0x14807631, 0xdecddf9e, 0x98f6b8fb, + 0xb9862787, 0xffbd40e2, 0x35f0e94d, 0x73cb8e28, 0xe06dcac9, + 0xa656adac, 0x6c1b0403, 0x2a206366, 0x2efb3afa, 0x68c05d9f, + 0xa28df430, 0xe4b69355, 0x7710d7b4, 0x312bb0d1, 0xfb66197e, + 0xbd5d7e1b, 0x9c2de167, 0xda168602, 0x105b2fad, 0x566048c8, + 0xc5c60c29, 0x83fd6b4c, 0x49b0c2e3, 0x0f8ba586, 0x16a0f835, + 0x509b9f50, 0x9ad636ff, 0xdced519a, 0x4f4b157b, 0x0970721e, + 0xc33ddbb1, 0x8506bcd4, 0xa47623a8, 0xe24d44cd, 0x2800ed62, + 0x6e3b8a07, 0xfd9dcee6, 0xbba6a983, 0x71eb002c, 0x37d06749, + 0x330b3ed5, 0x753059b0, 0xbf7df01f, 0xf946977a, 0x6ae0d39b, + 0x2cdbb4fe, 0xe6961d51, 0xa0ad7a34, 0x81dde548, 0xc7e6822d, + 0x0dab2b82, 0x4b904ce7, 0xd8360806, 0x9e0d6f63, 0x5440c6cc, + 0x127ba1a9, 0x1df0042f, 0x5bcb634a, 0x9186cae5, 0xd7bdad80, + 0x441be961, 0x02208e04, 0xc86d27ab, 0x8e5640ce, 0xaf26dfb2, + 0xe91db8d7, 0x23501178, 0x656b761d, 0xf6cd32fc, 0xb0f65599, + 0x7abbfc36, 0x3c809b53, 0x385bc2cf, 0x7e60a5aa, 0xb42d0c05, + 0xf2166b60, 0x61b02f81, 0x278b48e4, 0xedc6e14b, 0xabfd862e, + 0x8a8d1952, 0xccb67e37, 0x06fbd798, 0x40c0b0fd, 0xd366f41c, + 0x955d9379, 0x5f103ad6, 0x192b5db3, 0x2c40f16b, 0x6a7b960e, + 0xa0363fa1, 0xe60d58c4, 0x75ab1c25, 0x33907b40, 0xf9ddd2ef, + 0xbfe6b58a, 0x9e962af6, 0xd8ad4d93, 0x12e0e43c, 0x54db8359, + 0xc77dc7b8, 0x8146a0dd, 0x4b0b0972, 0x0d306e17, 0x09eb378b, + 0x4fd050ee, 0x859df941, 0xc3a69e24, 0x5000dac5, 0x163bbda0, + 0xdc76140f, 0x9a4d736a, 0xbb3dec16, 0xfd068b73, 0x374b22dc, + 0x717045b9, 0xe2d60158, 0xa4ed663d, 0x6ea0cf92, 0x289ba8f7, + 0x27100d71, 0x612b6a14, 0xab66c3bb, 0xed5da4de, 0x7efbe03f, + 0x38c0875a, 0xf28d2ef5, 0xb4b64990, 0x95c6d6ec, 0xd3fdb189, + 0x19b01826, 0x5f8b7f43, 0xcc2d3ba2, 0x8a165cc7, 0x405bf568, + 0x0660920d, 0x02bbcb91, 0x4480acf4, 0x8ecd055b, 0xc8f6623e, + 0x5b5026df, 0x1d6b41ba, 0xd726e815, 0x911d8f70, 0xb06d100c, + 0xf6567769, 
0x3c1bdec6, 0x7a20b9a3, 0xe986fd42, 0xafbd9a27, + 0x65f03388, 0x23cb54ed, 0x3ae0095e, 0x7cdb6e3b, 0xb696c794, + 0xf0ada0f1, 0x630be410, 0x25308375, 0xef7d2ada, 0xa9464dbf, + 0x8836d2c3, 0xce0db5a6, 0x04401c09, 0x427b7b6c, 0xd1dd3f8d, + 0x97e658e8, 0x5dabf147, 0x1b909622, 0x1f4bcfbe, 0x5970a8db, + 0x933d0174, 0xd5066611, 0x46a022f0, 0x009b4595, 0xcad6ec3a, + 0x8ced8b5f, 0xad9d1423, 0xeba67346, 0x21ebdae9, 0x67d0bd8c, + 0xf476f96d, 0xb24d9e08, 0x780037a7, 0x3e3b50c2, 0x31b0f544, + 0x778b9221, 0xbdc63b8e, 0xfbfd5ceb, 0x685b180a, 0x2e607f6f, + 0xe42dd6c0, 0xa216b1a5, 0x83662ed9, 0xc55d49bc, 0x0f10e013, + 0x492b8776, 0xda8dc397, 0x9cb6a4f2, 0x56fb0d5d, 0x10c06a38, + 0x141b33a4, 0x522054c1, 0x986dfd6e, 0xde569a0b, 0x4df0deea, + 0x0bcbb98f, 0xc1861020, 0x87bd7745, 0xa6cde839, 0xe0f68f5c, + 0x2abb26f3, 0x6c804196, 0xff260577, 0xb91d6212, 0x7350cbbd, + 0x356bacd8}}; + +#endif + +#endif + +#if N == 6 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x3db1ecdc, 0x7b63d9b8, 0x46d23564, 0xf6c7b370, + 0xcb765fac, 0x8da46ac8, 0xb0158614, 0x36fe60a1, 0x0b4f8c7d, + 0x4d9db919, 0x702c55c5, 0xc039d3d1, 0xfd883f0d, 0xbb5a0a69, + 0x86ebe6b5, 0x6dfcc142, 0x504d2d9e, 0x169f18fa, 0x2b2ef426, + 0x9b3b7232, 0xa68a9eee, 0xe058ab8a, 0xdde94756, 0x5b02a1e3, + 0x66b34d3f, 0x2061785b, 0x1dd09487, 0xadc51293, 0x9074fe4f, + 0xd6a6cb2b, 0xeb1727f7, 0xdbf98284, 0xe6486e58, 0xa09a5b3c, + 0x9d2bb7e0, 0x2d3e31f4, 0x108fdd28, 0x565de84c, 0x6bec0490, + 0xed07e225, 0xd0b60ef9, 0x96643b9d, 0xabd5d741, 0x1bc05155, + 0x2671bd89, 0x60a388ed, 0x5d126431, 0xb60543c6, 0x8bb4af1a, + 0xcd669a7e, 0xf0d776a2, 0x40c2f0b6, 0x7d731c6a, 0x3ba1290e, + 0x0610c5d2, 0x80fb2367, 0xbd4acfbb, 0xfb98fadf, 0xc6291603, + 0x763c9017, 0x4b8d7ccb, 0x0d5f49af, 0x30eea573, 0x6c820349, + 0x5133ef95, 0x17e1daf1, 0x2a50362d, 0x9a45b039, 0xa7f45ce5, + 0xe1266981, 0xdc97855d, 0x5a7c63e8, 0x67cd8f34, 0x211fba50, + 0x1cae568c, 0xacbbd098, 0x910a3c44, 0xd7d80920, 0xea69e5fc, + 0x017ec20b, 0x3ccf2ed7, 
0x7a1d1bb3, 0x47acf76f, 0xf7b9717b, + 0xca089da7, 0x8cdaa8c3, 0xb16b441f, 0x3780a2aa, 0x0a314e76, + 0x4ce37b12, 0x715297ce, 0xc14711da, 0xfcf6fd06, 0xba24c862, + 0x879524be, 0xb77b81cd, 0x8aca6d11, 0xcc185875, 0xf1a9b4a9, + 0x41bc32bd, 0x7c0dde61, 0x3adfeb05, 0x076e07d9, 0x8185e16c, + 0xbc340db0, 0xfae638d4, 0xc757d408, 0x7742521c, 0x4af3bec0, + 0x0c218ba4, 0x31906778, 0xda87408f, 0xe736ac53, 0xa1e49937, + 0x9c5575eb, 0x2c40f3ff, 0x11f11f23, 0x57232a47, 0x6a92c69b, + 0xec79202e, 0xd1c8ccf2, 0x971af996, 0xaaab154a, 0x1abe935e, + 0x270f7f82, 0x61dd4ae6, 0x5c6ca63a, 0xd9040692, 0xe4b5ea4e, + 0xa267df2a, 0x9fd633f6, 0x2fc3b5e2, 0x1272593e, 0x54a06c5a, + 0x69118086, 0xeffa6633, 0xd24b8aef, 0x9499bf8b, 0xa9285357, + 0x193dd543, 0x248c399f, 0x625e0cfb, 0x5fefe027, 0xb4f8c7d0, + 0x89492b0c, 0xcf9b1e68, 0xf22af2b4, 0x423f74a0, 0x7f8e987c, + 0x395cad18, 0x04ed41c4, 0x8206a771, 0xbfb74bad, 0xf9657ec9, + 0xc4d49215, 0x74c11401, 0x4970f8dd, 0x0fa2cdb9, 0x32132165, + 0x02fd8416, 0x3f4c68ca, 0x799e5dae, 0x442fb172, 0xf43a3766, + 0xc98bdbba, 0x8f59eede, 0xb2e80202, 0x3403e4b7, 0x09b2086b, + 0x4f603d0f, 0x72d1d1d3, 0xc2c457c7, 0xff75bb1b, 0xb9a78e7f, + 0x841662a3, 0x6f014554, 0x52b0a988, 0x14629cec, 0x29d37030, + 0x99c6f624, 0xa4771af8, 0xe2a52f9c, 0xdf14c340, 0x59ff25f5, + 0x644ec929, 0x229cfc4d, 0x1f2d1091, 0xaf389685, 0x92897a59, + 0xd45b4f3d, 0xe9eaa3e1, 0xb58605db, 0x8837e907, 0xcee5dc63, + 0xf35430bf, 0x4341b6ab, 0x7ef05a77, 0x38226f13, 0x059383cf, + 0x8378657a, 0xbec989a6, 0xf81bbcc2, 0xc5aa501e, 0x75bfd60a, + 0x480e3ad6, 0x0edc0fb2, 0x336de36e, 0xd87ac499, 0xe5cb2845, + 0xa3191d21, 0x9ea8f1fd, 0x2ebd77e9, 0x130c9b35, 0x55deae51, + 0x686f428d, 0xee84a438, 0xd33548e4, 0x95e77d80, 0xa856915c, + 0x18431748, 0x25f2fb94, 0x6320cef0, 0x5e91222c, 0x6e7f875f, + 0x53ce6b83, 0x151c5ee7, 0x28adb23b, 0x98b8342f, 0xa509d8f3, + 0xe3dbed97, 0xde6a014b, 0x5881e7fe, 0x65300b22, 0x23e23e46, + 0x1e53d29a, 0xae46548e, 0x93f7b852, 0xd5258d36, 0xe89461ea, + 0x0383461d, 0x3e32aac1, 0x78e09fa5, 
0x45517379, 0xf544f56d, + 0xc8f519b1, 0x8e272cd5, 0xb396c009, 0x357d26bc, 0x08ccca60, + 0x4e1eff04, 0x73af13d8, 0xc3ba95cc, 0xfe0b7910, 0xb8d94c74, + 0x8568a0a8}, + {0x00000000, 0x69790b65, 0xd2f216ca, 0xbb8b1daf, 0x7e952bd5, + 0x17ec20b0, 0xac673d1f, 0xc51e367a, 0xfd2a57aa, 0x94535ccf, + 0x2fd84160, 0x46a14a05, 0x83bf7c7f, 0xeac6771a, 0x514d6ab5, + 0x383461d0, 0x2125a915, 0x485ca270, 0xf3d7bfdf, 0x9aaeb4ba, + 0x5fb082c0, 0x36c989a5, 0x8d42940a, 0xe43b9f6f, 0xdc0ffebf, + 0xb576f5da, 0x0efde875, 0x6784e310, 0xa29ad56a, 0xcbe3de0f, + 0x7068c3a0, 0x1911c8c5, 0x424b522a, 0x2b32594f, 0x90b944e0, + 0xf9c04f85, 0x3cde79ff, 0x55a7729a, 0xee2c6f35, 0x87556450, + 0xbf610580, 0xd6180ee5, 0x6d93134a, 0x04ea182f, 0xc1f42e55, + 0xa88d2530, 0x1306389f, 0x7a7f33fa, 0x636efb3f, 0x0a17f05a, + 0xb19cedf5, 0xd8e5e690, 0x1dfbd0ea, 0x7482db8f, 0xcf09c620, + 0xa670cd45, 0x9e44ac95, 0xf73da7f0, 0x4cb6ba5f, 0x25cfb13a, + 0xe0d18740, 0x89a88c25, 0x3223918a, 0x5b5a9aef, 0x8496a454, + 0xedefaf31, 0x5664b29e, 0x3f1db9fb, 0xfa038f81, 0x937a84e4, + 0x28f1994b, 0x4188922e, 0x79bcf3fe, 0x10c5f89b, 0xab4ee534, + 0xc237ee51, 0x0729d82b, 0x6e50d34e, 0xd5dbcee1, 0xbca2c584, + 0xa5b30d41, 0xccca0624, 0x77411b8b, 0x1e3810ee, 0xdb262694, + 0xb25f2df1, 0x09d4305e, 0x60ad3b3b, 0x58995aeb, 0x31e0518e, + 0x8a6b4c21, 0xe3124744, 0x260c713e, 0x4f757a5b, 0xf4fe67f4, + 0x9d876c91, 0xc6ddf67e, 0xafa4fd1b, 0x142fe0b4, 0x7d56ebd1, + 0xb848ddab, 0xd131d6ce, 0x6abacb61, 0x03c3c004, 0x3bf7a1d4, + 0x528eaab1, 0xe905b71e, 0x807cbc7b, 0x45628a01, 0x2c1b8164, + 0x97909ccb, 0xfee997ae, 0xe7f85f6b, 0x8e81540e, 0x350a49a1, + 0x5c7342c4, 0x996d74be, 0xf0147fdb, 0x4b9f6274, 0x22e66911, + 0x1ad208c1, 0x73ab03a4, 0xc8201e0b, 0xa159156e, 0x64472314, + 0x0d3e2871, 0xb6b535de, 0xdfcc3ebb, 0xd25c4ee9, 0xbb25458c, + 0x00ae5823, 0x69d75346, 0xacc9653c, 0xc5b06e59, 0x7e3b73f6, + 0x17427893, 0x2f761943, 0x460f1226, 0xfd840f89, 0x94fd04ec, + 0x51e33296, 0x389a39f3, 0x8311245c, 0xea682f39, 0xf379e7fc, + 0x9a00ec99, 0x218bf136, 0x48f2fa53, 
0x8deccc29, 0xe495c74c, + 0x5f1edae3, 0x3667d186, 0x0e53b056, 0x672abb33, 0xdca1a69c, + 0xb5d8adf9, 0x70c69b83, 0x19bf90e6, 0xa2348d49, 0xcb4d862c, + 0x90171cc3, 0xf96e17a6, 0x42e50a09, 0x2b9c016c, 0xee823716, + 0x87fb3c73, 0x3c7021dc, 0x55092ab9, 0x6d3d4b69, 0x0444400c, + 0xbfcf5da3, 0xd6b656c6, 0x13a860bc, 0x7ad16bd9, 0xc15a7676, + 0xa8237d13, 0xb132b5d6, 0xd84bbeb3, 0x63c0a31c, 0x0ab9a879, + 0xcfa79e03, 0xa6de9566, 0x1d5588c9, 0x742c83ac, 0x4c18e27c, + 0x2561e919, 0x9eeaf4b6, 0xf793ffd3, 0x328dc9a9, 0x5bf4c2cc, + 0xe07fdf63, 0x8906d406, 0x56caeabd, 0x3fb3e1d8, 0x8438fc77, + 0xed41f712, 0x285fc168, 0x4126ca0d, 0xfaadd7a2, 0x93d4dcc7, + 0xabe0bd17, 0xc299b672, 0x7912abdd, 0x106ba0b8, 0xd57596c2, + 0xbc0c9da7, 0x07878008, 0x6efe8b6d, 0x77ef43a8, 0x1e9648cd, + 0xa51d5562, 0xcc645e07, 0x097a687d, 0x60036318, 0xdb887eb7, + 0xb2f175d2, 0x8ac51402, 0xe3bc1f67, 0x583702c8, 0x314e09ad, + 0xf4503fd7, 0x9d2934b2, 0x26a2291d, 0x4fdb2278, 0x1481b897, + 0x7df8b3f2, 0xc673ae5d, 0xaf0aa538, 0x6a149342, 0x036d9827, + 0xb8e68588, 0xd19f8eed, 0xe9abef3d, 0x80d2e458, 0x3b59f9f7, + 0x5220f292, 0x973ec4e8, 0xfe47cf8d, 0x45ccd222, 0x2cb5d947, + 0x35a41182, 0x5cdd1ae7, 0xe7560748, 0x8e2f0c2d, 0x4b313a57, + 0x22483132, 0x99c32c9d, 0xf0ba27f8, 0xc88e4628, 0xa1f74d4d, + 0x1a7c50e2, 0x73055b87, 0xb61b6dfd, 0xdf626698, 0x64e97b37, + 0x0d907052}, + {0x00000000, 0x7fc99b93, 0xff933726, 0x805aacb5, 0x2457680d, + 0x5b9ef39e, 0xdbc45f2b, 0xa40dc4b8, 0x48aed01a, 0x37674b89, + 0xb73de73c, 0xc8f47caf, 0x6cf9b817, 0x13302384, 0x936a8f31, + 0xeca314a2, 0x915da034, 0xee943ba7, 0x6ece9712, 0x11070c81, + 0xb50ac839, 0xcac353aa, 0x4a99ff1f, 0x3550648c, 0xd9f3702e, + 0xa63aebbd, 0x26604708, 0x59a9dc9b, 0xfda41823, 0x826d83b0, + 0x02372f05, 0x7dfeb496, 0xf9ca4629, 0x8603ddba, 0x0659710f, + 0x7990ea9c, 0xdd9d2e24, 0xa254b5b7, 0x220e1902, 0x5dc78291, + 0xb1649633, 0xcead0da0, 0x4ef7a115, 0x313e3a86, 0x9533fe3e, + 0xeafa65ad, 0x6aa0c918, 0x1569528b, 0x6897e61d, 0x175e7d8e, + 0x9704d13b, 0xe8cd4aa8, 0x4cc08e10, 
0x33091583, 0xb353b936, + 0xcc9a22a5, 0x20393607, 0x5ff0ad94, 0xdfaa0121, 0xa0639ab2, + 0x046e5e0a, 0x7ba7c599, 0xfbfd692c, 0x8434f2bf, 0x28e58a13, + 0x572c1180, 0xd776bd35, 0xa8bf26a6, 0x0cb2e21e, 0x737b798d, + 0xf321d538, 0x8ce84eab, 0x604b5a09, 0x1f82c19a, 0x9fd86d2f, + 0xe011f6bc, 0x441c3204, 0x3bd5a997, 0xbb8f0522, 0xc4469eb1, + 0xb9b82a27, 0xc671b1b4, 0x462b1d01, 0x39e28692, 0x9def422a, + 0xe226d9b9, 0x627c750c, 0x1db5ee9f, 0xf116fa3d, 0x8edf61ae, + 0x0e85cd1b, 0x714c5688, 0xd5419230, 0xaa8809a3, 0x2ad2a516, + 0x551b3e85, 0xd12fcc3a, 0xaee657a9, 0x2ebcfb1c, 0x5175608f, + 0xf578a437, 0x8ab13fa4, 0x0aeb9311, 0x75220882, 0x99811c20, + 0xe64887b3, 0x66122b06, 0x19dbb095, 0xbdd6742d, 0xc21fefbe, + 0x4245430b, 0x3d8cd898, 0x40726c0e, 0x3fbbf79d, 0xbfe15b28, + 0xc028c0bb, 0x64250403, 0x1bec9f90, 0x9bb63325, 0xe47fa8b6, + 0x08dcbc14, 0x77152787, 0xf74f8b32, 0x888610a1, 0x2c8bd419, + 0x53424f8a, 0xd318e33f, 0xacd178ac, 0x51cb1426, 0x2e028fb5, + 0xae582300, 0xd191b893, 0x759c7c2b, 0x0a55e7b8, 0x8a0f4b0d, + 0xf5c6d09e, 0x1965c43c, 0x66ac5faf, 0xe6f6f31a, 0x993f6889, + 0x3d32ac31, 0x42fb37a2, 0xc2a19b17, 0xbd680084, 0xc096b412, + 0xbf5f2f81, 0x3f058334, 0x40cc18a7, 0xe4c1dc1f, 0x9b08478c, + 0x1b52eb39, 0x649b70aa, 0x88386408, 0xf7f1ff9b, 0x77ab532e, + 0x0862c8bd, 0xac6f0c05, 0xd3a69796, 0x53fc3b23, 0x2c35a0b0, + 0xa801520f, 0xd7c8c99c, 0x57926529, 0x285bfeba, 0x8c563a02, + 0xf39fa191, 0x73c50d24, 0x0c0c96b7, 0xe0af8215, 0x9f661986, + 0x1f3cb533, 0x60f52ea0, 0xc4f8ea18, 0xbb31718b, 0x3b6bdd3e, + 0x44a246ad, 0x395cf23b, 0x469569a8, 0xc6cfc51d, 0xb9065e8e, + 0x1d0b9a36, 0x62c201a5, 0xe298ad10, 0x9d513683, 0x71f22221, + 0x0e3bb9b2, 0x8e611507, 0xf1a88e94, 0x55a54a2c, 0x2a6cd1bf, + 0xaa367d0a, 0xd5ffe699, 0x792e9e35, 0x06e705a6, 0x86bda913, + 0xf9743280, 0x5d79f638, 0x22b06dab, 0xa2eac11e, 0xdd235a8d, + 0x31804e2f, 0x4e49d5bc, 0xce137909, 0xb1dae29a, 0x15d72622, + 0x6a1ebdb1, 0xea441104, 0x958d8a97, 0xe8733e01, 0x97baa592, + 0x17e00927, 0x682992b4, 0xcc24560c, 0xb3edcd9f, 
0x33b7612a, + 0x4c7efab9, 0xa0ddee1b, 0xdf147588, 0x5f4ed93d, 0x208742ae, + 0x848a8616, 0xfb431d85, 0x7b19b130, 0x04d02aa3, 0x80e4d81c, + 0xff2d438f, 0x7f77ef3a, 0x00be74a9, 0xa4b3b011, 0xdb7a2b82, + 0x5b208737, 0x24e91ca4, 0xc84a0806, 0xb7839395, 0x37d93f20, + 0x4810a4b3, 0xec1d600b, 0x93d4fb98, 0x138e572d, 0x6c47ccbe, + 0x11b97828, 0x6e70e3bb, 0xee2a4f0e, 0x91e3d49d, 0x35ee1025, + 0x4a278bb6, 0xca7d2703, 0xb5b4bc90, 0x5917a832, 0x26de33a1, + 0xa6849f14, 0xd94d0487, 0x7d40c03f, 0x02895bac, 0x82d3f719, + 0xfd1a6c8a}, + {0x00000000, 0xa396284c, 0x9c5d56d9, 0x3fcb7e95, 0xe3cbabf3, + 0x405d83bf, 0x7f96fd2a, 0xdc00d566, 0x1ce651a7, 0xbf7079eb, + 0x80bb077e, 0x232d2f32, 0xff2dfa54, 0x5cbbd218, 0x6370ac8d, + 0xc0e684c1, 0x39cca34e, 0x9a5a8b02, 0xa591f597, 0x0607dddb, + 0xda0708bd, 0x799120f1, 0x465a5e64, 0xe5cc7628, 0x252af2e9, + 0x86bcdaa5, 0xb977a430, 0x1ae18c7c, 0xc6e1591a, 0x65777156, + 0x5abc0fc3, 0xf92a278f, 0x7399469c, 0xd00f6ed0, 0xefc41045, + 0x4c523809, 0x9052ed6f, 0x33c4c523, 0x0c0fbbb6, 0xaf9993fa, + 0x6f7f173b, 0xcce93f77, 0xf32241e2, 0x50b469ae, 0x8cb4bcc8, + 0x2f229484, 0x10e9ea11, 0xb37fc25d, 0x4a55e5d2, 0xe9c3cd9e, + 0xd608b30b, 0x759e9b47, 0xa99e4e21, 0x0a08666d, 0x35c318f8, + 0x965530b4, 0x56b3b475, 0xf5259c39, 0xcaeee2ac, 0x6978cae0, + 0xb5781f86, 0x16ee37ca, 0x2925495f, 0x8ab36113, 0xe7328d38, + 0x44a4a574, 0x7b6fdbe1, 0xd8f9f3ad, 0x04f926cb, 0xa76f0e87, + 0x98a47012, 0x3b32585e, 0xfbd4dc9f, 0x5842f4d3, 0x67898a46, + 0xc41fa20a, 0x181f776c, 0xbb895f20, 0x844221b5, 0x27d409f9, + 0xdefe2e76, 0x7d68063a, 0x42a378af, 0xe13550e3, 0x3d358585, + 0x9ea3adc9, 0xa168d35c, 0x02fefb10, 0xc2187fd1, 0x618e579d, + 0x5e452908, 0xfdd30144, 0x21d3d422, 0x8245fc6e, 0xbd8e82fb, + 0x1e18aab7, 0x94abcba4, 0x373de3e8, 0x08f69d7d, 0xab60b531, + 0x77606057, 0xd4f6481b, 0xeb3d368e, 0x48ab1ec2, 0x884d9a03, + 0x2bdbb24f, 0x1410ccda, 0xb786e496, 0x6b8631f0, 0xc81019bc, + 0xf7db6729, 0x544d4f65, 0xad6768ea, 0x0ef140a6, 0x313a3e33, + 0x92ac167f, 0x4eacc319, 0xed3aeb55, 0xd2f195c0, 
0x7167bd8c, + 0xb181394d, 0x12171101, 0x2ddc6f94, 0x8e4a47d8, 0x524a92be, + 0xf1dcbaf2, 0xce17c467, 0x6d81ec2b, 0x15141c31, 0xb682347d, + 0x89494ae8, 0x2adf62a4, 0xf6dfb7c2, 0x55499f8e, 0x6a82e11b, + 0xc914c957, 0x09f24d96, 0xaa6465da, 0x95af1b4f, 0x36393303, + 0xea39e665, 0x49afce29, 0x7664b0bc, 0xd5f298f0, 0x2cd8bf7f, + 0x8f4e9733, 0xb085e9a6, 0x1313c1ea, 0xcf13148c, 0x6c853cc0, + 0x534e4255, 0xf0d86a19, 0x303eeed8, 0x93a8c694, 0xac63b801, + 0x0ff5904d, 0xd3f5452b, 0x70636d67, 0x4fa813f2, 0xec3e3bbe, + 0x668d5aad, 0xc51b72e1, 0xfad00c74, 0x59462438, 0x8546f15e, + 0x26d0d912, 0x191ba787, 0xba8d8fcb, 0x7a6b0b0a, 0xd9fd2346, + 0xe6365dd3, 0x45a0759f, 0x99a0a0f9, 0x3a3688b5, 0x05fdf620, + 0xa66bde6c, 0x5f41f9e3, 0xfcd7d1af, 0xc31caf3a, 0x608a8776, + 0xbc8a5210, 0x1f1c7a5c, 0x20d704c9, 0x83412c85, 0x43a7a844, + 0xe0318008, 0xdffafe9d, 0x7c6cd6d1, 0xa06c03b7, 0x03fa2bfb, + 0x3c31556e, 0x9fa77d22, 0xf2269109, 0x51b0b945, 0x6e7bc7d0, + 0xcdedef9c, 0x11ed3afa, 0xb27b12b6, 0x8db06c23, 0x2e26446f, + 0xeec0c0ae, 0x4d56e8e2, 0x729d9677, 0xd10bbe3b, 0x0d0b6b5d, + 0xae9d4311, 0x91563d84, 0x32c015c8, 0xcbea3247, 0x687c1a0b, + 0x57b7649e, 0xf4214cd2, 0x282199b4, 0x8bb7b1f8, 0xb47ccf6d, + 0x17eae721, 0xd70c63e0, 0x749a4bac, 0x4b513539, 0xe8c71d75, + 0x34c7c813, 0x9751e05f, 0xa89a9eca, 0x0b0cb686, 0x81bfd795, + 0x2229ffd9, 0x1de2814c, 0xbe74a900, 0x62747c66, 0xc1e2542a, + 0xfe292abf, 0x5dbf02f3, 0x9d598632, 0x3ecfae7e, 0x0104d0eb, + 0xa292f8a7, 0x7e922dc1, 0xdd04058d, 0xe2cf7b18, 0x41595354, + 0xb87374db, 0x1be55c97, 0x242e2202, 0x87b80a4e, 0x5bb8df28, + 0xf82ef764, 0xc7e589f1, 0x6473a1bd, 0xa495257c, 0x07030d30, + 0x38c873a5, 0x9b5e5be9, 0x475e8e8f, 0xe4c8a6c3, 0xdb03d856, + 0x7895f01a}, + {0x00000000, 0x2a283862, 0x545070c4, 0x7e7848a6, 0xa8a0e188, + 0x8288d9ea, 0xfcf0914c, 0xd6d8a92e, 0x8a30c551, 0xa018fd33, + 0xde60b595, 0xf4488df7, 0x229024d9, 0x08b81cbb, 0x76c0541d, + 0x5ce86c7f, 0xcf108ce3, 0xe538b481, 0x9b40fc27, 0xb168c445, + 0x67b06d6b, 0x4d985509, 0x33e01daf, 0x19c825cd, 
0x452049b2, + 0x6f0871d0, 0x11703976, 0x3b580114, 0xed80a83a, 0xc7a89058, + 0xb9d0d8fe, 0x93f8e09c, 0x45501f87, 0x6f7827e5, 0x11006f43, + 0x3b285721, 0xedf0fe0f, 0xc7d8c66d, 0xb9a08ecb, 0x9388b6a9, + 0xcf60dad6, 0xe548e2b4, 0x9b30aa12, 0xb1189270, 0x67c03b5e, + 0x4de8033c, 0x33904b9a, 0x19b873f8, 0x8a409364, 0xa068ab06, + 0xde10e3a0, 0xf438dbc2, 0x22e072ec, 0x08c84a8e, 0x76b00228, + 0x5c983a4a, 0x00705635, 0x2a586e57, 0x542026f1, 0x7e081e93, + 0xa8d0b7bd, 0x82f88fdf, 0xfc80c779, 0xd6a8ff1b, 0x8aa03f0e, + 0xa088076c, 0xdef04fca, 0xf4d877a8, 0x2200de86, 0x0828e6e4, + 0x7650ae42, 0x5c789620, 0x0090fa5f, 0x2ab8c23d, 0x54c08a9b, + 0x7ee8b2f9, 0xa8301bd7, 0x821823b5, 0xfc606b13, 0xd6485371, + 0x45b0b3ed, 0x6f988b8f, 0x11e0c329, 0x3bc8fb4b, 0xed105265, + 0xc7386a07, 0xb94022a1, 0x93681ac3, 0xcf8076bc, 0xe5a84ede, + 0x9bd00678, 0xb1f83e1a, 0x67209734, 0x4d08af56, 0x3370e7f0, + 0x1958df92, 0xcff02089, 0xe5d818eb, 0x9ba0504d, 0xb188682f, + 0x6750c101, 0x4d78f963, 0x3300b1c5, 0x192889a7, 0x45c0e5d8, + 0x6fe8ddba, 0x1190951c, 0x3bb8ad7e, 0xed600450, 0xc7483c32, + 0xb9307494, 0x93184cf6, 0x00e0ac6a, 0x2ac89408, 0x54b0dcae, + 0x7e98e4cc, 0xa8404de2, 0x82687580, 0xfc103d26, 0xd6380544, + 0x8ad0693b, 0xa0f85159, 0xde8019ff, 0xf4a8219d, 0x227088b3, + 0x0858b0d1, 0x7620f877, 0x5c08c015, 0xce31785d, 0xe419403f, + 0x9a610899, 0xb04930fb, 0x669199d5, 0x4cb9a1b7, 0x32c1e911, + 0x18e9d173, 0x4401bd0c, 0x6e29856e, 0x1051cdc8, 0x3a79f5aa, + 0xeca15c84, 0xc68964e6, 0xb8f12c40, 0x92d91422, 0x0121f4be, + 0x2b09ccdc, 0x5571847a, 0x7f59bc18, 0xa9811536, 0x83a92d54, + 0xfdd165f2, 0xd7f95d90, 0x8b1131ef, 0xa139098d, 0xdf41412b, + 0xf5697949, 0x23b1d067, 0x0999e805, 0x77e1a0a3, 0x5dc998c1, + 0x8b6167da, 0xa1495fb8, 0xdf31171e, 0xf5192f7c, 0x23c18652, + 0x09e9be30, 0x7791f696, 0x5db9cef4, 0x0151a28b, 0x2b799ae9, + 0x5501d24f, 0x7f29ea2d, 0xa9f14303, 0x83d97b61, 0xfda133c7, + 0xd7890ba5, 0x4471eb39, 0x6e59d35b, 0x10219bfd, 0x3a09a39f, + 0xecd10ab1, 0xc6f932d3, 0xb8817a75, 0x92a94217, 0xce412e68, + 
0xe469160a, 0x9a115eac, 0xb03966ce, 0x66e1cfe0, 0x4cc9f782, + 0x32b1bf24, 0x18998746, 0x44914753, 0x6eb97f31, 0x10c13797, + 0x3ae90ff5, 0xec31a6db, 0xc6199eb9, 0xb861d61f, 0x9249ee7d, + 0xcea18202, 0xe489ba60, 0x9af1f2c6, 0xb0d9caa4, 0x6601638a, + 0x4c295be8, 0x3251134e, 0x18792b2c, 0x8b81cbb0, 0xa1a9f3d2, + 0xdfd1bb74, 0xf5f98316, 0x23212a38, 0x0909125a, 0x77715afc, + 0x5d59629e, 0x01b10ee1, 0x2b993683, 0x55e17e25, 0x7fc94647, + 0xa911ef69, 0x8339d70b, 0xfd419fad, 0xd769a7cf, 0x01c158d4, + 0x2be960b6, 0x55912810, 0x7fb91072, 0xa961b95c, 0x8349813e, + 0xfd31c998, 0xd719f1fa, 0x8bf19d85, 0xa1d9a5e7, 0xdfa1ed41, + 0xf589d523, 0x23517c0d, 0x0979446f, 0x77010cc9, 0x5d2934ab, + 0xced1d437, 0xe4f9ec55, 0x9a81a4f3, 0xb0a99c91, 0x667135bf, + 0x4c590ddd, 0x3221457b, 0x18097d19, 0x44e11166, 0x6ec92904, + 0x10b161a2, 0x3a9959c0, 0xec41f0ee, 0xc669c88c, 0xb811802a, + 0x9239b848}, + {0x00000000, 0x4713f6fb, 0x8e27edf6, 0xc9341b0d, 0xc73eddad, + 0x802d2b56, 0x4919305b, 0x0e0ac6a0, 0x550cbd1b, 0x121f4be0, + 0xdb2b50ed, 0x9c38a616, 0x923260b6, 0xd521964d, 0x1c158d40, + 0x5b067bbb, 0xaa197a36, 0xed0a8ccd, 0x243e97c0, 0x632d613b, + 0x6d27a79b, 0x2a345160, 0xe3004a6d, 0xa413bc96, 0xff15c72d, + 0xb80631d6, 0x71322adb, 0x3621dc20, 0x382b1a80, 0x7f38ec7b, + 0xb60cf776, 0xf11f018d, 0x8f43f22d, 0xc85004d6, 0x01641fdb, + 0x4677e920, 0x487d2f80, 0x0f6ed97b, 0xc65ac276, 0x8149348d, + 0xda4f4f36, 0x9d5cb9cd, 0x5468a2c0, 0x137b543b, 0x1d71929b, + 0x5a626460, 0x93567f6d, 0xd4458996, 0x255a881b, 0x62497ee0, + 0xab7d65ed, 0xec6e9316, 0xe26455b6, 0xa577a34d, 0x6c43b840, + 0x2b504ebb, 0x70563500, 0x3745c3fb, 0xfe71d8f6, 0xb9622e0d, + 0xb768e8ad, 0xf07b1e56, 0x394f055b, 0x7e5cf3a0, 0xc5f6e21b, + 0x82e514e0, 0x4bd10fed, 0x0cc2f916, 0x02c83fb6, 0x45dbc94d, + 0x8cefd240, 0xcbfc24bb, 0x90fa5f00, 0xd7e9a9fb, 0x1eddb2f6, + 0x59ce440d, 0x57c482ad, 0x10d77456, 0xd9e36f5b, 0x9ef099a0, + 0x6fef982d, 0x28fc6ed6, 0xe1c875db, 0xa6db8320, 0xa8d14580, + 0xefc2b37b, 0x26f6a876, 0x61e55e8d, 0x3ae32536, 0x7df0d3cd, + 
0xb4c4c8c0, 0xf3d73e3b, 0xfdddf89b, 0xbace0e60, 0x73fa156d, + 0x34e9e396, 0x4ab51036, 0x0da6e6cd, 0xc492fdc0, 0x83810b3b, + 0x8d8bcd9b, 0xca983b60, 0x03ac206d, 0x44bfd696, 0x1fb9ad2d, + 0x58aa5bd6, 0x919e40db, 0xd68db620, 0xd8877080, 0x9f94867b, + 0x56a09d76, 0x11b36b8d, 0xe0ac6a00, 0xa7bf9cfb, 0x6e8b87f6, + 0x2998710d, 0x2792b7ad, 0x60814156, 0xa9b55a5b, 0xeea6aca0, + 0xb5a0d71b, 0xf2b321e0, 0x3b873aed, 0x7c94cc16, 0x729e0ab6, + 0x358dfc4d, 0xfcb9e740, 0xbbaa11bb, 0x509cc277, 0x178f348c, + 0xdebb2f81, 0x99a8d97a, 0x97a21fda, 0xd0b1e921, 0x1985f22c, + 0x5e9604d7, 0x05907f6c, 0x42838997, 0x8bb7929a, 0xcca46461, + 0xc2aea2c1, 0x85bd543a, 0x4c894f37, 0x0b9ab9cc, 0xfa85b841, + 0xbd964eba, 0x74a255b7, 0x33b1a34c, 0x3dbb65ec, 0x7aa89317, + 0xb39c881a, 0xf48f7ee1, 0xaf89055a, 0xe89af3a1, 0x21aee8ac, + 0x66bd1e57, 0x68b7d8f7, 0x2fa42e0c, 0xe6903501, 0xa183c3fa, + 0xdfdf305a, 0x98ccc6a1, 0x51f8ddac, 0x16eb2b57, 0x18e1edf7, + 0x5ff21b0c, 0x96c60001, 0xd1d5f6fa, 0x8ad38d41, 0xcdc07bba, + 0x04f460b7, 0x43e7964c, 0x4ded50ec, 0x0afea617, 0xc3cabd1a, + 0x84d94be1, 0x75c64a6c, 0x32d5bc97, 0xfbe1a79a, 0xbcf25161, + 0xb2f897c1, 0xf5eb613a, 0x3cdf7a37, 0x7bcc8ccc, 0x20caf777, + 0x67d9018c, 0xaeed1a81, 0xe9feec7a, 0xe7f42ada, 0xa0e7dc21, + 0x69d3c72c, 0x2ec031d7, 0x956a206c, 0xd279d697, 0x1b4dcd9a, + 0x5c5e3b61, 0x5254fdc1, 0x15470b3a, 0xdc731037, 0x9b60e6cc, + 0xc0669d77, 0x87756b8c, 0x4e417081, 0x0952867a, 0x075840da, + 0x404bb621, 0x897fad2c, 0xce6c5bd7, 0x3f735a5a, 0x7860aca1, + 0xb154b7ac, 0xf6474157, 0xf84d87f7, 0xbf5e710c, 0x766a6a01, + 0x31799cfa, 0x6a7fe741, 0x2d6c11ba, 0xe4580ab7, 0xa34bfc4c, + 0xad413aec, 0xea52cc17, 0x2366d71a, 0x647521e1, 0x1a29d241, + 0x5d3a24ba, 0x940e3fb7, 0xd31dc94c, 0xdd170fec, 0x9a04f917, + 0x5330e21a, 0x142314e1, 0x4f256f5a, 0x083699a1, 0xc10282ac, + 0x86117457, 0x881bb2f7, 0xcf08440c, 0x063c5f01, 0x412fa9fa, + 0xb030a877, 0xf7235e8c, 0x3e174581, 0x7904b37a, 0x770e75da, + 0x301d8321, 0xf929982c, 0xbe3a6ed7, 0xe53c156c, 0xa22fe397, + 0x6b1bf89a, 
0x2c080e61, 0x2202c8c1, 0x65113e3a, 0xac252537, + 0xeb36d3cc}, + {0x00000000, 0xa13984ee, 0x99020f9d, 0x383b8b73, 0xe975197b, + 0x484c9d95, 0x707716e6, 0xd14e9208, 0x099b34b7, 0xa8a2b059, + 0x90993b2a, 0x31a0bfc4, 0xe0ee2dcc, 0x41d7a922, 0x79ec2251, + 0xd8d5a6bf, 0x1336696e, 0xb20fed80, 0x8a3466f3, 0x2b0de21d, + 0xfa437015, 0x5b7af4fb, 0x63417f88, 0xc278fb66, 0x1aad5dd9, + 0xbb94d937, 0x83af5244, 0x2296d6aa, 0xf3d844a2, 0x52e1c04c, + 0x6ada4b3f, 0xcbe3cfd1, 0x266cd2dc, 0x87555632, 0xbf6edd41, + 0x1e5759af, 0xcf19cba7, 0x6e204f49, 0x561bc43a, 0xf72240d4, + 0x2ff7e66b, 0x8ece6285, 0xb6f5e9f6, 0x17cc6d18, 0xc682ff10, + 0x67bb7bfe, 0x5f80f08d, 0xfeb97463, 0x355abbb2, 0x94633f5c, + 0xac58b42f, 0x0d6130c1, 0xdc2fa2c9, 0x7d162627, 0x452dad54, + 0xe41429ba, 0x3cc18f05, 0x9df80beb, 0xa5c38098, 0x04fa0476, + 0xd5b4967e, 0x748d1290, 0x4cb699e3, 0xed8f1d0d, 0x4cd9a5b8, + 0xede02156, 0xd5dbaa25, 0x74e22ecb, 0xa5acbcc3, 0x0495382d, + 0x3caeb35e, 0x9d9737b0, 0x4542910f, 0xe47b15e1, 0xdc409e92, + 0x7d791a7c, 0xac378874, 0x0d0e0c9a, 0x353587e9, 0x940c0307, + 0x5fefccd6, 0xfed64838, 0xc6edc34b, 0x67d447a5, 0xb69ad5ad, + 0x17a35143, 0x2f98da30, 0x8ea15ede, 0x5674f861, 0xf74d7c8f, + 0xcf76f7fc, 0x6e4f7312, 0xbf01e11a, 0x1e3865f4, 0x2603ee87, + 0x873a6a69, 0x6ab57764, 0xcb8cf38a, 0xf3b778f9, 0x528efc17, + 0x83c06e1f, 0x22f9eaf1, 0x1ac26182, 0xbbfbe56c, 0x632e43d3, + 0xc217c73d, 0xfa2c4c4e, 0x5b15c8a0, 0x8a5b5aa8, 0x2b62de46, + 0x13595535, 0xb260d1db, 0x79831e0a, 0xd8ba9ae4, 0xe0811197, + 0x41b89579, 0x90f60771, 0x31cf839f, 0x09f408ec, 0xa8cd8c02, + 0x70182abd, 0xd121ae53, 0xe91a2520, 0x4823a1ce, 0x996d33c6, + 0x3854b728, 0x006f3c5b, 0xa156b8b5, 0x99b34b70, 0x388acf9e, + 0x00b144ed, 0xa188c003, 0x70c6520b, 0xd1ffd6e5, 0xe9c45d96, + 0x48fdd978, 0x90287fc7, 0x3111fb29, 0x092a705a, 0xa813f4b4, + 0x795d66bc, 0xd864e252, 0xe05f6921, 0x4166edcf, 0x8a85221e, + 0x2bbca6f0, 0x13872d83, 0xb2bea96d, 0x63f03b65, 0xc2c9bf8b, + 0xfaf234f8, 0x5bcbb016, 0x831e16a9, 0x22279247, 0x1a1c1934, + 0xbb259dda, 
0x6a6b0fd2, 0xcb528b3c, 0xf369004f, 0x525084a1, + 0xbfdf99ac, 0x1ee61d42, 0x26dd9631, 0x87e412df, 0x56aa80d7, + 0xf7930439, 0xcfa88f4a, 0x6e910ba4, 0xb644ad1b, 0x177d29f5, + 0x2f46a286, 0x8e7f2668, 0x5f31b460, 0xfe08308e, 0xc633bbfd, + 0x670a3f13, 0xace9f0c2, 0x0dd0742c, 0x35ebff5f, 0x94d27bb1, + 0x459ce9b9, 0xe4a56d57, 0xdc9ee624, 0x7da762ca, 0xa572c475, + 0x044b409b, 0x3c70cbe8, 0x9d494f06, 0x4c07dd0e, 0xed3e59e0, + 0xd505d293, 0x743c567d, 0xd56aeec8, 0x74536a26, 0x4c68e155, + 0xed5165bb, 0x3c1ff7b3, 0x9d26735d, 0xa51df82e, 0x04247cc0, + 0xdcf1da7f, 0x7dc85e91, 0x45f3d5e2, 0xe4ca510c, 0x3584c304, + 0x94bd47ea, 0xac86cc99, 0x0dbf4877, 0xc65c87a6, 0x67650348, + 0x5f5e883b, 0xfe670cd5, 0x2f299edd, 0x8e101a33, 0xb62b9140, + 0x171215ae, 0xcfc7b311, 0x6efe37ff, 0x56c5bc8c, 0xf7fc3862, + 0x26b2aa6a, 0x878b2e84, 0xbfb0a5f7, 0x1e892119, 0xf3063c14, + 0x523fb8fa, 0x6a043389, 0xcb3db767, 0x1a73256f, 0xbb4aa181, + 0x83712af2, 0x2248ae1c, 0xfa9d08a3, 0x5ba48c4d, 0x639f073e, + 0xc2a683d0, 0x13e811d8, 0xb2d19536, 0x8aea1e45, 0x2bd39aab, + 0xe030557a, 0x4109d194, 0x79325ae7, 0xd80bde09, 0x09454c01, + 0xa87cc8ef, 0x9047439c, 0x317ec772, 0xe9ab61cd, 0x4892e523, + 0x70a96e50, 0xd190eabe, 0x00de78b6, 0xa1e7fc58, 0x99dc772b, + 0x38e5f3c5}, + {0x00000000, 0xe81790a1, 0x0b5e2703, 0xe349b7a2, 0x16bc4e06, + 0xfeabdea7, 0x1de26905, 0xf5f5f9a4, 0x2d789c0c, 0xc56f0cad, + 0x2626bb0f, 0xce312bae, 0x3bc4d20a, 0xd3d342ab, 0x309af509, + 0xd88d65a8, 0x5af13818, 0xb2e6a8b9, 0x51af1f1b, 0xb9b88fba, + 0x4c4d761e, 0xa45ae6bf, 0x4713511d, 0xaf04c1bc, 0x7789a414, + 0x9f9e34b5, 0x7cd78317, 0x94c013b6, 0x6135ea12, 0x89227ab3, + 0x6a6bcd11, 0x827c5db0, 0xb5e27030, 0x5df5e091, 0xbebc5733, + 0x56abc792, 0xa35e3e36, 0x4b49ae97, 0xa8001935, 0x40178994, + 0x989aec3c, 0x708d7c9d, 0x93c4cb3f, 0x7bd35b9e, 0x8e26a23a, + 0x6631329b, 0x85788539, 0x6d6f1598, 0xef134828, 0x0704d889, + 0xe44d6f2b, 0x0c5aff8a, 0xf9af062e, 0x11b8968f, 0xf2f1212d, + 0x1ae6b18c, 0xc26bd424, 0x2a7c4485, 0xc935f327, 0x21226386, + 0xd4d79a22, 
0x3cc00a83, 0xdf89bd21, 0x379e2d80, 0xb0b5e621, + 0x58a27680, 0xbbebc122, 0x53fc5183, 0xa609a827, 0x4e1e3886, + 0xad578f24, 0x45401f85, 0x9dcd7a2d, 0x75daea8c, 0x96935d2e, + 0x7e84cd8f, 0x8b71342b, 0x6366a48a, 0x802f1328, 0x68388389, + 0xea44de39, 0x02534e98, 0xe11af93a, 0x090d699b, 0xfcf8903f, + 0x14ef009e, 0xf7a6b73c, 0x1fb1279d, 0xc73c4235, 0x2f2bd294, + 0xcc626536, 0x2475f597, 0xd1800c33, 0x39979c92, 0xdade2b30, + 0x32c9bb91, 0x05579611, 0xed4006b0, 0x0e09b112, 0xe61e21b3, + 0x13ebd817, 0xfbfc48b6, 0x18b5ff14, 0xf0a26fb5, 0x282f0a1d, + 0xc0389abc, 0x23712d1e, 0xcb66bdbf, 0x3e93441b, 0xd684d4ba, + 0x35cd6318, 0xdddaf3b9, 0x5fa6ae09, 0xb7b13ea8, 0x54f8890a, + 0xbcef19ab, 0x491ae00f, 0xa10d70ae, 0x4244c70c, 0xaa5357ad, + 0x72de3205, 0x9ac9a2a4, 0x79801506, 0x919785a7, 0x64627c03, + 0x8c75eca2, 0x6f3c5b00, 0x872bcba1, 0xba1aca03, 0x520d5aa2, + 0xb144ed00, 0x59537da1, 0xaca68405, 0x44b114a4, 0xa7f8a306, + 0x4fef33a7, 0x9762560f, 0x7f75c6ae, 0x9c3c710c, 0x742be1ad, + 0x81de1809, 0x69c988a8, 0x8a803f0a, 0x6297afab, 0xe0ebf21b, + 0x08fc62ba, 0xebb5d518, 0x03a245b9, 0xf657bc1d, 0x1e402cbc, + 0xfd099b1e, 0x151e0bbf, 0xcd936e17, 0x2584feb6, 0xc6cd4914, + 0x2edad9b5, 0xdb2f2011, 0x3338b0b0, 0xd0710712, 0x386697b3, + 0x0ff8ba33, 0xe7ef2a92, 0x04a69d30, 0xecb10d91, 0x1944f435, + 0xf1536494, 0x121ad336, 0xfa0d4397, 0x2280263f, 0xca97b69e, + 0x29de013c, 0xc1c9919d, 0x343c6839, 0xdc2bf898, 0x3f624f3a, + 0xd775df9b, 0x5509822b, 0xbd1e128a, 0x5e57a528, 0xb6403589, + 0x43b5cc2d, 0xaba25c8c, 0x48ebeb2e, 0xa0fc7b8f, 0x78711e27, + 0x90668e86, 0x732f3924, 0x9b38a985, 0x6ecd5021, 0x86dac080, + 0x65937722, 0x8d84e783, 0x0aaf2c22, 0xe2b8bc83, 0x01f10b21, + 0xe9e69b80, 0x1c136224, 0xf404f285, 0x174d4527, 0xff5ad586, + 0x27d7b02e, 0xcfc0208f, 0x2c89972d, 0xc49e078c, 0x316bfe28, + 0xd97c6e89, 0x3a35d92b, 0xd222498a, 0x505e143a, 0xb849849b, + 0x5b003339, 0xb317a398, 0x46e25a3c, 0xaef5ca9d, 0x4dbc7d3f, + 0xa5abed9e, 0x7d268836, 0x95311897, 0x7678af35, 0x9e6f3f94, + 0x6b9ac630, 0x838d5691, 
0x60c4e133, 0x88d37192, 0xbf4d5c12, + 0x575accb3, 0xb4137b11, 0x5c04ebb0, 0xa9f11214, 0x41e682b5, + 0xa2af3517, 0x4ab8a5b6, 0x9235c01e, 0x7a2250bf, 0x996be71d, + 0x717c77bc, 0x84898e18, 0x6c9e1eb9, 0x8fd7a91b, 0x67c039ba, + 0xe5bc640a, 0x0dabf4ab, 0xeee24309, 0x06f5d3a8, 0xf3002a0c, + 0x1b17baad, 0xf85e0d0f, 0x10499dae, 0xc8c4f806, 0x20d368a7, + 0xc39adf05, 0x2b8d4fa4, 0xde78b600, 0x366f26a1, 0xd5269103, + 0x3d3101a2}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xa19017e800000000, 0x03275e0b00000000, + 0xa2b749e300000000, 0x064ebc1600000000, 0xa7deabfe00000000, + 0x0569e21d00000000, 0xa4f9f5f500000000, 0x0c9c782d00000000, + 0xad0c6fc500000000, 0x0fbb262600000000, 0xae2b31ce00000000, + 0x0ad2c43b00000000, 0xab42d3d300000000, 0x09f59a3000000000, + 0xa8658dd800000000, 0x1838f15a00000000, 0xb9a8e6b200000000, + 0x1b1faf5100000000, 0xba8fb8b900000000, 0x1e764d4c00000000, + 0xbfe65aa400000000, 0x1d51134700000000, 0xbcc104af00000000, + 0x14a4897700000000, 0xb5349e9f00000000, 0x1783d77c00000000, + 0xb613c09400000000, 0x12ea356100000000, 0xb37a228900000000, + 0x11cd6b6a00000000, 0xb05d7c8200000000, 0x3070e2b500000000, + 0x91e0f55d00000000, 0x3357bcbe00000000, 0x92c7ab5600000000, + 0x363e5ea300000000, 0x97ae494b00000000, 0x351900a800000000, + 0x9489174000000000, 0x3cec9a9800000000, 0x9d7c8d7000000000, + 0x3fcbc49300000000, 0x9e5bd37b00000000, 0x3aa2268e00000000, + 0x9b32316600000000, 0x3985788500000000, 0x98156f6d00000000, + 0x284813ef00000000, 0x89d8040700000000, 0x2b6f4de400000000, + 0x8aff5a0c00000000, 0x2e06aff900000000, 0x8f96b81100000000, + 0x2d21f1f200000000, 0x8cb1e61a00000000, 0x24d46bc200000000, + 0x85447c2a00000000, 0x27f335c900000000, 0x8663222100000000, + 0x229ad7d400000000, 0x830ac03c00000000, 0x21bd89df00000000, + 0x802d9e3700000000, 0x21e6b5b000000000, 0x8076a25800000000, + 0x22c1ebbb00000000, 0x8351fc5300000000, 0x27a809a600000000, + 0x86381e4e00000000, 0x248f57ad00000000, 0x851f404500000000, + 0x2d7acd9d00000000, 
0x8ceada7500000000, 0x2e5d939600000000, + 0x8fcd847e00000000, 0x2b34718b00000000, 0x8aa4666300000000, + 0x28132f8000000000, 0x8983386800000000, 0x39de44ea00000000, + 0x984e530200000000, 0x3af91ae100000000, 0x9b690d0900000000, + 0x3f90f8fc00000000, 0x9e00ef1400000000, 0x3cb7a6f700000000, + 0x9d27b11f00000000, 0x35423cc700000000, 0x94d22b2f00000000, + 0x366562cc00000000, 0x97f5752400000000, 0x330c80d100000000, + 0x929c973900000000, 0x302bdeda00000000, 0x91bbc93200000000, + 0x1196570500000000, 0xb00640ed00000000, 0x12b1090e00000000, + 0xb3211ee600000000, 0x17d8eb1300000000, 0xb648fcfb00000000, + 0x14ffb51800000000, 0xb56fa2f000000000, 0x1d0a2f2800000000, + 0xbc9a38c000000000, 0x1e2d712300000000, 0xbfbd66cb00000000, + 0x1b44933e00000000, 0xbad484d600000000, 0x1863cd3500000000, + 0xb9f3dadd00000000, 0x09aea65f00000000, 0xa83eb1b700000000, + 0x0a89f85400000000, 0xab19efbc00000000, 0x0fe01a4900000000, + 0xae700da100000000, 0x0cc7444200000000, 0xad5753aa00000000, + 0x0532de7200000000, 0xa4a2c99a00000000, 0x0615807900000000, + 0xa785979100000000, 0x037c626400000000, 0xa2ec758c00000000, + 0x005b3c6f00000000, 0xa1cb2b8700000000, 0x03ca1aba00000000, + 0xa25a0d5200000000, 0x00ed44b100000000, 0xa17d535900000000, + 0x0584a6ac00000000, 0xa414b14400000000, 0x06a3f8a700000000, + 0xa733ef4f00000000, 0x0f56629700000000, 0xaec6757f00000000, + 0x0c713c9c00000000, 0xade12b7400000000, 0x0918de8100000000, + 0xa888c96900000000, 0x0a3f808a00000000, 0xabaf976200000000, + 0x1bf2ebe000000000, 0xba62fc0800000000, 0x18d5b5eb00000000, + 0xb945a20300000000, 0x1dbc57f600000000, 0xbc2c401e00000000, + 0x1e9b09fd00000000, 0xbf0b1e1500000000, 0x176e93cd00000000, + 0xb6fe842500000000, 0x1449cdc600000000, 0xb5d9da2e00000000, + 0x11202fdb00000000, 0xb0b0383300000000, 0x120771d000000000, + 0xb397663800000000, 0x33baf80f00000000, 0x922aefe700000000, + 0x309da60400000000, 0x910db1ec00000000, 0x35f4441900000000, + 0x946453f100000000, 0x36d31a1200000000, 0x97430dfa00000000, + 0x3f26802200000000, 
0x9eb697ca00000000, 0x3c01de2900000000, + 0x9d91c9c100000000, 0x39683c3400000000, 0x98f82bdc00000000, + 0x3a4f623f00000000, 0x9bdf75d700000000, 0x2b82095500000000, + 0x8a121ebd00000000, 0x28a5575e00000000, 0x893540b600000000, + 0x2dccb54300000000, 0x8c5ca2ab00000000, 0x2eebeb4800000000, + 0x8f7bfca000000000, 0x271e717800000000, 0x868e669000000000, + 0x24392f7300000000, 0x85a9389b00000000, 0x2150cd6e00000000, + 0x80c0da8600000000, 0x2277936500000000, 0x83e7848d00000000, + 0x222caf0a00000000, 0x83bcb8e200000000, 0x210bf10100000000, + 0x809be6e900000000, 0x2462131c00000000, 0x85f204f400000000, + 0x27454d1700000000, 0x86d55aff00000000, 0x2eb0d72700000000, + 0x8f20c0cf00000000, 0x2d97892c00000000, 0x8c079ec400000000, + 0x28fe6b3100000000, 0x896e7cd900000000, 0x2bd9353a00000000, + 0x8a4922d200000000, 0x3a145e5000000000, 0x9b8449b800000000, + 0x3933005b00000000, 0x98a317b300000000, 0x3c5ae24600000000, + 0x9dcaf5ae00000000, 0x3f7dbc4d00000000, 0x9eedaba500000000, + 0x3688267d00000000, 0x9718319500000000, 0x35af787600000000, + 0x943f6f9e00000000, 0x30c69a6b00000000, 0x91568d8300000000, + 0x33e1c46000000000, 0x9271d38800000000, 0x125c4dbf00000000, + 0xb3cc5a5700000000, 0x117b13b400000000, 0xb0eb045c00000000, + 0x1412f1a900000000, 0xb582e64100000000, 0x1735afa200000000, + 0xb6a5b84a00000000, 0x1ec0359200000000, 0xbf50227a00000000, + 0x1de76b9900000000, 0xbc777c7100000000, 0x188e898400000000, + 0xb91e9e6c00000000, 0x1ba9d78f00000000, 0xba39c06700000000, + 0x0a64bce500000000, 0xabf4ab0d00000000, 0x0943e2ee00000000, + 0xa8d3f50600000000, 0x0c2a00f300000000, 0xadba171b00000000, + 0x0f0d5ef800000000, 0xae9d491000000000, 0x06f8c4c800000000, + 0xa768d32000000000, 0x05df9ac300000000, 0xa44f8d2b00000000, + 0x00b678de00000000, 0xa1266f3600000000, 0x039126d500000000, + 0xa201313d00000000}, + {0x0000000000000000, 0xee8439a100000000, 0x9d0f029900000000, + 0x738b3b3800000000, 0x7b1975e900000000, 0x959d4c4800000000, + 0xe616777000000000, 0x08924ed100000000, 0xb7349b0900000000, + 
0x59b0a2a800000000, 0x2a3b999000000000, 0xc4bfa03100000000, + 0xcc2deee000000000, 0x22a9d74100000000, 0x5122ec7900000000, + 0xbfa6d5d800000000, 0x6e69361300000000, 0x80ed0fb200000000, + 0xf366348a00000000, 0x1de20d2b00000000, 0x157043fa00000000, + 0xfbf47a5b00000000, 0x887f416300000000, 0x66fb78c200000000, + 0xd95dad1a00000000, 0x37d994bb00000000, 0x4452af8300000000, + 0xaad6962200000000, 0xa244d8f300000000, 0x4cc0e15200000000, + 0x3f4bda6a00000000, 0xd1cfe3cb00000000, 0xdcd26c2600000000, + 0x3256558700000000, 0x41dd6ebf00000000, 0xaf59571e00000000, + 0xa7cb19cf00000000, 0x494f206e00000000, 0x3ac41b5600000000, + 0xd44022f700000000, 0x6be6f72f00000000, 0x8562ce8e00000000, + 0xf6e9f5b600000000, 0x186dcc1700000000, 0x10ff82c600000000, + 0xfe7bbb6700000000, 0x8df0805f00000000, 0x6374b9fe00000000, + 0xb2bb5a3500000000, 0x5c3f639400000000, 0x2fb458ac00000000, + 0xc130610d00000000, 0xc9a22fdc00000000, 0x2726167d00000000, + 0x54ad2d4500000000, 0xba2914e400000000, 0x058fc13c00000000, + 0xeb0bf89d00000000, 0x9880c3a500000000, 0x7604fa0400000000, + 0x7e96b4d500000000, 0x90128d7400000000, 0xe399b64c00000000, + 0x0d1d8fed00000000, 0xb8a5d94c00000000, 0x5621e0ed00000000, + 0x25aadbd500000000, 0xcb2ee27400000000, 0xc3bcaca500000000, + 0x2d38950400000000, 0x5eb3ae3c00000000, 0xb037979d00000000, + 0x0f91424500000000, 0xe1157be400000000, 0x929e40dc00000000, + 0x7c1a797d00000000, 0x748837ac00000000, 0x9a0c0e0d00000000, + 0xe987353500000000, 0x07030c9400000000, 0xd6ccef5f00000000, + 0x3848d6fe00000000, 0x4bc3edc600000000, 0xa547d46700000000, + 0xadd59ab600000000, 0x4351a31700000000, 0x30da982f00000000, + 0xde5ea18e00000000, 0x61f8745600000000, 0x8f7c4df700000000, + 0xfcf776cf00000000, 0x12734f6e00000000, 0x1ae101bf00000000, + 0xf465381e00000000, 0x87ee032600000000, 0x696a3a8700000000, + 0x6477b56a00000000, 0x8af38ccb00000000, 0xf978b7f300000000, + 0x17fc8e5200000000, 0x1f6ec08300000000, 0xf1eaf92200000000, + 0x8261c21a00000000, 0x6ce5fbbb00000000, 0xd3432e6300000000, + 
0x3dc717c200000000, 0x4e4c2cfa00000000, 0xa0c8155b00000000, + 0xa85a5b8a00000000, 0x46de622b00000000, 0x3555591300000000, + 0xdbd160b200000000, 0x0a1e837900000000, 0xe49abad800000000, + 0x971181e000000000, 0x7995b84100000000, 0x7107f69000000000, + 0x9f83cf3100000000, 0xec08f40900000000, 0x028ccda800000000, + 0xbd2a187000000000, 0x53ae21d100000000, 0x20251ae900000000, + 0xcea1234800000000, 0xc6336d9900000000, 0x28b7543800000000, + 0x5b3c6f0000000000, 0xb5b856a100000000, 0x704bb39900000000, + 0x9ecf8a3800000000, 0xed44b10000000000, 0x03c088a100000000, + 0x0b52c67000000000, 0xe5d6ffd100000000, 0x965dc4e900000000, + 0x78d9fd4800000000, 0xc77f289000000000, 0x29fb113100000000, + 0x5a702a0900000000, 0xb4f413a800000000, 0xbc665d7900000000, + 0x52e264d800000000, 0x21695fe000000000, 0xcfed664100000000, + 0x1e22858a00000000, 0xf0a6bc2b00000000, 0x832d871300000000, + 0x6da9beb200000000, 0x653bf06300000000, 0x8bbfc9c200000000, + 0xf834f2fa00000000, 0x16b0cb5b00000000, 0xa9161e8300000000, + 0x4792272200000000, 0x34191c1a00000000, 0xda9d25bb00000000, + 0xd20f6b6a00000000, 0x3c8b52cb00000000, 0x4f0069f300000000, + 0xa184505200000000, 0xac99dfbf00000000, 0x421de61e00000000, + 0x3196dd2600000000, 0xdf12e48700000000, 0xd780aa5600000000, + 0x390493f700000000, 0x4a8fa8cf00000000, 0xa40b916e00000000, + 0x1bad44b600000000, 0xf5297d1700000000, 0x86a2462f00000000, + 0x68267f8e00000000, 0x60b4315f00000000, 0x8e3008fe00000000, + 0xfdbb33c600000000, 0x133f0a6700000000, 0xc2f0e9ac00000000, + 0x2c74d00d00000000, 0x5fffeb3500000000, 0xb17bd29400000000, + 0xb9e99c4500000000, 0x576da5e400000000, 0x24e69edc00000000, + 0xca62a77d00000000, 0x75c472a500000000, 0x9b404b0400000000, + 0xe8cb703c00000000, 0x064f499d00000000, 0x0edd074c00000000, + 0xe0593eed00000000, 0x93d205d500000000, 0x7d563c7400000000, + 0xc8ee6ad500000000, 0x266a537400000000, 0x55e1684c00000000, + 0xbb6551ed00000000, 0xb3f71f3c00000000, 0x5d73269d00000000, + 0x2ef81da500000000, 0xc07c240400000000, 0x7fdaf1dc00000000, + 
0x915ec87d00000000, 0xe2d5f34500000000, 0x0c51cae400000000, + 0x04c3843500000000, 0xea47bd9400000000, 0x99cc86ac00000000, + 0x7748bf0d00000000, 0xa6875cc600000000, 0x4803656700000000, + 0x3b885e5f00000000, 0xd50c67fe00000000, 0xdd9e292f00000000, + 0x331a108e00000000, 0x40912bb600000000, 0xae15121700000000, + 0x11b3c7cf00000000, 0xff37fe6e00000000, 0x8cbcc55600000000, + 0x6238fcf700000000, 0x6aaab22600000000, 0x842e8b8700000000, + 0xf7a5b0bf00000000, 0x1921891e00000000, 0x143c06f300000000, + 0xfab83f5200000000, 0x8933046a00000000, 0x67b73dcb00000000, + 0x6f25731a00000000, 0x81a14abb00000000, 0xf22a718300000000, + 0x1cae482200000000, 0xa3089dfa00000000, 0x4d8ca45b00000000, + 0x3e079f6300000000, 0xd083a6c200000000, 0xd811e81300000000, + 0x3695d1b200000000, 0x451eea8a00000000, 0xab9ad32b00000000, + 0x7a5530e000000000, 0x94d1094100000000, 0xe75a327900000000, + 0x09de0bd800000000, 0x014c450900000000, 0xefc87ca800000000, + 0x9c43479000000000, 0x72c77e3100000000, 0xcd61abe900000000, + 0x23e5924800000000, 0x506ea97000000000, 0xbeea90d100000000, + 0xb678de0000000000, 0x58fce7a100000000, 0x2b77dc9900000000, + 0xc5f3e53800000000}, + {0x0000000000000000, 0xfbf6134700000000, 0xf6ed278e00000000, + 0x0d1b34c900000000, 0xaddd3ec700000000, 0x562b2d8000000000, + 0x5b30194900000000, 0xa0c60a0e00000000, 0x1bbd0c5500000000, + 0xe04b1f1200000000, 0xed502bdb00000000, 0x16a6389c00000000, + 0xb660329200000000, 0x4d9621d500000000, 0x408d151c00000000, + 0xbb7b065b00000000, 0x367a19aa00000000, 0xcd8c0aed00000000, + 0xc0973e2400000000, 0x3b612d6300000000, 0x9ba7276d00000000, + 0x6051342a00000000, 0x6d4a00e300000000, 0x96bc13a400000000, + 0x2dc715ff00000000, 0xd63106b800000000, 0xdb2a327100000000, + 0x20dc213600000000, 0x801a2b3800000000, 0x7bec387f00000000, + 0x76f70cb600000000, 0x8d011ff100000000, 0x2df2438f00000000, + 0xd60450c800000000, 0xdb1f640100000000, 0x20e9774600000000, + 0x802f7d4800000000, 0x7bd96e0f00000000, 0x76c25ac600000000, + 0x8d34498100000000, 0x364f4fda00000000, 
0xcdb95c9d00000000, + 0xc0a2685400000000, 0x3b547b1300000000, 0x9b92711d00000000, + 0x6064625a00000000, 0x6d7f569300000000, 0x968945d400000000, + 0x1b885a2500000000, 0xe07e496200000000, 0xed657dab00000000, + 0x16936eec00000000, 0xb65564e200000000, 0x4da377a500000000, + 0x40b8436c00000000, 0xbb4e502b00000000, 0x0035567000000000, + 0xfbc3453700000000, 0xf6d871fe00000000, 0x0d2e62b900000000, + 0xade868b700000000, 0x561e7bf000000000, 0x5b054f3900000000, + 0xa0f35c7e00000000, 0x1be2f6c500000000, 0xe014e58200000000, + 0xed0fd14b00000000, 0x16f9c20c00000000, 0xb63fc80200000000, + 0x4dc9db4500000000, 0x40d2ef8c00000000, 0xbb24fccb00000000, + 0x005ffa9000000000, 0xfba9e9d700000000, 0xf6b2dd1e00000000, + 0x0d44ce5900000000, 0xad82c45700000000, 0x5674d71000000000, + 0x5b6fe3d900000000, 0xa099f09e00000000, 0x2d98ef6f00000000, + 0xd66efc2800000000, 0xdb75c8e100000000, 0x2083dba600000000, + 0x8045d1a800000000, 0x7bb3c2ef00000000, 0x76a8f62600000000, + 0x8d5ee56100000000, 0x3625e33a00000000, 0xcdd3f07d00000000, + 0xc0c8c4b400000000, 0x3b3ed7f300000000, 0x9bf8ddfd00000000, + 0x600eceba00000000, 0x6d15fa7300000000, 0x96e3e93400000000, + 0x3610b54a00000000, 0xcde6a60d00000000, 0xc0fd92c400000000, + 0x3b0b818300000000, 0x9bcd8b8d00000000, 0x603b98ca00000000, + 0x6d20ac0300000000, 0x96d6bf4400000000, 0x2dadb91f00000000, + 0xd65baa5800000000, 0xdb409e9100000000, 0x20b68dd600000000, + 0x807087d800000000, 0x7b86949f00000000, 0x769da05600000000, + 0x8d6bb31100000000, 0x006aace000000000, 0xfb9cbfa700000000, + 0xf6878b6e00000000, 0x0d71982900000000, 0xadb7922700000000, + 0x5641816000000000, 0x5b5ab5a900000000, 0xa0aca6ee00000000, + 0x1bd7a0b500000000, 0xe021b3f200000000, 0xed3a873b00000000, + 0x16cc947c00000000, 0xb60a9e7200000000, 0x4dfc8d3500000000, + 0x40e7b9fc00000000, 0xbb11aabb00000000, 0x77c29c5000000000, + 0x8c348f1700000000, 0x812fbbde00000000, 0x7ad9a89900000000, + 0xda1fa29700000000, 0x21e9b1d000000000, 0x2cf2851900000000, + 0xd704965e00000000, 0x6c7f900500000000, 
0x9789834200000000, + 0x9a92b78b00000000, 0x6164a4cc00000000, 0xc1a2aec200000000, + 0x3a54bd8500000000, 0x374f894c00000000, 0xccb99a0b00000000, + 0x41b885fa00000000, 0xba4e96bd00000000, 0xb755a27400000000, + 0x4ca3b13300000000, 0xec65bb3d00000000, 0x1793a87a00000000, + 0x1a889cb300000000, 0xe17e8ff400000000, 0x5a0589af00000000, + 0xa1f39ae800000000, 0xace8ae2100000000, 0x571ebd6600000000, + 0xf7d8b76800000000, 0x0c2ea42f00000000, 0x013590e600000000, + 0xfac383a100000000, 0x5a30dfdf00000000, 0xa1c6cc9800000000, + 0xacddf85100000000, 0x572beb1600000000, 0xf7ede11800000000, + 0x0c1bf25f00000000, 0x0100c69600000000, 0xfaf6d5d100000000, + 0x418dd38a00000000, 0xba7bc0cd00000000, 0xb760f40400000000, + 0x4c96e74300000000, 0xec50ed4d00000000, 0x17a6fe0a00000000, + 0x1abdcac300000000, 0xe14bd98400000000, 0x6c4ac67500000000, + 0x97bcd53200000000, 0x9aa7e1fb00000000, 0x6151f2bc00000000, + 0xc197f8b200000000, 0x3a61ebf500000000, 0x377adf3c00000000, + 0xcc8ccc7b00000000, 0x77f7ca2000000000, 0x8c01d96700000000, + 0x811aedae00000000, 0x7aecfee900000000, 0xda2af4e700000000, + 0x21dce7a000000000, 0x2cc7d36900000000, 0xd731c02e00000000, + 0x6c206a9500000000, 0x97d679d200000000, 0x9acd4d1b00000000, + 0x613b5e5c00000000, 0xc1fd545200000000, 0x3a0b471500000000, + 0x371073dc00000000, 0xcce6609b00000000, 0x779d66c000000000, + 0x8c6b758700000000, 0x8170414e00000000, 0x7a86520900000000, + 0xda40580700000000, 0x21b64b4000000000, 0x2cad7f8900000000, + 0xd75b6cce00000000, 0x5a5a733f00000000, 0xa1ac607800000000, + 0xacb754b100000000, 0x574147f600000000, 0xf7874df800000000, + 0x0c715ebf00000000, 0x016a6a7600000000, 0xfa9c793100000000, + 0x41e77f6a00000000, 0xba116c2d00000000, 0xb70a58e400000000, + 0x4cfc4ba300000000, 0xec3a41ad00000000, 0x17cc52ea00000000, + 0x1ad7662300000000, 0xe121756400000000, 0x41d2291a00000000, + 0xba243a5d00000000, 0xb73f0e9400000000, 0x4cc91dd300000000, + 0xec0f17dd00000000, 0x17f9049a00000000, 0x1ae2305300000000, + 0xe114231400000000, 0x5a6f254f00000000, 
0xa199360800000000, + 0xac8202c100000000, 0x5774118600000000, 0xf7b21b8800000000, + 0x0c4408cf00000000, 0x015f3c0600000000, 0xfaa92f4100000000, + 0x77a830b000000000, 0x8c5e23f700000000, 0x8145173e00000000, + 0x7ab3047900000000, 0xda750e7700000000, 0x21831d3000000000, + 0x2c9829f900000000, 0xd76e3abe00000000, 0x6c153ce500000000, + 0x97e32fa200000000, 0x9af81b6b00000000, 0x610e082c00000000, + 0xc1c8022200000000, 0x3a3e116500000000, 0x372525ac00000000, + 0xccd336eb00000000}, + {0x0000000000000000, 0x6238282a00000000, 0xc470505400000000, + 0xa648787e00000000, 0x88e1a0a800000000, 0xead9888200000000, + 0x4c91f0fc00000000, 0x2ea9d8d600000000, 0x51c5308a00000000, + 0x33fd18a000000000, 0x95b560de00000000, 0xf78d48f400000000, + 0xd924902200000000, 0xbb1cb80800000000, 0x1d54c07600000000, + 0x7f6ce85c00000000, 0xe38c10cf00000000, 0x81b438e500000000, + 0x27fc409b00000000, 0x45c468b100000000, 0x6b6db06700000000, + 0x0955984d00000000, 0xaf1de03300000000, 0xcd25c81900000000, + 0xb249204500000000, 0xd071086f00000000, 0x7639701100000000, + 0x1401583b00000000, 0x3aa880ed00000000, 0x5890a8c700000000, + 0xfed8d0b900000000, 0x9ce0f89300000000, 0x871f504500000000, + 0xe527786f00000000, 0x436f001100000000, 0x2157283b00000000, + 0x0ffef0ed00000000, 0x6dc6d8c700000000, 0xcb8ea0b900000000, + 0xa9b6889300000000, 0xd6da60cf00000000, 0xb4e248e500000000, + 0x12aa309b00000000, 0x709218b100000000, 0x5e3bc06700000000, + 0x3c03e84d00000000, 0x9a4b903300000000, 0xf873b81900000000, + 0x6493408a00000000, 0x06ab68a000000000, 0xa0e310de00000000, + 0xc2db38f400000000, 0xec72e02200000000, 0x8e4ac80800000000, + 0x2802b07600000000, 0x4a3a985c00000000, 0x3556700000000000, + 0x576e582a00000000, 0xf126205400000000, 0x931e087e00000000, + 0xbdb7d0a800000000, 0xdf8ff88200000000, 0x79c780fc00000000, + 0x1bffa8d600000000, 0x0e3fa08a00000000, 0x6c0788a000000000, + 0xca4ff0de00000000, 0xa877d8f400000000, 0x86de002200000000, + 0xe4e6280800000000, 0x42ae507600000000, 0x2096785c00000000, + 0x5ffa900000000000, 
0x3dc2b82a00000000, 0x9b8ac05400000000, + 0xf9b2e87e00000000, 0xd71b30a800000000, 0xb523188200000000, + 0x136b60fc00000000, 0x715348d600000000, 0xedb3b04500000000, + 0x8f8b986f00000000, 0x29c3e01100000000, 0x4bfbc83b00000000, + 0x655210ed00000000, 0x076a38c700000000, 0xa12240b900000000, + 0xc31a689300000000, 0xbc7680cf00000000, 0xde4ea8e500000000, + 0x7806d09b00000000, 0x1a3ef8b100000000, 0x3497206700000000, + 0x56af084d00000000, 0xf0e7703300000000, 0x92df581900000000, + 0x8920f0cf00000000, 0xeb18d8e500000000, 0x4d50a09b00000000, + 0x2f6888b100000000, 0x01c1506700000000, 0x63f9784d00000000, + 0xc5b1003300000000, 0xa789281900000000, 0xd8e5c04500000000, + 0xbadde86f00000000, 0x1c95901100000000, 0x7eadb83b00000000, + 0x500460ed00000000, 0x323c48c700000000, 0x947430b900000000, + 0xf64c189300000000, 0x6aace00000000000, 0x0894c82a00000000, + 0xaedcb05400000000, 0xcce4987e00000000, 0xe24d40a800000000, + 0x8075688200000000, 0x263d10fc00000000, 0x440538d600000000, + 0x3b69d08a00000000, 0x5951f8a000000000, 0xff1980de00000000, + 0x9d21a8f400000000, 0xb388702200000000, 0xd1b0580800000000, + 0x77f8207600000000, 0x15c0085c00000000, 0x5d7831ce00000000, + 0x3f4019e400000000, 0x9908619a00000000, 0xfb3049b000000000, + 0xd599916600000000, 0xb7a1b94c00000000, 0x11e9c13200000000, + 0x73d1e91800000000, 0x0cbd014400000000, 0x6e85296e00000000, + 0xc8cd511000000000, 0xaaf5793a00000000, 0x845ca1ec00000000, + 0xe66489c600000000, 0x402cf1b800000000, 0x2214d99200000000, + 0xbef4210100000000, 0xdccc092b00000000, 0x7a84715500000000, + 0x18bc597f00000000, 0x361581a900000000, 0x542da98300000000, + 0xf265d1fd00000000, 0x905df9d700000000, 0xef31118b00000000, + 0x8d0939a100000000, 0x2b4141df00000000, 0x497969f500000000, + 0x67d0b12300000000, 0x05e8990900000000, 0xa3a0e17700000000, + 0xc198c95d00000000, 0xda67618b00000000, 0xb85f49a100000000, + 0x1e1731df00000000, 0x7c2f19f500000000, 0x5286c12300000000, + 0x30bee90900000000, 0x96f6917700000000, 0xf4ceb95d00000000, + 0x8ba2510100000000, 
0xe99a792b00000000, 0x4fd2015500000000, + 0x2dea297f00000000, 0x0343f1a900000000, 0x617bd98300000000, + 0xc733a1fd00000000, 0xa50b89d700000000, 0x39eb714400000000, + 0x5bd3596e00000000, 0xfd9b211000000000, 0x9fa3093a00000000, + 0xb10ad1ec00000000, 0xd332f9c600000000, 0x757a81b800000000, + 0x1742a99200000000, 0x682e41ce00000000, 0x0a1669e400000000, + 0xac5e119a00000000, 0xce6639b000000000, 0xe0cfe16600000000, + 0x82f7c94c00000000, 0x24bfb13200000000, 0x4687991800000000, + 0x5347914400000000, 0x317fb96e00000000, 0x9737c11000000000, + 0xf50fe93a00000000, 0xdba631ec00000000, 0xb99e19c600000000, + 0x1fd661b800000000, 0x7dee499200000000, 0x0282a1ce00000000, + 0x60ba89e400000000, 0xc6f2f19a00000000, 0xa4cad9b000000000, + 0x8a63016600000000, 0xe85b294c00000000, 0x4e13513200000000, + 0x2c2b791800000000, 0xb0cb818b00000000, 0xd2f3a9a100000000, + 0x74bbd1df00000000, 0x1683f9f500000000, 0x382a212300000000, + 0x5a12090900000000, 0xfc5a717700000000, 0x9e62595d00000000, + 0xe10eb10100000000, 0x8336992b00000000, 0x257ee15500000000, + 0x4746c97f00000000, 0x69ef11a900000000, 0x0bd7398300000000, + 0xad9f41fd00000000, 0xcfa769d700000000, 0xd458c10100000000, + 0xb660e92b00000000, 0x1028915500000000, 0x7210b97f00000000, + 0x5cb961a900000000, 0x3e81498300000000, 0x98c931fd00000000, + 0xfaf119d700000000, 0x859df18b00000000, 0xe7a5d9a100000000, + 0x41eda1df00000000, 0x23d589f500000000, 0x0d7c512300000000, + 0x6f44790900000000, 0xc90c017700000000, 0xab34295d00000000, + 0x37d4d1ce00000000, 0x55ecf9e400000000, 0xf3a4819a00000000, + 0x919ca9b000000000, 0xbf35716600000000, 0xdd0d594c00000000, + 0x7b45213200000000, 0x197d091800000000, 0x6611e14400000000, + 0x0429c96e00000000, 0xa261b11000000000, 0xc059993a00000000, + 0xeef041ec00000000, 0x8cc869c600000000, 0x2a8011b800000000, + 0x48b8399200000000}, + {0x0000000000000000, 0x4c2896a300000000, 0xd9565d9c00000000, + 0x957ecb3f00000000, 0xf3abcbe300000000, 0xbf835d4000000000, + 0x2afd967f00000000, 0x66d500dc00000000, 0xa751e61c00000000, + 
0xeb7970bf00000000, 0x7e07bb8000000000, 0x322f2d2300000000, + 0x54fa2dff00000000, 0x18d2bb5c00000000, 0x8dac706300000000, + 0xc184e6c000000000, 0x4ea3cc3900000000, 0x028b5a9a00000000, + 0x97f591a500000000, 0xdbdd070600000000, 0xbd0807da00000000, + 0xf120917900000000, 0x645e5a4600000000, 0x2876cce500000000, + 0xe9f22a2500000000, 0xa5dabc8600000000, 0x30a477b900000000, + 0x7c8ce11a00000000, 0x1a59e1c600000000, 0x5671776500000000, + 0xc30fbc5a00000000, 0x8f272af900000000, 0x9c46997300000000, + 0xd06e0fd000000000, 0x4510c4ef00000000, 0x0938524c00000000, + 0x6fed529000000000, 0x23c5c43300000000, 0xb6bb0f0c00000000, + 0xfa9399af00000000, 0x3b177f6f00000000, 0x773fe9cc00000000, + 0xe24122f300000000, 0xae69b45000000000, 0xc8bcb48c00000000, + 0x8494222f00000000, 0x11eae91000000000, 0x5dc27fb300000000, + 0xd2e5554a00000000, 0x9ecdc3e900000000, 0x0bb308d600000000, + 0x479b9e7500000000, 0x214e9ea900000000, 0x6d66080a00000000, + 0xf818c33500000000, 0xb430559600000000, 0x75b4b35600000000, + 0x399c25f500000000, 0xace2eeca00000000, 0xe0ca786900000000, + 0x861f78b500000000, 0xca37ee1600000000, 0x5f49252900000000, + 0x1361b38a00000000, 0x388d32e700000000, 0x74a5a44400000000, + 0xe1db6f7b00000000, 0xadf3f9d800000000, 0xcb26f90400000000, + 0x870e6fa700000000, 0x1270a49800000000, 0x5e58323b00000000, + 0x9fdcd4fb00000000, 0xd3f4425800000000, 0x468a896700000000, + 0x0aa21fc400000000, 0x6c771f1800000000, 0x205f89bb00000000, + 0xb521428400000000, 0xf909d42700000000, 0x762efede00000000, + 0x3a06687d00000000, 0xaf78a34200000000, 0xe35035e100000000, + 0x8585353d00000000, 0xc9ada39e00000000, 0x5cd368a100000000, + 0x10fbfe0200000000, 0xd17f18c200000000, 0x9d578e6100000000, + 0x0829455e00000000, 0x4401d3fd00000000, 0x22d4d32100000000, + 0x6efc458200000000, 0xfb828ebd00000000, 0xb7aa181e00000000, + 0xa4cbab9400000000, 0xe8e33d3700000000, 0x7d9df60800000000, + 0x31b560ab00000000, 0x5760607700000000, 0x1b48f6d400000000, + 0x8e363deb00000000, 0xc21eab4800000000, 0x039a4d8800000000, + 
0x4fb2db2b00000000, 0xdacc101400000000, 0x96e486b700000000, + 0xf031866b00000000, 0xbc1910c800000000, 0x2967dbf700000000, + 0x654f4d5400000000, 0xea6867ad00000000, 0xa640f10e00000000, + 0x333e3a3100000000, 0x7f16ac9200000000, 0x19c3ac4e00000000, + 0x55eb3aed00000000, 0xc095f1d200000000, 0x8cbd677100000000, + 0x4d3981b100000000, 0x0111171200000000, 0x946fdc2d00000000, + 0xd8474a8e00000000, 0xbe924a5200000000, 0xf2badcf100000000, + 0x67c417ce00000000, 0x2bec816d00000000, 0x311c141500000000, + 0x7d3482b600000000, 0xe84a498900000000, 0xa462df2a00000000, + 0xc2b7dff600000000, 0x8e9f495500000000, 0x1be1826a00000000, + 0x57c914c900000000, 0x964df20900000000, 0xda6564aa00000000, + 0x4f1baf9500000000, 0x0333393600000000, 0x65e639ea00000000, + 0x29ceaf4900000000, 0xbcb0647600000000, 0xf098f2d500000000, + 0x7fbfd82c00000000, 0x33974e8f00000000, 0xa6e985b000000000, + 0xeac1131300000000, 0x8c1413cf00000000, 0xc03c856c00000000, + 0x55424e5300000000, 0x196ad8f000000000, 0xd8ee3e3000000000, + 0x94c6a89300000000, 0x01b863ac00000000, 0x4d90f50f00000000, + 0x2b45f5d300000000, 0x676d637000000000, 0xf213a84f00000000, + 0xbe3b3eec00000000, 0xad5a8d6600000000, 0xe1721bc500000000, + 0x740cd0fa00000000, 0x3824465900000000, 0x5ef1468500000000, + 0x12d9d02600000000, 0x87a71b1900000000, 0xcb8f8dba00000000, + 0x0a0b6b7a00000000, 0x4623fdd900000000, 0xd35d36e600000000, + 0x9f75a04500000000, 0xf9a0a09900000000, 0xb588363a00000000, + 0x20f6fd0500000000, 0x6cde6ba600000000, 0xe3f9415f00000000, + 0xafd1d7fc00000000, 0x3aaf1cc300000000, 0x76878a6000000000, + 0x10528abc00000000, 0x5c7a1c1f00000000, 0xc904d72000000000, + 0x852c418300000000, 0x44a8a74300000000, 0x088031e000000000, + 0x9dfefadf00000000, 0xd1d66c7c00000000, 0xb7036ca000000000, + 0xfb2bfa0300000000, 0x6e55313c00000000, 0x227da79f00000000, + 0x099126f200000000, 0x45b9b05100000000, 0xd0c77b6e00000000, + 0x9cefedcd00000000, 0xfa3aed1100000000, 0xb6127bb200000000, + 0x236cb08d00000000, 0x6f44262e00000000, 0xaec0c0ee00000000, + 
0xe2e8564d00000000, 0x77969d7200000000, 0x3bbe0bd100000000, + 0x5d6b0b0d00000000, 0x11439dae00000000, 0x843d569100000000, + 0xc815c03200000000, 0x4732eacb00000000, 0x0b1a7c6800000000, + 0x9e64b75700000000, 0xd24c21f400000000, 0xb499212800000000, + 0xf8b1b78b00000000, 0x6dcf7cb400000000, 0x21e7ea1700000000, + 0xe0630cd700000000, 0xac4b9a7400000000, 0x3935514b00000000, + 0x751dc7e800000000, 0x13c8c73400000000, 0x5fe0519700000000, + 0xca9e9aa800000000, 0x86b60c0b00000000, 0x95d7bf8100000000, + 0xd9ff292200000000, 0x4c81e21d00000000, 0x00a974be00000000, + 0x667c746200000000, 0x2a54e2c100000000, 0xbf2a29fe00000000, + 0xf302bf5d00000000, 0x3286599d00000000, 0x7eaecf3e00000000, + 0xebd0040100000000, 0xa7f892a200000000, 0xc12d927e00000000, + 0x8d0504dd00000000, 0x187bcfe200000000, 0x5453594100000000, + 0xdb7473b800000000, 0x975ce51b00000000, 0x02222e2400000000, + 0x4e0ab88700000000, 0x28dfb85b00000000, 0x64f72ef800000000, + 0xf189e5c700000000, 0xbda1736400000000, 0x7c2595a400000000, + 0x300d030700000000, 0xa573c83800000000, 0xe95b5e9b00000000, + 0x8f8e5e4700000000, 0xc3a6c8e400000000, 0x56d803db00000000, + 0x1af0957800000000}, + {0x0000000000000000, 0x939bc97f00000000, 0x263793ff00000000, + 0xb5ac5a8000000000, 0x0d68572400000000, 0x9ef39e5b00000000, + 0x2b5fc4db00000000, 0xb8c40da400000000, 0x1ad0ae4800000000, + 0x894b673700000000, 0x3ce73db700000000, 0xaf7cf4c800000000, + 0x17b8f96c00000000, 0x8423301300000000, 0x318f6a9300000000, + 0xa214a3ec00000000, 0x34a05d9100000000, 0xa73b94ee00000000, + 0x1297ce6e00000000, 0x810c071100000000, 0x39c80ab500000000, + 0xaa53c3ca00000000, 0x1fff994a00000000, 0x8c64503500000000, + 0x2e70f3d900000000, 0xbdeb3aa600000000, 0x0847602600000000, + 0x9bdca95900000000, 0x2318a4fd00000000, 0xb0836d8200000000, + 0x052f370200000000, 0x96b4fe7d00000000, 0x2946caf900000000, + 0xbadd038600000000, 0x0f71590600000000, 0x9cea907900000000, + 0x242e9ddd00000000, 0xb7b554a200000000, 0x02190e2200000000, + 0x9182c75d00000000, 0x339664b100000000, 
0xa00dadce00000000, + 0x15a1f74e00000000, 0x863a3e3100000000, 0x3efe339500000000, + 0xad65faea00000000, 0x18c9a06a00000000, 0x8b52691500000000, + 0x1de6976800000000, 0x8e7d5e1700000000, 0x3bd1049700000000, + 0xa84acde800000000, 0x108ec04c00000000, 0x8315093300000000, + 0x36b953b300000000, 0xa5229acc00000000, 0x0736392000000000, + 0x94adf05f00000000, 0x2101aadf00000000, 0xb29a63a000000000, + 0x0a5e6e0400000000, 0x99c5a77b00000000, 0x2c69fdfb00000000, + 0xbff2348400000000, 0x138ae52800000000, 0x80112c5700000000, + 0x35bd76d700000000, 0xa626bfa800000000, 0x1ee2b20c00000000, + 0x8d797b7300000000, 0x38d521f300000000, 0xab4ee88c00000000, + 0x095a4b6000000000, 0x9ac1821f00000000, 0x2f6dd89f00000000, + 0xbcf611e000000000, 0x04321c4400000000, 0x97a9d53b00000000, + 0x22058fbb00000000, 0xb19e46c400000000, 0x272ab8b900000000, + 0xb4b171c600000000, 0x011d2b4600000000, 0x9286e23900000000, + 0x2a42ef9d00000000, 0xb9d926e200000000, 0x0c757c6200000000, + 0x9feeb51d00000000, 0x3dfa16f100000000, 0xae61df8e00000000, + 0x1bcd850e00000000, 0x88564c7100000000, 0x309241d500000000, + 0xa30988aa00000000, 0x16a5d22a00000000, 0x853e1b5500000000, + 0x3acc2fd100000000, 0xa957e6ae00000000, 0x1cfbbc2e00000000, + 0x8f60755100000000, 0x37a478f500000000, 0xa43fb18a00000000, + 0x1193eb0a00000000, 0x8208227500000000, 0x201c819900000000, + 0xb38748e600000000, 0x062b126600000000, 0x95b0db1900000000, + 0x2d74d6bd00000000, 0xbeef1fc200000000, 0x0b43454200000000, + 0x98d88c3d00000000, 0x0e6c724000000000, 0x9df7bb3f00000000, + 0x285be1bf00000000, 0xbbc028c000000000, 0x0304256400000000, + 0x909fec1b00000000, 0x2533b69b00000000, 0xb6a87fe400000000, + 0x14bcdc0800000000, 0x8727157700000000, 0x328b4ff700000000, + 0xa110868800000000, 0x19d48b2c00000000, 0x8a4f425300000000, + 0x3fe318d300000000, 0xac78d1ac00000000, 0x2614cb5100000000, + 0xb58f022e00000000, 0x002358ae00000000, 0x93b891d100000000, + 0x2b7c9c7500000000, 0xb8e7550a00000000, 0x0d4b0f8a00000000, + 0x9ed0c6f500000000, 0x3cc4651900000000, 
0xaf5fac6600000000, + 0x1af3f6e600000000, 0x89683f9900000000, 0x31ac323d00000000, + 0xa237fb4200000000, 0x179ba1c200000000, 0x840068bd00000000, + 0x12b496c000000000, 0x812f5fbf00000000, 0x3483053f00000000, + 0xa718cc4000000000, 0x1fdcc1e400000000, 0x8c47089b00000000, + 0x39eb521b00000000, 0xaa709b6400000000, 0x0864388800000000, + 0x9bfff1f700000000, 0x2e53ab7700000000, 0xbdc8620800000000, + 0x050c6fac00000000, 0x9697a6d300000000, 0x233bfc5300000000, + 0xb0a0352c00000000, 0x0f5201a800000000, 0x9cc9c8d700000000, + 0x2965925700000000, 0xbafe5b2800000000, 0x023a568c00000000, + 0x91a19ff300000000, 0x240dc57300000000, 0xb7960c0c00000000, + 0x1582afe000000000, 0x8619669f00000000, 0x33b53c1f00000000, + 0xa02ef56000000000, 0x18eaf8c400000000, 0x8b7131bb00000000, + 0x3edd6b3b00000000, 0xad46a24400000000, 0x3bf25c3900000000, + 0xa869954600000000, 0x1dc5cfc600000000, 0x8e5e06b900000000, + 0x369a0b1d00000000, 0xa501c26200000000, 0x10ad98e200000000, + 0x8336519d00000000, 0x2122f27100000000, 0xb2b93b0e00000000, + 0x0715618e00000000, 0x948ea8f100000000, 0x2c4aa55500000000, + 0xbfd16c2a00000000, 0x0a7d36aa00000000, 0x99e6ffd500000000, + 0x359e2e7900000000, 0xa605e70600000000, 0x13a9bd8600000000, + 0x803274f900000000, 0x38f6795d00000000, 0xab6db02200000000, + 0x1ec1eaa200000000, 0x8d5a23dd00000000, 0x2f4e803100000000, + 0xbcd5494e00000000, 0x097913ce00000000, 0x9ae2dab100000000, + 0x2226d71500000000, 0xb1bd1e6a00000000, 0x041144ea00000000, + 0x978a8d9500000000, 0x013e73e800000000, 0x92a5ba9700000000, + 0x2709e01700000000, 0xb492296800000000, 0x0c5624cc00000000, + 0x9fcdedb300000000, 0x2a61b73300000000, 0xb9fa7e4c00000000, + 0x1beedda000000000, 0x887514df00000000, 0x3dd94e5f00000000, + 0xae42872000000000, 0x16868a8400000000, 0x851d43fb00000000, + 0x30b1197b00000000, 0xa32ad00400000000, 0x1cd8e48000000000, + 0x8f432dff00000000, 0x3aef777f00000000, 0xa974be0000000000, + 0x11b0b3a400000000, 0x822b7adb00000000, 0x3787205b00000000, + 0xa41ce92400000000, 0x06084ac800000000, 
0x959383b700000000, + 0x203fd93700000000, 0xb3a4104800000000, 0x0b601dec00000000, + 0x98fbd49300000000, 0x2d578e1300000000, 0xbecc476c00000000, + 0x2878b91100000000, 0xbbe3706e00000000, 0x0e4f2aee00000000, + 0x9dd4e39100000000, 0x2510ee3500000000, 0xb68b274a00000000, + 0x03277dca00000000, 0x90bcb4b500000000, 0x32a8175900000000, + 0xa133de2600000000, 0x149f84a600000000, 0x87044dd900000000, + 0x3fc0407d00000000, 0xac5b890200000000, 0x19f7d38200000000, + 0x8a6c1afd00000000}, + {0x0000000000000000, 0x650b796900000000, 0xca16f2d200000000, + 0xaf1d8bbb00000000, 0xd52b957e00000000, 0xb020ec1700000000, + 0x1f3d67ac00000000, 0x7a361ec500000000, 0xaa572afd00000000, + 0xcf5c539400000000, 0x6041d82f00000000, 0x054aa14600000000, + 0x7f7cbf8300000000, 0x1a77c6ea00000000, 0xb56a4d5100000000, + 0xd061343800000000, 0x15a9252100000000, 0x70a25c4800000000, + 0xdfbfd7f300000000, 0xbab4ae9a00000000, 0xc082b05f00000000, + 0xa589c93600000000, 0x0a94428d00000000, 0x6f9f3be400000000, + 0xbffe0fdc00000000, 0xdaf576b500000000, 0x75e8fd0e00000000, + 0x10e3846700000000, 0x6ad59aa200000000, 0x0fdee3cb00000000, + 0xa0c3687000000000, 0xc5c8111900000000, 0x2a524b4200000000, + 0x4f59322b00000000, 0xe044b99000000000, 0x854fc0f900000000, + 0xff79de3c00000000, 0x9a72a75500000000, 0x356f2cee00000000, + 0x5064558700000000, 0x800561bf00000000, 0xe50e18d600000000, + 0x4a13936d00000000, 0x2f18ea0400000000, 0x552ef4c100000000, + 0x30258da800000000, 0x9f38061300000000, 0xfa337f7a00000000, + 0x3ffb6e6300000000, 0x5af0170a00000000, 0xf5ed9cb100000000, + 0x90e6e5d800000000, 0xead0fb1d00000000, 0x8fdb827400000000, + 0x20c609cf00000000, 0x45cd70a600000000, 0x95ac449e00000000, + 0xf0a73df700000000, 0x5fbab64c00000000, 0x3ab1cf2500000000, + 0x4087d1e000000000, 0x258ca88900000000, 0x8a91233200000000, + 0xef9a5a5b00000000, 0x54a4968400000000, 0x31afefed00000000, + 0x9eb2645600000000, 0xfbb91d3f00000000, 0x818f03fa00000000, + 0xe4847a9300000000, 0x4b99f12800000000, 0x2e92884100000000, + 0xfef3bc7900000000, 
0x9bf8c51000000000, 0x34e54eab00000000, + 0x51ee37c200000000, 0x2bd8290700000000, 0x4ed3506e00000000, + 0xe1cedbd500000000, 0x84c5a2bc00000000, 0x410db3a500000000, + 0x2406cacc00000000, 0x8b1b417700000000, 0xee10381e00000000, + 0x942626db00000000, 0xf12d5fb200000000, 0x5e30d40900000000, + 0x3b3bad6000000000, 0xeb5a995800000000, 0x8e51e03100000000, + 0x214c6b8a00000000, 0x444712e300000000, 0x3e710c2600000000, + 0x5b7a754f00000000, 0xf467fef400000000, 0x916c879d00000000, + 0x7ef6ddc600000000, 0x1bfda4af00000000, 0xb4e02f1400000000, + 0xd1eb567d00000000, 0xabdd48b800000000, 0xced631d100000000, + 0x61cbba6a00000000, 0x04c0c30300000000, 0xd4a1f73b00000000, + 0xb1aa8e5200000000, 0x1eb705e900000000, 0x7bbc7c8000000000, + 0x018a624500000000, 0x64811b2c00000000, 0xcb9c909700000000, + 0xae97e9fe00000000, 0x6b5ff8e700000000, 0x0e54818e00000000, + 0xa1490a3500000000, 0xc442735c00000000, 0xbe746d9900000000, + 0xdb7f14f000000000, 0x74629f4b00000000, 0x1169e62200000000, + 0xc108d21a00000000, 0xa403ab7300000000, 0x0b1e20c800000000, + 0x6e1559a100000000, 0x1423476400000000, 0x71283e0d00000000, + 0xde35b5b600000000, 0xbb3eccdf00000000, 0xe94e5cd200000000, + 0x8c4525bb00000000, 0x2358ae0000000000, 0x4653d76900000000, + 0x3c65c9ac00000000, 0x596eb0c500000000, 0xf6733b7e00000000, + 0x9378421700000000, 0x4319762f00000000, 0x26120f4600000000, + 0x890f84fd00000000, 0xec04fd9400000000, 0x9632e35100000000, + 0xf3399a3800000000, 0x5c24118300000000, 0x392f68ea00000000, + 0xfce779f300000000, 0x99ec009a00000000, 0x36f18b2100000000, + 0x53faf24800000000, 0x29ccec8d00000000, 0x4cc795e400000000, + 0xe3da1e5f00000000, 0x86d1673600000000, 0x56b0530e00000000, + 0x33bb2a6700000000, 0x9ca6a1dc00000000, 0xf9add8b500000000, + 0x839bc67000000000, 0xe690bf1900000000, 0x498d34a200000000, + 0x2c864dcb00000000, 0xc31c179000000000, 0xa6176ef900000000, + 0x090ae54200000000, 0x6c019c2b00000000, 0x163782ee00000000, + 0x733cfb8700000000, 0xdc21703c00000000, 0xb92a095500000000, + 0x694b3d6d00000000, 
0x0c40440400000000, 0xa35dcfbf00000000, + 0xc656b6d600000000, 0xbc60a81300000000, 0xd96bd17a00000000, + 0x76765ac100000000, 0x137d23a800000000, 0xd6b532b100000000, + 0xb3be4bd800000000, 0x1ca3c06300000000, 0x79a8b90a00000000, + 0x039ea7cf00000000, 0x6695dea600000000, 0xc988551d00000000, + 0xac832c7400000000, 0x7ce2184c00000000, 0x19e9612500000000, + 0xb6f4ea9e00000000, 0xd3ff93f700000000, 0xa9c98d3200000000, + 0xccc2f45b00000000, 0x63df7fe000000000, 0x06d4068900000000, + 0xbdeaca5600000000, 0xd8e1b33f00000000, 0x77fc388400000000, + 0x12f741ed00000000, 0x68c15f2800000000, 0x0dca264100000000, + 0xa2d7adfa00000000, 0xc7dcd49300000000, 0x17bde0ab00000000, + 0x72b699c200000000, 0xddab127900000000, 0xb8a06b1000000000, + 0xc29675d500000000, 0xa79d0cbc00000000, 0x0880870700000000, + 0x6d8bfe6e00000000, 0xa843ef7700000000, 0xcd48961e00000000, + 0x62551da500000000, 0x075e64cc00000000, 0x7d687a0900000000, + 0x1863036000000000, 0xb77e88db00000000, 0xd275f1b200000000, + 0x0214c58a00000000, 0x671fbce300000000, 0xc802375800000000, + 0xad094e3100000000, 0xd73f50f400000000, 0xb234299d00000000, + 0x1d29a22600000000, 0x7822db4f00000000, 0x97b8811400000000, + 0xf2b3f87d00000000, 0x5dae73c600000000, 0x38a50aaf00000000, + 0x4293146a00000000, 0x27986d0300000000, 0x8885e6b800000000, + 0xed8e9fd100000000, 0x3defabe900000000, 0x58e4d28000000000, + 0xf7f9593b00000000, 0x92f2205200000000, 0xe8c43e9700000000, + 0x8dcf47fe00000000, 0x22d2cc4500000000, 0x47d9b52c00000000, + 0x8211a43500000000, 0xe71add5c00000000, 0x480756e700000000, + 0x2d0c2f8e00000000, 0x573a314b00000000, 0x3231482200000000, + 0x9d2cc39900000000, 0xf827baf000000000, 0x28468ec800000000, + 0x4d4df7a100000000, 0xe2507c1a00000000, 0x875b057300000000, + 0xfd6d1bb600000000, 0x986662df00000000, 0x377be96400000000, + 0x5270900d00000000}, + {0x0000000000000000, 0xdcecb13d00000000, 0xb8d9637b00000000, + 0x6435d24600000000, 0x70b3c7f600000000, 0xac5f76cb00000000, + 0xc86aa48d00000000, 0x148615b000000000, 0xa160fe3600000000, + 
0x7d8c4f0b00000000, 0x19b99d4d00000000, 0xc5552c7000000000, + 0xd1d339c000000000, 0x0d3f88fd00000000, 0x690a5abb00000000, + 0xb5e6eb8600000000, 0x42c1fc6d00000000, 0x9e2d4d5000000000, + 0xfa189f1600000000, 0x26f42e2b00000000, 0x32723b9b00000000, + 0xee9e8aa600000000, 0x8aab58e000000000, 0x5647e9dd00000000, + 0xe3a1025b00000000, 0x3f4db36600000000, 0x5b78612000000000, + 0x8794d01d00000000, 0x9312c5ad00000000, 0x4ffe749000000000, + 0x2bcba6d600000000, 0xf72717eb00000000, 0x8482f9db00000000, + 0x586e48e600000000, 0x3c5b9aa000000000, 0xe0b72b9d00000000, + 0xf4313e2d00000000, 0x28dd8f1000000000, 0x4ce85d5600000000, + 0x9004ec6b00000000, 0x25e207ed00000000, 0xf90eb6d000000000, + 0x9d3b649600000000, 0x41d7d5ab00000000, 0x5551c01b00000000, + 0x89bd712600000000, 0xed88a36000000000, 0x3164125d00000000, + 0xc64305b600000000, 0x1aafb48b00000000, 0x7e9a66cd00000000, + 0xa276d7f000000000, 0xb6f0c24000000000, 0x6a1c737d00000000, + 0x0e29a13b00000000, 0xd2c5100600000000, 0x6723fb8000000000, + 0xbbcf4abd00000000, 0xdffa98fb00000000, 0x031629c600000000, + 0x17903c7600000000, 0xcb7c8d4b00000000, 0xaf495f0d00000000, + 0x73a5ee3000000000, 0x4903826c00000000, 0x95ef335100000000, + 0xf1dae11700000000, 0x2d36502a00000000, 0x39b0459a00000000, + 0xe55cf4a700000000, 0x816926e100000000, 0x5d8597dc00000000, + 0xe8637c5a00000000, 0x348fcd6700000000, 0x50ba1f2100000000, + 0x8c56ae1c00000000, 0x98d0bbac00000000, 0x443c0a9100000000, + 0x2009d8d700000000, 0xfce569ea00000000, 0x0bc27e0100000000, + 0xd72ecf3c00000000, 0xb31b1d7a00000000, 0x6ff7ac4700000000, + 0x7b71b9f700000000, 0xa79d08ca00000000, 0xc3a8da8c00000000, + 0x1f446bb100000000, 0xaaa2803700000000, 0x764e310a00000000, + 0x127be34c00000000, 0xce97527100000000, 0xda1147c100000000, + 0x06fdf6fc00000000, 0x62c824ba00000000, 0xbe24958700000000, + 0xcd817bb700000000, 0x116dca8a00000000, 0x755818cc00000000, + 0xa9b4a9f100000000, 0xbd32bc4100000000, 0x61de0d7c00000000, + 0x05ebdf3a00000000, 0xd9076e0700000000, 0x6ce1858100000000, + 
0xb00d34bc00000000, 0xd438e6fa00000000, 0x08d457c700000000, + 0x1c52427700000000, 0xc0bef34a00000000, 0xa48b210c00000000, + 0x7867903100000000, 0x8f4087da00000000, 0x53ac36e700000000, + 0x3799e4a100000000, 0xeb75559c00000000, 0xfff3402c00000000, + 0x231ff11100000000, 0x472a235700000000, 0x9bc6926a00000000, + 0x2e2079ec00000000, 0xf2ccc8d100000000, 0x96f91a9700000000, + 0x4a15abaa00000000, 0x5e93be1a00000000, 0x827f0f2700000000, + 0xe64add6100000000, 0x3aa66c5c00000000, 0x920604d900000000, + 0x4eeab5e400000000, 0x2adf67a200000000, 0xf633d69f00000000, + 0xe2b5c32f00000000, 0x3e59721200000000, 0x5a6ca05400000000, + 0x8680116900000000, 0x3366faef00000000, 0xef8a4bd200000000, + 0x8bbf999400000000, 0x575328a900000000, 0x43d53d1900000000, + 0x9f398c2400000000, 0xfb0c5e6200000000, 0x27e0ef5f00000000, + 0xd0c7f8b400000000, 0x0c2b498900000000, 0x681e9bcf00000000, + 0xb4f22af200000000, 0xa0743f4200000000, 0x7c988e7f00000000, + 0x18ad5c3900000000, 0xc441ed0400000000, 0x71a7068200000000, + 0xad4bb7bf00000000, 0xc97e65f900000000, 0x1592d4c400000000, + 0x0114c17400000000, 0xddf8704900000000, 0xb9cda20f00000000, + 0x6521133200000000, 0x1684fd0200000000, 0xca684c3f00000000, + 0xae5d9e7900000000, 0x72b12f4400000000, 0x66373af400000000, + 0xbadb8bc900000000, 0xdeee598f00000000, 0x0202e8b200000000, + 0xb7e4033400000000, 0x6b08b20900000000, 0x0f3d604f00000000, + 0xd3d1d17200000000, 0xc757c4c200000000, 0x1bbb75ff00000000, + 0x7f8ea7b900000000, 0xa362168400000000, 0x5445016f00000000, + 0x88a9b05200000000, 0xec9c621400000000, 0x3070d32900000000, + 0x24f6c69900000000, 0xf81a77a400000000, 0x9c2fa5e200000000, + 0x40c314df00000000, 0xf525ff5900000000, 0x29c94e6400000000, + 0x4dfc9c2200000000, 0x91102d1f00000000, 0x859638af00000000, + 0x597a899200000000, 0x3d4f5bd400000000, 0xe1a3eae900000000, + 0xdb0586b500000000, 0x07e9378800000000, 0x63dce5ce00000000, + 0xbf3054f300000000, 0xabb6414300000000, 0x775af07e00000000, + 0x136f223800000000, 0xcf83930500000000, 0x7a65788300000000, + 
0xa689c9be00000000, 0xc2bc1bf800000000, 0x1e50aac500000000, + 0x0ad6bf7500000000, 0xd63a0e4800000000, 0xb20fdc0e00000000, + 0x6ee36d3300000000, 0x99c47ad800000000, 0x4528cbe500000000, + 0x211d19a300000000, 0xfdf1a89e00000000, 0xe977bd2e00000000, + 0x359b0c1300000000, 0x51aede5500000000, 0x8d426f6800000000, + 0x38a484ee00000000, 0xe44835d300000000, 0x807de79500000000, + 0x5c9156a800000000, 0x4817431800000000, 0x94fbf22500000000, + 0xf0ce206300000000, 0x2c22915e00000000, 0x5f877f6e00000000, + 0x836bce5300000000, 0xe75e1c1500000000, 0x3bb2ad2800000000, + 0x2f34b89800000000, 0xf3d809a500000000, 0x97eddbe300000000, + 0x4b016ade00000000, 0xfee7815800000000, 0x220b306500000000, + 0x463ee22300000000, 0x9ad2531e00000000, 0x8e5446ae00000000, + 0x52b8f79300000000, 0x368d25d500000000, 0xea6194e800000000, + 0x1d46830300000000, 0xc1aa323e00000000, 0xa59fe07800000000, + 0x7973514500000000, 0x6df544f500000000, 0xb119f5c800000000, + 0xd52c278e00000000, 0x09c096b300000000, 0xbc267d3500000000, + 0x60cacc0800000000, 0x04ff1e4e00000000, 0xd813af7300000000, + 0xcc95bac300000000, 0x10790bfe00000000, 0x744cd9b800000000, + 0xa8a0688500000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 
0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 
0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 
0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 
0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 
0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 
0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + 
{0x00000000, 0x9e83da9f, 0x7d01c4e4, 0xe3821e7b, 0xbb04f912, + 0x2587238d, 0xc6053df6, 0x5886e769, 0x7609f225, 0xe88a28ba, + 0x0b0836c1, 0x958bec5e, 0xcd0d0b37, 0x538ed1a8, 0xb00ccfd3, + 0x2e8f154c, 0xec12e44b, 0x72913ed4, 0x911320af, 0x0f90fa30, + 0x57161d59, 0xc995c7c6, 0x2a17d9bd, 0xb4940322, 0x9a1b166e, + 0x0498ccf1, 0xe71ad28a, 0x79990815, 0x211fef7c, 0xbf9c35e3, + 0x5c1e2b98, 0xc29df107, 0xd825c897, 0x46a61208, 0xa5240c73, + 0x3ba7d6ec, 0x63213185, 0xfda2eb1a, 0x1e20f561, 0x80a32ffe, + 0xae2c3ab2, 0x30afe02d, 0xd32dfe56, 0x4dae24c9, 0x1528c3a0, + 0x8bab193f, 0x68290744, 0xf6aadddb, 0x34372cdc, 0xaab4f643, + 0x4936e838, 0xd7b532a7, 0x8f33d5ce, 0x11b00f51, 0xf232112a, + 0x6cb1cbb5, 0x423edef9, 0xdcbd0466, 0x3f3f1a1d, 0xa1bcc082, + 0xf93a27eb, 0x67b9fd74, 0x843be30f, 0x1ab83990, 0xf14de1f4, + 0x6fce3b6b, 0x8c4c2510, 0x12cfff8f, 0x4a4918e6, 0xd4cac279, + 0x3748dc02, 0xa9cb069d, 0x874413d1, 0x19c7c94e, 0xfa45d735, + 0x64c60daa, 0x3c40eac3, 0xa2c3305c, 0x41412e27, 0xdfc2f4b8, + 0x1d5f05bf, 0x83dcdf20, 0x605ec15b, 0xfedd1bc4, 0xa65bfcad, + 0x38d82632, 0xdb5a3849, 0x45d9e2d6, 0x6b56f79a, 0xf5d52d05, + 0x1657337e, 0x88d4e9e1, 0xd0520e88, 0x4ed1d417, 0xad53ca6c, + 0x33d010f3, 0x29682963, 0xb7ebf3fc, 0x5469ed87, 0xcaea3718, + 0x926cd071, 0x0cef0aee, 0xef6d1495, 0x71eece0a, 0x5f61db46, + 0xc1e201d9, 0x22601fa2, 0xbce3c53d, 0xe4652254, 0x7ae6f8cb, + 0x9964e6b0, 0x07e73c2f, 0xc57acd28, 0x5bf917b7, 0xb87b09cc, + 0x26f8d353, 0x7e7e343a, 0xe0fdeea5, 0x037ff0de, 0x9dfc2a41, + 0xb3733f0d, 0x2df0e592, 0xce72fbe9, 0x50f12176, 0x0877c61f, + 0x96f41c80, 0x757602fb, 0xebf5d864, 0xa39db332, 0x3d1e69ad, + 0xde9c77d6, 0x401fad49, 0x18994a20, 0x861a90bf, 0x65988ec4, + 0xfb1b545b, 0xd5944117, 0x4b179b88, 0xa89585f3, 0x36165f6c, + 0x6e90b805, 0xf013629a, 0x13917ce1, 0x8d12a67e, 0x4f8f5779, + 0xd10c8de6, 0x328e939d, 0xac0d4902, 0xf48bae6b, 0x6a0874f4, + 0x898a6a8f, 0x1709b010, 0x3986a55c, 0xa7057fc3, 0x448761b8, + 0xda04bb27, 0x82825c4e, 0x1c0186d1, 0xff8398aa, 0x61004235, + 0x7bb87ba5, 
0xe53ba13a, 0x06b9bf41, 0x983a65de, 0xc0bc82b7, + 0x5e3f5828, 0xbdbd4653, 0x233e9ccc, 0x0db18980, 0x9332531f, + 0x70b04d64, 0xee3397fb, 0xb6b57092, 0x2836aa0d, 0xcbb4b476, + 0x55376ee9, 0x97aa9fee, 0x09294571, 0xeaab5b0a, 0x74288195, + 0x2cae66fc, 0xb22dbc63, 0x51afa218, 0xcf2c7887, 0xe1a36dcb, + 0x7f20b754, 0x9ca2a92f, 0x022173b0, 0x5aa794d9, 0xc4244e46, + 0x27a6503d, 0xb9258aa2, 0x52d052c6, 0xcc538859, 0x2fd19622, + 0xb1524cbd, 0xe9d4abd4, 0x7757714b, 0x94d56f30, 0x0a56b5af, + 0x24d9a0e3, 0xba5a7a7c, 0x59d86407, 0xc75bbe98, 0x9fdd59f1, + 0x015e836e, 0xe2dc9d15, 0x7c5f478a, 0xbec2b68d, 0x20416c12, + 0xc3c37269, 0x5d40a8f6, 0x05c64f9f, 0x9b459500, 0x78c78b7b, + 0xe64451e4, 0xc8cb44a8, 0x56489e37, 0xb5ca804c, 0x2b495ad3, + 0x73cfbdba, 0xed4c6725, 0x0ece795e, 0x904da3c1, 0x8af59a51, + 0x147640ce, 0xf7f45eb5, 0x6977842a, 0x31f16343, 0xaf72b9dc, + 0x4cf0a7a7, 0xd2737d38, 0xfcfc6874, 0x627fb2eb, 0x81fdac90, + 0x1f7e760f, 0x47f89166, 0xd97b4bf9, 0x3af95582, 0xa47a8f1d, + 0x66e77e1a, 0xf864a485, 0x1be6bafe, 0x85656061, 0xdde38708, + 0x43605d97, 0xa0e243ec, 0x3e619973, 0x10ee8c3f, 0x8e6d56a0, + 0x6def48db, 0xf36c9244, 0xabea752d, 0x3569afb2, 0xd6ebb1c9, + 0x48686b56}, + {0x00000000, 0xc0642817, 0x80c9502e, 0x40ad7839, 0x0093a15c, + 0xc0f7894b, 0x805af172, 0x403ed965, 0x002643b9, 0xc0426bae, + 0x80ef1397, 0x408b3b80, 0x00b5e2e5, 0xc0d1caf2, 0x807cb2cb, + 0x40189adc, 0x414af7a9, 0x812edfbe, 0xc183a787, 0x01e78f90, + 0x41d956f5, 0x81bd7ee2, 0xc11006db, 0x01742ecc, 0x416cb410, + 0x81089c07, 0xc1a5e43e, 0x01c1cc29, 0x41ff154c, 0x819b3d5b, + 0xc1364562, 0x01526d75, 0xc3929f88, 0x03f6b79f, 0x435bcfa6, + 0x833fe7b1, 0xc3013ed4, 0x036516c3, 0x43c86efa, 0x83ac46ed, + 0xc3b4dc31, 0x03d0f426, 0x437d8c1f, 0x8319a408, 0xc3277d6d, + 0x0343557a, 0x43ee2d43, 0x838a0554, 0x82d86821, 0x42bc4036, + 0x0211380f, 0xc2751018, 0x824bc97d, 0x422fe16a, 0x02829953, + 0xc2e6b144, 0x82fe2b98, 0x429a038f, 0x02377bb6, 0xc25353a1, + 0x826d8ac4, 0x4209a2d3, 0x02a4daea, 0xc2c0f2fd, 0xc7234eca, + 0x074766dd, 
0x47ea1ee4, 0x878e36f3, 0xc7b0ef96, 0x07d4c781, + 0x4779bfb8, 0x871d97af, 0xc7050d73, 0x07612564, 0x47cc5d5d, + 0x87a8754a, 0xc796ac2f, 0x07f28438, 0x475ffc01, 0x873bd416, + 0x8669b963, 0x460d9174, 0x06a0e94d, 0xc6c4c15a, 0x86fa183f, + 0x469e3028, 0x06334811, 0xc6576006, 0x864ffada, 0x462bd2cd, + 0x0686aaf4, 0xc6e282e3, 0x86dc5b86, 0x46b87391, 0x06150ba8, + 0xc67123bf, 0x04b1d142, 0xc4d5f955, 0x8478816c, 0x441ca97b, + 0x0422701e, 0xc4465809, 0x84eb2030, 0x448f0827, 0x049792fb, + 0xc4f3baec, 0x845ec2d5, 0x443aeac2, 0x040433a7, 0xc4601bb0, + 0x84cd6389, 0x44a94b9e, 0x45fb26eb, 0x859f0efc, 0xc53276c5, + 0x05565ed2, 0x456887b7, 0x850cafa0, 0xc5a1d799, 0x05c5ff8e, + 0x45dd6552, 0x85b94d45, 0xc514357c, 0x05701d6b, 0x454ec40e, + 0x852aec19, 0xc5879420, 0x05e3bc37, 0xcf41ed4f, 0x0f25c558, + 0x4f88bd61, 0x8fec9576, 0xcfd24c13, 0x0fb66404, 0x4f1b1c3d, + 0x8f7f342a, 0xcf67aef6, 0x0f0386e1, 0x4faefed8, 0x8fcad6cf, + 0xcff40faa, 0x0f9027bd, 0x4f3d5f84, 0x8f597793, 0x8e0b1ae6, + 0x4e6f32f1, 0x0ec24ac8, 0xcea662df, 0x8e98bbba, 0x4efc93ad, + 0x0e51eb94, 0xce35c383, 0x8e2d595f, 0x4e497148, 0x0ee40971, + 0xce802166, 0x8ebef803, 0x4edad014, 0x0e77a82d, 0xce13803a, + 0x0cd372c7, 0xccb75ad0, 0x8c1a22e9, 0x4c7e0afe, 0x0c40d39b, + 0xcc24fb8c, 0x8c8983b5, 0x4cedaba2, 0x0cf5317e, 0xcc911969, + 0x8c3c6150, 0x4c584947, 0x0c669022, 0xcc02b835, 0x8cafc00c, + 0x4ccbe81b, 0x4d99856e, 0x8dfdad79, 0xcd50d540, 0x0d34fd57, + 0x4d0a2432, 0x8d6e0c25, 0xcdc3741c, 0x0da75c0b, 0x4dbfc6d7, + 0x8ddbeec0, 0xcd7696f9, 0x0d12beee, 0x4d2c678b, 0x8d484f9c, + 0xcde537a5, 0x0d811fb2, 0x0862a385, 0xc8068b92, 0x88abf3ab, + 0x48cfdbbc, 0x08f102d9, 0xc8952ace, 0x883852f7, 0x485c7ae0, + 0x0844e03c, 0xc820c82b, 0x888db012, 0x48e99805, 0x08d74160, + 0xc8b36977, 0x881e114e, 0x487a3959, 0x4928542c, 0x894c7c3b, + 0xc9e10402, 0x09852c15, 0x49bbf570, 0x89dfdd67, 0xc972a55e, + 0x09168d49, 0x490e1795, 0x896a3f82, 0xc9c747bb, 0x09a36fac, + 0x499db6c9, 0x89f99ede, 0xc954e6e7, 0x0930cef0, 0xcbf03c0d, + 0x0b94141a, 0x4b396c23, 
0x8b5d4434, 0xcb639d51, 0x0b07b546, + 0x4baacd7f, 0x8bcee568, 0xcbd67fb4, 0x0bb257a3, 0x4b1f2f9a, + 0x8b7b078d, 0xcb45dee8, 0x0b21f6ff, 0x4b8c8ec6, 0x8be8a6d1, + 0x8abacba4, 0x4adee3b3, 0x0a739b8a, 0xca17b39d, 0x8a296af8, + 0x4a4d42ef, 0x0ae03ad6, 0xca8412c1, 0x8a9c881d, 0x4af8a00a, + 0x0a55d833, 0xca31f024, 0x8a0f2941, 0x4a6b0156, 0x0ac6796f, + 0xcaa25178}, + {0x00000000, 0xd4ea739b, 0xe9d396ed, 0x3d39e576, 0x93a15c00, + 0x474b2f9b, 0x7a72caed, 0xae98b976, 0x2643b900, 0xf2a9ca9b, + 0xcf902fed, 0x1b7a5c76, 0xb5e2e500, 0x6108969b, 0x5c3173ed, + 0x88db0076, 0x4c867201, 0x986c019a, 0xa555e4ec, 0x71bf9777, + 0xdf272e01, 0x0bcd5d9a, 0x36f4b8ec, 0xe21ecb77, 0x6ac5cb01, + 0xbe2fb89a, 0x83165dec, 0x57fc2e77, 0xf9649701, 0x2d8ee49a, + 0x10b701ec, 0xc45d7277, 0x980ce502, 0x4ce69699, 0x71df73ef, + 0xa5350074, 0x0badb902, 0xdf47ca99, 0xe27e2fef, 0x36945c74, + 0xbe4f5c02, 0x6aa52f99, 0x579ccaef, 0x8376b974, 0x2dee0002, + 0xf9047399, 0xc43d96ef, 0x10d7e574, 0xd48a9703, 0x0060e498, + 0x3d5901ee, 0xe9b37275, 0x472bcb03, 0x93c1b898, 0xaef85dee, + 0x7a122e75, 0xf2c92e03, 0x26235d98, 0x1b1ab8ee, 0xcff0cb75, + 0x61687203, 0xb5820198, 0x88bbe4ee, 0x5c519775, 0x3019ca05, + 0xe4f3b99e, 0xd9ca5ce8, 0x0d202f73, 0xa3b89605, 0x7752e59e, + 0x4a6b00e8, 0x9e817373, 0x165a7305, 0xc2b0009e, 0xff89e5e8, + 0x2b639673, 0x85fb2f05, 0x51115c9e, 0x6c28b9e8, 0xb8c2ca73, + 0x7c9fb804, 0xa875cb9f, 0x954c2ee9, 0x41a65d72, 0xef3ee404, + 0x3bd4979f, 0x06ed72e9, 0xd2070172, 0x5adc0104, 0x8e36729f, + 0xb30f97e9, 0x67e5e472, 0xc97d5d04, 0x1d972e9f, 0x20aecbe9, + 0xf444b872, 0xa8152f07, 0x7cff5c9c, 0x41c6b9ea, 0x952cca71, + 0x3bb47307, 0xef5e009c, 0xd267e5ea, 0x068d9671, 0x8e569607, + 0x5abce59c, 0x678500ea, 0xb36f7371, 0x1df7ca07, 0xc91db99c, + 0xf4245cea, 0x20ce2f71, 0xe4935d06, 0x30792e9d, 0x0d40cbeb, + 0xd9aab870, 0x77320106, 0xa3d8729d, 0x9ee197eb, 0x4a0be470, + 0xc2d0e406, 0x163a979d, 0x2b0372eb, 0xffe90170, 0x5171b806, + 0x859bcb9d, 0xb8a22eeb, 0x6c485d70, 0x6032940b, 0xb4d8e790, + 0x89e102e6, 0x5d0b717d, 
0xf393c80b, 0x2779bb90, 0x1a405ee6, + 0xceaa2d7d, 0x46712d0b, 0x929b5e90, 0xafa2bbe6, 0x7b48c87d, + 0xd5d0710b, 0x013a0290, 0x3c03e7e6, 0xe8e9947d, 0x2cb4e60a, + 0xf85e9591, 0xc56770e7, 0x118d037c, 0xbf15ba0a, 0x6bffc991, + 0x56c62ce7, 0x822c5f7c, 0x0af75f0a, 0xde1d2c91, 0xe324c9e7, + 0x37ceba7c, 0x9956030a, 0x4dbc7091, 0x708595e7, 0xa46fe67c, + 0xf83e7109, 0x2cd40292, 0x11ede7e4, 0xc507947f, 0x6b9f2d09, + 0xbf755e92, 0x824cbbe4, 0x56a6c87f, 0xde7dc809, 0x0a97bb92, + 0x37ae5ee4, 0xe3442d7f, 0x4ddc9409, 0x9936e792, 0xa40f02e4, + 0x70e5717f, 0xb4b80308, 0x60527093, 0x5d6b95e5, 0x8981e67e, + 0x27195f08, 0xf3f32c93, 0xcecac9e5, 0x1a20ba7e, 0x92fbba08, + 0x4611c993, 0x7b282ce5, 0xafc25f7e, 0x015ae608, 0xd5b09593, + 0xe88970e5, 0x3c63037e, 0x502b5e0e, 0x84c12d95, 0xb9f8c8e3, + 0x6d12bb78, 0xc38a020e, 0x17607195, 0x2a5994e3, 0xfeb3e778, + 0x7668e70e, 0xa2829495, 0x9fbb71e3, 0x4b510278, 0xe5c9bb0e, + 0x3123c895, 0x0c1a2de3, 0xd8f05e78, 0x1cad2c0f, 0xc8475f94, + 0xf57ebae2, 0x2194c979, 0x8f0c700f, 0x5be60394, 0x66dfe6e2, + 0xb2359579, 0x3aee950f, 0xee04e694, 0xd33d03e2, 0x07d77079, + 0xa94fc90f, 0x7da5ba94, 0x409c5fe2, 0x94762c79, 0xc827bb0c, + 0x1ccdc897, 0x21f42de1, 0xf51e5e7a, 0x5b86e70c, 0x8f6c9497, + 0xb25571e1, 0x66bf027a, 0xee64020c, 0x3a8e7197, 0x07b794e1, + 0xd35de77a, 0x7dc55e0c, 0xa92f2d97, 0x9416c8e1, 0x40fcbb7a, + 0x84a1c90d, 0x504bba96, 0x6d725fe0, 0xb9982c7b, 0x1700950d, + 0xc3eae696, 0xfed303e0, 0x2a39707b, 0xa2e2700d, 0x76080396, + 0x4b31e6e0, 0x9fdb957b, 0x31432c0d, 0xe5a95f96, 0xd890bae0, + 0x0c7ac97b}, + {0x00000000, 0x27652581, 0x0fcc3bd9, 0x28a91e58, 0x5f9e0669, + 0x78fb23e8, 0x50523db0, 0x77371831, 0xbe3c0dd2, 0x99592853, + 0xb1f0360b, 0x9695138a, 0xe1a20bbb, 0xc6c72e3a, 0xee6e3062, + 0xc90b15e3, 0x3d7f6b7f, 0x1a1a4efe, 0x32b350a6, 0x15d67527, + 0x62e16d16, 0x45844897, 0x6d2d56cf, 0x4a48734e, 0x834366ad, + 0xa426432c, 0x8c8f5d74, 0xabea78f5, 0xdcdd60c4, 0xfbb84545, + 0xd3115b1d, 0xf4747e9c, 0x7afed6fe, 0x5d9bf37f, 0x7532ed27, + 0x5257c8a6, 0x2560d097, 
0x0205f516, 0x2aaceb4e, 0x0dc9cecf, + 0xc4c2db2c, 0xe3a7fead, 0xcb0ee0f5, 0xec6bc574, 0x9b5cdd45, + 0xbc39f8c4, 0x9490e69c, 0xb3f5c31d, 0x4781bd81, 0x60e49800, + 0x484d8658, 0x6f28a3d9, 0x181fbbe8, 0x3f7a9e69, 0x17d38031, + 0x30b6a5b0, 0xf9bdb053, 0xded895d2, 0xf6718b8a, 0xd114ae0b, + 0xa623b63a, 0x814693bb, 0xa9ef8de3, 0x8e8aa862, 0xb5fadc26, + 0x929ff9a7, 0xba36e7ff, 0x9d53c27e, 0xea64da4f, 0xcd01ffce, + 0xe5a8e196, 0xc2cdc417, 0x0bc6d1f4, 0x2ca3f475, 0x040aea2d, + 0x236fcfac, 0x5458d79d, 0x733df21c, 0x5b94ec44, 0x7cf1c9c5, + 0x8885b759, 0xafe092d8, 0x87498c80, 0xa02ca901, 0xd71bb130, + 0xf07e94b1, 0xd8d78ae9, 0xffb2af68, 0x36b9ba8b, 0x11dc9f0a, + 0x39758152, 0x1e10a4d3, 0x6927bce2, 0x4e429963, 0x66eb873b, + 0x418ea2ba, 0xcf040ad8, 0xe8612f59, 0xc0c83101, 0xe7ad1480, + 0x909a0cb1, 0xb7ff2930, 0x9f563768, 0xb83312e9, 0x7138070a, + 0x565d228b, 0x7ef43cd3, 0x59911952, 0x2ea60163, 0x09c324e2, + 0x216a3aba, 0x060f1f3b, 0xf27b61a7, 0xd51e4426, 0xfdb75a7e, + 0xdad27fff, 0xade567ce, 0x8a80424f, 0xa2295c17, 0x854c7996, + 0x4c476c75, 0x6b2249f4, 0x438b57ac, 0x64ee722d, 0x13d96a1c, + 0x34bc4f9d, 0x1c1551c5, 0x3b707444, 0x6af5b94d, 0x4d909ccc, + 0x65398294, 0x425ca715, 0x356bbf24, 0x120e9aa5, 0x3aa784fd, + 0x1dc2a17c, 0xd4c9b49f, 0xf3ac911e, 0xdb058f46, 0xfc60aac7, + 0x8b57b2f6, 0xac329777, 0x849b892f, 0xa3feacae, 0x578ad232, + 0x70eff7b3, 0x5846e9eb, 0x7f23cc6a, 0x0814d45b, 0x2f71f1da, + 0x07d8ef82, 0x20bdca03, 0xe9b6dfe0, 0xced3fa61, 0xe67ae439, + 0xc11fc1b8, 0xb628d989, 0x914dfc08, 0xb9e4e250, 0x9e81c7d1, + 0x100b6fb3, 0x376e4a32, 0x1fc7546a, 0x38a271eb, 0x4f9569da, + 0x68f04c5b, 0x40595203, 0x673c7782, 0xae376261, 0x895247e0, + 0xa1fb59b8, 0x869e7c39, 0xf1a96408, 0xd6cc4189, 0xfe655fd1, + 0xd9007a50, 0x2d7404cc, 0x0a11214d, 0x22b83f15, 0x05dd1a94, + 0x72ea02a5, 0x558f2724, 0x7d26397c, 0x5a431cfd, 0x9348091e, + 0xb42d2c9f, 0x9c8432c7, 0xbbe11746, 0xccd60f77, 0xebb32af6, + 0xc31a34ae, 0xe47f112f, 0xdf0f656b, 0xf86a40ea, 0xd0c35eb2, + 0xf7a67b33, 0x80916302, 0xa7f44683, 
0x8f5d58db, 0xa8387d5a, + 0x613368b9, 0x46564d38, 0x6eff5360, 0x499a76e1, 0x3ead6ed0, + 0x19c84b51, 0x31615509, 0x16047088, 0xe2700e14, 0xc5152b95, + 0xedbc35cd, 0xcad9104c, 0xbdee087d, 0x9a8b2dfc, 0xb22233a4, + 0x95471625, 0x5c4c03c6, 0x7b292647, 0x5380381f, 0x74e51d9e, + 0x03d205af, 0x24b7202e, 0x0c1e3e76, 0x2b7b1bf7, 0xa5f1b395, + 0x82949614, 0xaa3d884c, 0x8d58adcd, 0xfa6fb5fc, 0xdd0a907d, + 0xf5a38e25, 0xd2c6aba4, 0x1bcdbe47, 0x3ca89bc6, 0x1401859e, + 0x3364a01f, 0x4453b82e, 0x63369daf, 0x4b9f83f7, 0x6cfaa676, + 0x988ed8ea, 0xbfebfd6b, 0x9742e333, 0xb027c6b2, 0xc710de83, + 0xe075fb02, 0xc8dce55a, 0xefb9c0db, 0x26b2d538, 0x01d7f0b9, + 0x297eeee1, 0x0e1bcb60, 0x792cd351, 0x5e49f6d0, 0x76e0e888, + 0x5185cd09}}; + +#endif + +#endif + +#endif + +local const z_crc_t FAR x2n_table[] = { + 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, + 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, + 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, + 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, + 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, + 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, + 0xc40ba6d0, 0xc4e22c3c}; diff --git a/3rdparty/zlib/deflate.c b/3rdparty/zlib/deflate.c index 1ec761448d..799fb93cc0 100644 --- a/3rdparty/zlib/deflate.c +++ b/3rdparty/zlib/deflate.c @@ -1,5 +1,5 @@ /* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -52,7 +52,7 @@ #include "deflate.h" const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; + " deflate 1.2.12 Copyright 1995-2022 Jean-loup Gailly and Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. 
If for some reason you cannot @@ -190,8 +190,11 @@ local const config configuration_table[10] = { * prev[] will be initialized on the fly. */ #define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + do { \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, \ + (unsigned)(s->hash_size-1)*sizeof(*s->head)); \ + } while (0) /* =========================================================================== * Slide the hash table when sliding the window down (could be avoided with 32 @@ -252,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, int wrap = 1; static const char my_version[] = ZLIB_VERSION; - ushf *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits. - */ - if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) { return Z_VERSION_ERROR; @@ -326,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - s->pending_buf = (uchf *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + /* We overlay pending_buf and sym_buf. This works since the average size + * for length/distance pairs over any compressed block is assured to be 31 + * bits or less. + * + * Analysis: The longest fixed codes are a length code of 8 bits plus 5 + * extra bits, for lengths 131 to 257. The longest fixed distance codes are + * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest + * possible fixed-codes length/distance pair is then 31 bits total. + * + * sym_buf starts one-fourth of the way into pending_buf. So there are + * three bytes in sym_buf for every four bytes in pending_buf. 
Each symbol + * in sym_buf is three bytes -- two for the distance and one for the + * literal/length. As each symbol is consumed, the pointer to the next + * sym_buf value to read moves forward three bytes. From that symbol, up to + * 31 bits are written to pending_buf. The closest the written pending_buf + * bits gets to the next sym_buf symbol to read is just before the last + * code is written. At that time, 31*(n-2) bits have been written, just + * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at + * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 + * symbols are written.) The closest the writing gets to what is unread is + * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and + * can range from 128 to 32768. + * + * Therefore, at a minimum, there are 142 bits of space between what is + * written and what is read in the overlain buffers, so the symbols cannot + * be overwritten by the compressed data. That space is actually 139 bits, + * due to the three-bit fixed-code block header. + * + * That covers the case where either Z_FIXED is specified, forcing fixed + * codes, or when the use of fixed codes is chosen, because that choice + * results in a smaller compressed block than dynamic codes. That latter + * condition then assures that the above analysis also covers all dynamic + * blocks. A dynamic-code block will only be chosen to be emitted if it has + * fewer bits than a fixed-code block would for the same set of symbols. + * Therefore its average symbol length is assured to be less than 31. So + * the compressed data for a dynamic block also cannot overwrite the + * symbols from which it is being constructed. 
+ */ + + s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); + s->pending_buf_size = (ulg)s->lit_bufsize * 4; if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || s->pending_buf == Z_NULL) { @@ -337,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, deflateEnd (strm); return Z_MEM_ERROR; } - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + s->sym_buf = s->pending_buf + s->lit_bufsize; + s->sym_end = (s->lit_bufsize - 1) * 3; + /* We avoid equality with lit_bufsize*3 because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ s->level = level; s->strategy = strategy; @@ -488,13 +528,13 @@ int ZEXPORT deflateResetKeep (strm) #ifdef GZIP s->wrap == 2 ? GZIP_STATE : #endif - s->wrap ? INIT_STATE : BUSY_STATE; + INIT_STATE; strm->adler = #ifdef GZIP s->wrap == 2 ? crc32(0L, Z_NULL, 0) : #endif adler32(0L, Z_NULL, 0); - s->last_flush = Z_NO_FLUSH; + s->last_flush = -2; _tr_init(s); @@ -549,7 +589,8 @@ int ZEXPORT deflatePrime (strm, bits, value) if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; - if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + if (bits < 0 || bits > 16 || + s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) return Z_BUF_ERROR; do { put = Buf_size - s->bi_valid; @@ -587,12 +628,12 @@ int ZEXPORT deflateParams(strm, level, strategy) func = configuration_table[s->level].func; if ((strategy != s->strategy || func != configuration_table[level].func) && - s->high_water) { + s->last_flush != -2) { /* Flush the last buffer: */ int err = deflate(strm, Z_BLOCK); if (err == Z_STREAM_ERROR) return err; - if (strm->avail_out == 0) + if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) return Z_BUF_ERROR; } if (s->level != level) { @@ -811,6 +852,8 @@ int ZEXPORT deflate (strm, flush) } /* Write the header */ + if (s->status == 
INIT_STATE && s->wrap == 0) + s->status = BUSY_STATE; if (s->status == INIT_STATE) { /* zlib header */ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; @@ -1108,7 +1151,6 @@ int ZEXPORT deflateCopy (dest, source) #else deflate_state *ds; deflate_state *ss; - ushf *overlay; if (deflateStateCheck(source) || dest == Z_NULL) { @@ -1128,8 +1170,7 @@ int ZEXPORT deflateCopy (dest, source) ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); - ds->pending_buf = (uchf *) overlay; + ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || ds->pending_buf == Z_NULL) { @@ -1143,8 +1184,7 @@ int ZEXPORT deflateCopy (dest, source) zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + ds->sym_buf = ds->pending_buf + ds->lit_bufsize; ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; @@ -1513,6 +1553,8 @@ local void fill_window(s) s->match_start -= wsize; s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ s->block_start -= (long) wsize; + if (s->insert > s->strstart) + s->insert = s->strstart; slide_hash(s); more += wsize; } @@ -1742,6 +1784,7 @@ local block_state deflate_stored(s, flush) s->matches = 2; /* clear hash */ zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); s->strstart = s->w_size; + s->insert = s->strstart; } else { if (s->window_size - s->strstart <= used) { @@ -1750,12 +1793,14 @@ local block_state deflate_stored(s, flush) zmemcpy(s->window, s->window + s->w_size, s->strstart); if (s->matches < 2) s->matches++; /* add a pending 
slide_hash() */ + if (s->insert > s->strstart) + s->insert = s->strstart; } zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); s->strstart += used; + s->insert += MIN(used, s->w_size - s->insert); } s->block_start = s->strstart; - s->insert += MIN(used, s->w_size - s->insert); } if (s->high_water < s->strstart) s->high_water = s->strstart; @@ -1770,7 +1815,7 @@ local block_state deflate_stored(s, flush) return block_done; /* Fill the window with any remaining input. */ - have = s->window_size - s->strstart - 1; + have = s->window_size - s->strstart; if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { /* Slide the window down. */ s->block_start -= s->w_size; @@ -1779,12 +1824,15 @@ local block_state deflate_stored(s, flush) if (s->matches < 2) s->matches++; /* add a pending slide_hash() */ have += s->w_size; /* more space now */ + if (s->insert > s->strstart) + s->insert = s->strstart; } if (have > s->strm->avail_in) have = s->strm->avail_in; if (have) { read_buf(s->strm, s->window + s->strstart, have); s->strstart += have; + s->insert += MIN(have, s->w_size - s->insert); } if (s->high_water < s->strstart) s->high_water = s->strstart; @@ -1912,7 +1960,7 @@ local block_state deflate_fast(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } @@ -2043,7 +2091,7 @@ local block_state deflate_slow(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } @@ -2118,7 +2166,7 @@ local block_state deflate_rle(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } @@ -2157,7 +2205,7 @@ local block_state deflate_huff(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } diff --git a/3rdparty/zlib/deflate.h b/3rdparty/zlib/deflate.h index 
23ecdd312b..17c226113b 100644 --- a/3rdparty/zlib/deflate.h +++ b/3rdparty/zlib/deflate.h @@ -1,5 +1,5 @@ /* deflate.h -- internal compression state - * Copyright (C) 1995-2016 Jean-loup Gailly + * Copyright (C) 1995-2018 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -217,7 +217,7 @@ typedef struct internal_state { /* Depth of each subtree used as tie breaker for trees of equal frequency */ - uchf *l_buf; /* buffer for literals or lengths */ + uchf *sym_buf; /* buffer for distances and literals/lengths */ uInt lit_bufsize; /* Size of match buffer for literals/lengths. There are 4 reasons for @@ -239,13 +239,8 @@ typedef struct internal_state { * - I can't count above 4 */ - uInt last_lit; /* running index in l_buf */ - - ushf *d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. - */ + uInt sym_next; /* running index in sym_buf */ + uInt sym_end; /* symbol table full when sym_next reaches this */ ulg opt_len; /* bit length of current block with optimal trees */ ulg static_len; /* bit length of current block with static trees */ @@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, # define _tr_tally_lit(s, c, flush) \ { uch cc = (c); \ - s->d_buf[s->last_lit] = 0; \ - s->l_buf[s->last_lit++] = cc; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = cc; \ s->dyn_ltree[cc].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ + flush = (s->sym_next == s->sym_end); \ } # define _tr_tally_dist(s, distance, length, flush) \ { uch len = (uch)(length); \ ush dist = (ush)(distance); \ - s->d_buf[s->last_lit] = dist; \ - s->l_buf[s->last_lit++] = len; \ + s->sym_buf[s->sym_next++] = dist; \ + s->sym_buf[s->sym_next++] = dist >> 8; \ + s->sym_buf[s->sym_next++] = len; \ dist--; \ 
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ + flush = (s->sym_next == s->sym_end); \ } #else # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) diff --git a/3rdparty/zlib/gzguts.h b/3rdparty/zlib/gzguts.h index 990a4d2514..57faf37165 100644 --- a/3rdparty/zlib/gzguts.h +++ b/3rdparty/zlib/gzguts.h @@ -1,5 +1,5 @@ /* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * Copyright (C) 2004-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -39,7 +39,7 @@ # include #endif -#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(_WIN32) # define WIDECHAR #endif @@ -190,6 +190,7 @@ typedef struct { /* just for writing */ int level; /* compression level */ int strategy; /* compression strategy */ + int reset; /* true if a reset is pending after a Z_FINISH */ /* seek request */ z_off64_t skip; /* amount to skip (already rewound if backwards) */ int seek; /* true if seek request pending */ diff --git a/3rdparty/zlib/gzlib.c b/3rdparty/zlib/gzlib.c index 4105e6aff9..dddaf26873 100644 --- a/3rdparty/zlib/gzlib.c +++ b/3rdparty/zlib/gzlib.c @@ -1,11 +1,11 @@ /* gzlib.c -- zlib functions common to reading and writing gzip files - * Copyright (C) 2004-2017 Mark Adler + * Copyright (C) 2004-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ #include "gzguts.h" -#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__) +#if defined(_WIN32) && !defined(__BORLANDC__) # define LSEEK _lseeki64 #else #if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 @@ -81,6 +81,8 @@ local void gz_reset(state) state->past = 0; /* have not read past end yet */ state->how = LOOK; /* look for gzip header */ } + else /* for writing ... 
*/ + state->reset = 0; /* no deflateReset pending */ state->seek = 0; /* no seek request pending */ gz_error(state, Z_OK, NULL); /* clear error */ state->x.pos = 0; /* no uncompressed data yet */ @@ -397,7 +399,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence) /* if within raw area while reading, just go there */ if (state->mode == GZ_READ && state->how == COPY && state->x.pos + offset >= 0) { - ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR); + ret = LSEEK(state->fd, offset - (z_off64_t)state->x.have, SEEK_CUR); if (ret == -1) return -1; state->x.have = 0; diff --git a/3rdparty/zlib/gzread.c b/3rdparty/zlib/gzread.c index e75cae64dc..884c9bfe4c 100644 --- a/3rdparty/zlib/gzread.c +++ b/3rdparty/zlib/gzread.c @@ -1,5 +1,5 @@ /* gzread.c -- zlib functions for reading gzip files - * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * Copyright (C) 2004-2017 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -314,9 +314,9 @@ local z_size_t gz_read(state, buf, len) got = 0; do { /* set n to the maximum amount of len that fits in an unsigned int */ - n = -1; + n = (unsigned)-1; if (n > len) - n = (int)len; + n = (unsigned)len; /* first just try copying data from the output buffer */ if (state->x.have) { @@ -397,7 +397,7 @@ int ZEXPORT gzread(file, buf, len) } /* read len or fewer bytes to buf */ - len = (int)gz_read(state, buf, len); + len = (unsigned)gz_read(state, buf, len); /* check for an error */ if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) @@ -447,7 +447,6 @@ z_size_t ZEXPORT gzfread(buf, size, nitems, file) int ZEXPORT gzgetc(file) gzFile file; { - int ret; unsigned char buf[1]; gz_statep state; @@ -469,8 +468,7 @@ int ZEXPORT gzgetc(file) } /* nothing there -- try gz_read() */ - ret = (int)gz_read(state, buf, 1); - return ret < 1 ? -1 : buf[0]; + return gz_read(state, buf, 1) < 1 ? 
-1 : buf[0]; } int ZEXPORT gzgetc_(file) diff --git a/3rdparty/zlib/gzwrite.c b/3rdparty/zlib/gzwrite.c index 57a1cc70c8..a8ffc8f53d 100644 --- a/3rdparty/zlib/gzwrite.c +++ b/3rdparty/zlib/gzwrite.c @@ -1,5 +1,5 @@ /* gzwrite.c -- zlib functions for writing gzip files - * Copyright (C) 2004-2017 Mark Adler + * Copyright (C) 2004-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -97,6 +97,15 @@ local int gz_comp(state, flush) return 0; } + /* check for a pending reset */ + if (state->reset) { + /* don't start a new gzip member unless there is data to write */ + if (strm->avail_in == 0) + return 0; + deflateReset(strm); + state->reset = 0; + } + /* run deflate() on provided input until it produces no more output */ ret = Z_OK; do { @@ -134,7 +143,7 @@ local int gz_comp(state, flush) /* if that completed a deflate stream, allow another to start */ if (flush == Z_FINISH) - deflateReset(strm); + state->reset = 1; /* all done, no errors */ return 0; @@ -209,7 +218,7 @@ local z_size_t gz_write(state, buf, len) state->in); copy = state->size - have; if (copy > len) - copy = (int)len; + copy = (unsigned)len; memcpy(state->in + have, buf, copy); state->strm.avail_in += copy; state->x.pos += copy; @@ -229,7 +238,7 @@ local z_size_t gz_write(state, buf, len) do { unsigned n = (unsigned)-1; if (n > len) - n = (int)len; + n = (unsigned)len; state->strm.avail_in = n; state->x.pos += n; if (gz_comp(state, Z_NO_FLUSH) == -1) @@ -349,12 +358,11 @@ int ZEXPORT gzputc(file, c) } /* -- see zlib.h -- */ -int ZEXPORT gzputs(file, str) +int ZEXPORT gzputs(file, s) gzFile file; - const char *str; + const char *s; { - int ret; - z_size_t len; + z_size_t len, put; gz_statep state; /* get internal structure */ @@ -367,9 +375,13 @@ int ZEXPORT gzputs(file, str) return -1; /* write string */ - len = strlen(str); - ret = (int)gz_write(state, str, len); - return ret == 0 && len != 0 ? 
-1 : ret; + len = strlen(s); + if ((int)len < 0 || (unsigned)len != len) { + gz_error(state, Z_STREAM_ERROR, "string length does not fit in int"); + return -1; + } + put = gz_write(state, s, len); + return put < len ? -1 : (int)len; } #if defined(STDC) || defined(Z_HAVE_STDARG_H) @@ -441,7 +453,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) strm->avail_in = state->size; if (gz_comp(state, Z_NO_FLUSH) == -1) return state->err; - memcpy(state->in, state->in + state->size, left); + memmove(state->in, state->in + state->size, left); strm->next_in = state->in; strm->avail_in = left; } @@ -540,7 +552,7 @@ int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, strm->avail_in = state->size; if (gz_comp(state, Z_NO_FLUSH) == -1) return state->err; - memcpy(state->in, state->in + state->size, left); + memmove(state->in, state->in + state->size, left); strm->next_in = state->in; strm->avail_in = left; } diff --git a/3rdparty/zlib/infback.c b/3rdparty/zlib/infback.c index 59679ecbfc..a390c58e81 100644 --- a/3rdparty/zlib/infback.c +++ b/3rdparty/zlib/infback.c @@ -1,5 +1,5 @@ /* infback.c -- inflate using a call-back interface - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -477,6 +477,7 @@ void FAR *out_desc; } Tracev((stderr, "inflate: codes ok\n")); state->mode = LEN; + /* fallthrough */ case LEN: /* use inflate_fast() if we have enough input and output */ diff --git a/3rdparty/zlib/inffast.c b/3rdparty/zlib/inffast.c index 0dbd1dbc09..1fec7f363f 100644 --- a/3rdparty/zlib/inffast.c +++ b/3rdparty/zlib/inffast.c @@ -70,7 +70,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ code const FAR *dcode; /* local strm->distcode */ unsigned lmask; /* mask for first level of length codes */ unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ + code const 
*here; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ @@ -107,20 +107,20 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold += (unsigned long)(*in++) << bits; bits += 8; } - here = lcode[hold & lmask]; + here = lcode + (hold & lmask); dolen: - op = (unsigned)(here.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(here->op); if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); + "inflate: literal 0x%02x\n", here->val)); + *out++ = (unsigned char)(here->val); } else if (op & 16) { /* length base */ - len = (unsigned)(here.val); + len = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { @@ -138,14 +138,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold += (unsigned long)(*in++) << bits; bits += 8; } - here = dcode[hold & dmask]; + here = dcode + (hold & dmask); dodist: - op = (unsigned)(here.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(here->op); if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); + dist = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (bits < op) { hold += (unsigned long)(*in++) << bits; @@ -264,7 +264,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; + here = dcode + here->val + (hold & ((1U << op) - 1)); goto dodist; } else { @@ -274,7 +274,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd 
level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; + here = lcode + here->val + (hold & ((1U << op) - 1)); goto dolen; } else if (op & 32) { /* end-of-block */ diff --git a/3rdparty/zlib/inflate.c b/3rdparty/zlib/inflate.c index 19a2cf2ed8..0e7c4f26b1 100644 --- a/3rdparty/zlib/inflate.c +++ b/3rdparty/zlib/inflate.c @@ -1,5 +1,5 @@ /* inflate.c -- zlib decompression - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -130,6 +130,7 @@ z_streamp strm; state->mode = HEAD; state->last = 0; state->havedict = 0; + state->flags = -1; state->dmax = 32768U; state->head = Z_NULL; state->hold = 0; @@ -448,10 +449,10 @@ unsigned copy; /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP -# define UPDATE(check, buf, len) \ +# define UPDATE_CHECK(check, buf, len) \ (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) #else -# define UPDATE(check, buf, len) adler32(check, buf, len) +# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len) #endif /* check macros for header crc */ @@ -671,7 +672,6 @@ int flush; state->mode = FLAGS; break; } - state->flags = 0; /* expect zlib header */ if (state->head != Z_NULL) state->head->done = -1; if (!(state->wrap & 1) || /* check if zlib header allowed */ @@ -698,6 +698,7 @@ int flush; break; } state->dmax = 1U << len; + state->flags = 0; /* indicate zlib header */ Tracev((stderr, "inflate: zlib header ok\n")); strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = hold & 0x200 ? 
DICTID : TYPE; @@ -723,6 +724,7 @@ int flush; CRC2(state->check, hold); INITBITS(); state->mode = TIME; + /* fallthrough */ case TIME: NEEDBITS(32); if (state->head != Z_NULL) @@ -731,6 +733,7 @@ int flush; CRC4(state->check, hold); INITBITS(); state->mode = OS; + /* fallthrough */ case OS: NEEDBITS(16); if (state->head != Z_NULL) { @@ -741,6 +744,7 @@ int flush; CRC2(state->check, hold); INITBITS(); state->mode = EXLEN; + /* fallthrough */ case EXLEN: if (state->flags & 0x0400) { NEEDBITS(16); @@ -754,6 +758,7 @@ int flush; else if (state->head != Z_NULL) state->head->extra = Z_NULL; state->mode = EXTRA; + /* fallthrough */ case EXTRA: if (state->flags & 0x0400) { copy = state->length; @@ -776,6 +781,7 @@ int flush; } state->length = 0; state->mode = NAME; + /* fallthrough */ case NAME: if (state->flags & 0x0800) { if (have == 0) goto inf_leave; @@ -797,6 +803,7 @@ int flush; state->head->name = Z_NULL; state->length = 0; state->mode = COMMENT; + /* fallthrough */ case COMMENT: if (state->flags & 0x1000) { if (have == 0) goto inf_leave; @@ -817,6 +824,7 @@ int flush; else if (state->head != Z_NULL) state->head->comment = Z_NULL; state->mode = HCRC; + /* fallthrough */ case HCRC: if (state->flags & 0x0200) { NEEDBITS(16); @@ -840,6 +848,7 @@ int flush; strm->adler = state->check = ZSWAP32(hold); INITBITS(); state->mode = DICT; + /* fallthrough */ case DICT: if (state->havedict == 0) { RESTORE(); @@ -847,8 +856,10 @@ int flush; } strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = TYPE; + /* fallthrough */ case TYPE: if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ case TYPEDO: if (state->last) { BYTEBITS(); @@ -899,8 +910,10 @@ int flush; INITBITS(); state->mode = COPY_; if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ case COPY_: state->mode = COPY; + /* fallthrough */ case COPY: copy = state->length; if (copy) { @@ -936,6 +949,7 @@ int flush; Tracev((stderr, "inflate: table sizes ok\n")); state->have = 0; 
state->mode = LENLENS; + /* fallthrough */ case LENLENS: while (state->have < state->ncode) { NEEDBITS(3); @@ -957,6 +971,7 @@ int flush; Tracev((stderr, "inflate: code lengths ok\n")); state->have = 0; state->mode = CODELENS; + /* fallthrough */ case CODELENS: while (state->have < state->nlen + state->ndist) { for (;;) { @@ -1040,8 +1055,10 @@ int flush; Tracev((stderr, "inflate: codes ok\n")); state->mode = LEN_; if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ case LEN_: state->mode = LEN; + /* fallthrough */ case LEN: if (have >= 6 && left >= 258) { RESTORE(); @@ -1091,6 +1108,7 @@ int flush; } state->extra = (unsigned)(here.op) & 15; state->mode = LENEXT; + /* fallthrough */ case LENEXT: if (state->extra) { NEEDBITS(state->extra); @@ -1101,6 +1119,7 @@ int flush; Tracevv((stderr, "inflate: length %u\n", state->length)); state->was = state->length; state->mode = DIST; + /* fallthrough */ case DIST: for (;;) { here = state->distcode[BITS(state->distbits)]; @@ -1128,6 +1147,7 @@ int flush; state->offset = (unsigned)here.val; state->extra = (unsigned)(here.op) & 15; state->mode = DISTEXT; + /* fallthrough */ case DISTEXT: if (state->extra) { NEEDBITS(state->extra); @@ -1144,6 +1164,7 @@ int flush; #endif Tracevv((stderr, "inflate: distance %u\n", state->offset)); state->mode = MATCH; + /* fallthrough */ case MATCH: if (left == 0) goto inf_leave; copy = out - left; @@ -1203,7 +1224,7 @@ int flush; state->total += out; if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, put - out, out); + UPDATE_CHECK(state->check, put - out, out); out = left; if ((state->wrap & 4) && ( #ifdef GUNZIP @@ -1219,10 +1240,11 @@ int flush; } #ifdef GUNZIP state->mode = LENGTH; + /* fallthrough */ case LENGTH: if (state->wrap && state->flags) { NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { + if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) { strm->msg = (char *)"incorrect length check"; state->mode = BAD; break; @@ -1232,6 
+1254,7 @@ int flush; } #endif state->mode = DONE; + /* fallthrough */ case DONE: ret = Z_STREAM_END; goto inf_leave; @@ -1241,6 +1264,7 @@ int flush; case MEM: return Z_MEM_ERROR; case SYNC: + /* fallthrough */ default: return Z_STREAM_ERROR; } @@ -1266,7 +1290,7 @@ int flush; state->total += out; if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); + UPDATE_CHECK(state->check, strm->next_out - out, out); strm->data_type = (int)state->bits + (state->last ? 64 : 0) + (state->mode == TYPE ? 128 : 0) + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); @@ -1402,6 +1426,7 @@ int ZEXPORT inflateSync(strm) z_streamp strm; { unsigned len; /* number of bytes to look at or looked at */ + int flags; /* temporary to save header status */ unsigned long in, out; /* temporary to save total_in and total_out */ unsigned char buf[4]; /* to restore bit buffer to byte string */ struct inflate_state FAR *state; @@ -1434,9 +1459,15 @@ z_streamp strm; /* return no joy or set up to restart inflate() on a new block */ if (state->have != 4) return Z_DATA_ERROR; + if (state->flags == -1) + state->wrap = 0; /* if no header yet, treat as raw */ + else + state->wrap &= ~4; /* no point in computing a check value now */ + flags = state->flags; in = strm->total_in; out = strm->total_out; inflateReset(strm); strm->total_in = in; strm->total_out = out; + state->flags = flags; state->mode = TYPE; return Z_OK; } @@ -1532,7 +1563,7 @@ int check; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; - if (check) + if (check && state->wrap) state->wrap |= 4; else state->wrap &= ~4; diff --git a/3rdparty/zlib/inflate.h b/3rdparty/zlib/inflate.h index a46cce6b6d..f127b6b1fa 100644 --- a/3rdparty/zlib/inflate.h +++ b/3rdparty/zlib/inflate.h @@ -1,5 +1,5 @@ /* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2019 Mark Adler * For 
conditions of distribution and use, see copyright notice in zlib.h */ @@ -86,7 +86,8 @@ struct inflate_state { int wrap; /* bit 0 true for zlib, bit 1 true for gzip, bit 2 true to validate check value */ int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ + int flags; /* gzip header method and flags, 0 if zlib, or + -1 if raw or no header yet */ unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ unsigned long check; /* protected copy of check value */ unsigned long total; /* protected copy of output count */ diff --git a/3rdparty/zlib/inftrees.c b/3rdparty/zlib/inftrees.c index 2ea08fc13e..09462a740b 100644 --- a/3rdparty/zlib/inftrees.c +++ b/3rdparty/zlib/inftrees.c @@ -1,5 +1,5 @@ /* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2017 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -9,7 +9,7 @@ #define MAXBITS 15 const char inflate_copyright[] = - " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; + " inflate 1.2.12 Copyright 1995-2022 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. 
If for some reason you cannot @@ -62,7 +62,7 @@ unsigned short FAR *work; 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202}; static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, diff --git a/3rdparty/zlib/trees.c b/3rdparty/zlib/trees.c index 50cf4b4571..f73fd99c37 100644 --- a/3rdparty/zlib/trees.c +++ b/3rdparty/zlib/trees.c @@ -1,5 +1,5 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2017 Jean-loup Gailly + * Copyright (C) 1995-2021 Jean-loup Gailly * detect_data_type() function provided freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -149,7 +149,7 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, local void compress_block OF((deflate_state *s, const ct_data *ltree, const ct_data *dtree)); local int detect_data_type OF((deflate_state *s)); -local unsigned bi_reverse OF((unsigned value, int length)); +local unsigned bi_reverse OF((unsigned code, int len)); local void bi_windup OF((deflate_state *s)); local void bi_flush OF((deflate_state *s)); @@ -416,7 +416,7 @@ local void init_block(s) s->dyn_ltree[END_BLOCK].Freq = 1; s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; + s->sym_next = s->matches = 0; } #define SMALLEST 1 @@ -870,7 +870,8 @@ void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) bi_windup(s); /* align on byte boundary */ put_short(s, (ush)stored_len); put_short(s, (ush)~stored_len); - zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + if (stored_len) + zmemcpy(s->pending_buf + 
s->pending, (Bytef *)buf, stored_len); s->pending += stored_len; #ifdef ZLIB_DEBUG s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; @@ -947,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); + s->sym_next / 3)); if (static_lenb <= opt_lenb) opt_lenb = static_lenb; @@ -1016,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) unsigned dist; /* distance of matched string */ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ { - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; + s->sym_buf[s->sym_next++] = dist; + s->sym_buf[s->sym_next++] = dist >> 8; + s->sym_buf[s->sym_next++] = lc; if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++; @@ -1032,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; } - -#ifdef TRUNCATE_BLOCK - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } -#endif - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. 
- */ + return (s->sym_next == s->sym_end); } /* =========================================================================== @@ -1068,13 +1047,14 @@ local void compress_block(s, ltree, dtree) { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ + unsigned sx = 0; /* running index in sym_buf */ unsigned code; /* the code to send */ int extra; /* number of extra bits to send */ - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; + if (s->sym_next != 0) do { + dist = s->sym_buf[sx++] & 0xff; + dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; + lc = s->sym_buf[sx++]; if (dist == 0) { send_code(s, lc, ltree); /* send a literal byte */ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); @@ -1099,11 +1079,10 @@ local void compress_block(s, ltree, dtree) } } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, - "pendingBuf overflow"); + /* Check that the overlay between pending_buf and sym_buf is ok: */ + Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); - } while (lx < s->last_lit); + } while (sx < s->sym_next); send_code(s, END_BLOCK, ltree); } @@ -1112,9 +1091,9 @@ local void compress_block(s, ltree, dtree) * Check if the data type is TEXT or BINARY, using the following algorithm: * - TEXT if the two conditions below are satisfied: * a) There are no non-portable control characters belonging to the - * "black list" (0..6, 14..25, 28..31). + * "block list" (0..6, 14..25, 28..31). * b) There is at least one printable character belonging to the - * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). * - BINARY otherwise. 
* - The following partially-portable control characters form a * "gray list" that is ignored in this detection algorithm: @@ -1124,19 +1103,19 @@ local void compress_block(s, ltree, dtree) local int detect_data_type(s) deflate_state *s; { - /* black_mask is the bit mask of black-listed bytes + /* block_mask is the bit mask of block-listed bytes * set bits 0..6, 14..25, and 28..31 * 0xf3ffc07f = binary 11110011111111111100000001111111 */ - unsigned long black_mask = 0xf3ffc07fUL; + unsigned long block_mask = 0xf3ffc07fUL; int n; - /* Check for non-textual ("black-listed") bytes. */ - for (n = 0; n <= 31; n++, black_mask >>= 1) - if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + /* Check for non-textual ("block-listed") bytes. */ + for (n = 0; n <= 31; n++, block_mask >>= 1) + if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) return Z_BINARY; - /* Check for textual ("white-listed") bytes. */ + /* Check for textual ("allow-listed") bytes. */ if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 || s->dyn_ltree[13].Freq != 0) return Z_TEXT; @@ -1144,7 +1123,7 @@ local int detect_data_type(s) if (s->dyn_ltree[n].Freq != 0) return Z_TEXT; - /* There are no "black-listed" or "white-listed" bytes: + /* There are no "block-listed" or "allow-listed" bytes: * this stream either is empty or has tolerated ("gray-listed") bytes only. */ return Z_BINARY; diff --git a/3rdparty/zlib/zlib.h b/3rdparty/zlib/zlib.h index f09cdaf1e0..4a98e38bf3 100644 --- a/3rdparty/zlib/zlib.h +++ b/3rdparty/zlib/zlib.h @@ -1,7 +1,7 @@ /* zlib.h -- interface of the 'zlib' general purpose compression library - version 1.2.11, January 15th, 2017 + version 1.2.12, March 11th, 2022 - Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages @@ -37,11 +37,11 @@ extern "C" { #endif -#define ZLIB_VERSION "1.2.11" -#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VERSION "1.2.12" +#define ZLIB_VERNUM 0x12c0 #define ZLIB_VER_MAJOR 1 #define ZLIB_VER_MINOR 2 -#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_REVISION 12 #define ZLIB_VER_SUBREVISION 0 /* @@ -543,8 +543,7 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, int strategy)); This is another version of deflateInit with more compression options. The - fields next_in, zalloc, zfree and opaque must be initialized before by the - caller. + fields zalloc, zfree and opaque must be initialized before by the caller. The method parameter is the compression method. It must be Z_DEFLATED in this version of the library. @@ -712,11 +711,12 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, used to switch between compression and straight copy of the input data, or to switch to a different kind of input data requiring a different strategy. If the compression approach (which is a function of the level) or the - strategy is changed, and if any input has been consumed in a previous - deflate() call, then the input available so far is compressed with the old - level and strategy using deflate(strm, Z_BLOCK). There are three approaches - for the compression levels 0, 1..3, and 4..9 respectively. The new level - and strategy will take effect at the next call of deflate(). + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). 
If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does not have enough output space to complete, then the parameter change will not @@ -865,9 +865,11 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, detection, or add 16 to decode only the gzip format (the zlib format will return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see - below), inflate() will not automatically decode concatenated gzip streams. - inflate() will return Z_STREAM_END at the end of the gzip stream. The state - would need to be reset to continue decoding a subsequent gzip stream. + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952). inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_VERSION_ERROR if the zlib library version is incompatible with the @@ -1302,14 +1304,14 @@ typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ /* ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); - Opens a gzip (.gz) file for reading or writing. The mode parameter is as - in fopen ("rb" or "wb") but can also include a compression level ("wb9") or - a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only - compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' - for fixed code compression as in "wb9F". (See the description of - deflateInit2 for more information about the strategy parameter.) 'T' will - request transparent writing or appending with no compression and not using - the gzip format. 
+ Open the gzip (.gz) file at path for reading and decompressing, or + compressing and writing. The mode parameter is as in fopen ("rb" or "wb") + but can also include a compression level ("wb9") or a strategy: 'f' for + filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", + 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression + as in "wb9F". (See the description of deflateInit2 for more information + about the strategy parameter.) 'T' will request transparent writing or + appending with no compression and not using the gzip format. "a" can be used instead of "w" to request that the gzip stream that will be written be appended to the file. "+" will result in an error, since @@ -1339,9 +1341,9 @@ ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); /* - gzdopen associates a gzFile with the file descriptor fd. File descriptors - are obtained from calls like open, dup, creat, pipe or fileno (if the file - has been previously opened with fopen). The mode parameter is as in gzopen. + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. The next call of gzclose on the returned gzFile will also close the file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor @@ -1362,13 +1364,13 @@ ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); /* - Set the internal buffer size used by this library's functions. The - default buffer size is 8192 bytes. This function must be called after - gzopen() or gzdopen(), and before any other calls that read or write the - file. The buffer memory allocation is always deferred to the first read or - write. Three times that size in buffer space is allocated. 
A larger buffer - size of, for example, 64K or 128K bytes will noticeably increase the speed - of decompression (reading). + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). The new buffer size also affects the maximum length for gzprintf(). @@ -1378,9 +1380,9 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); /* - Dynamically update the compression level or strategy. See the description - of deflateInit2 for the meaning of these parameters. Previously provided - data is flushed before the parameter change. + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not opened for writing, Z_ERRNO if there is an error writing the flushed data, @@ -1389,7 +1391,7 @@ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); /* - Reads the given number of uncompressed bytes from the compressed file. If + Read and decompress up to len uncompressed bytes from file into buf. If the input file is not in gzip format, gzread copies the given number of bytes into the buffer directly from the file. 
@@ -1420,11 +1422,11 @@ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, gzFile file)); /* - Read up to nitems items of size size from file to buf, otherwise operating - as gzread() does. This duplicates the interface of stdio's fread(), with - size_t request and return types. If the library defines size_t, then - z_size_t is identical to size_t. If not, then z_size_t is an unsigned - integer type that can contain a pointer. + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. gzfread() returns the number of full items read of size size, or zero if the end of the file was reached and a full item could not be read, or if @@ -1443,18 +1445,16 @@ ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, file, reseting and retrying on end-of-file, when size is not 1. */ -ZEXTERN int ZEXPORT gzwrite OF((gzFile file, - voidpc buf, unsigned len)); +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, voidpc buf, unsigned len)); /* - Writes the given number of uncompressed bytes into the compressed file. - gzwrite returns the number of uncompressed bytes written or 0 in case of - error. + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. */ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, z_size_t nitems, gzFile file)); /* - gzfwrite() writes nitems items of size size from buf to file, duplicating + Compress and write nitems items of size size from buf to file, duplicating the interface of stdio's fwrite(), with size_t request and return types. 
If the library defines size_t, then z_size_t is identical to size_t. If not, then z_size_t is an unsigned integer type that can contain a pointer. @@ -1467,22 +1467,22 @@ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); /* - Converts, formats, and writes the arguments to the compressed file under - control of the format string, as in fprintf. gzprintf returns the number of + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of uncompressed bytes actually written, or a negative zlib error code in case of error. The number of uncompressed bytes written is limited to 8191, or one less than the buffer size given to gzbuffer(). The caller should assure that this limit is not exceeded. If it is exceeded, then gzprintf() will return an error (0) with nothing written. In this case, there may also be a buffer overflow with unpredictable consequences, which is possible only if - zlib was compiled with the insecure functions sprintf() or vsprintf() + zlib was compiled with the insecure functions sprintf() or vsprintf(), because the secure snprintf() or vsnprintf() functions were not available. This can be determined using zlibCompileFlags(). */ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); /* - Writes the given null-terminated string to the compressed file, excluding + Compress and write the given null-terminated string s to file, excluding the terminating null character. gzputs returns the number of characters written, or -1 in case of error. @@ -1490,11 +1490,12 @@ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); /* - Reads bytes from the compressed file until len-1 characters are read, or a - newline character is read and transferred to buf, or an end-of-file - condition is encountered. 
If any characters are read or if len == 1, the - string is terminated with a null character. If no characters are read due - to an end-of-file or len < 1, then the buffer is left untouched. + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. gzgets returns buf which is a null-terminated string, or it returns NULL for end-of-file or in case of error. If there was an error, the contents at @@ -1503,13 +1504,13 @@ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); /* - Writes c, converted to an unsigned char, into the compressed file. gzputc + Compress and write c, converted to an unsigned char, into file. gzputc returns the value that was written, or -1 in case of error. */ ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); /* - Reads one byte from the compressed file. gzgetc returns this byte or -1 + Read and decompress one byte from file. gzgetc returns this byte or -1 in case of end of file or error. This is implemented as a macro for speed. As such, it does not do all of the checking the other functions do. I.e. it does not check to see if file is NULL, nor whether the structure file @@ -1518,8 +1519,8 @@ ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); /* - Push one character back onto the stream to be read as the first character - on the next read. At least one character of push-back is allowed. + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. gzungetc() returns the character pushed, or -1 on failure. 
gzungetc() will fail if c is -1, and may fail if a character has been pushed but not read yet. If gzungetc is used immediately after gzopen or gzdopen, at least the @@ -1530,9 +1531,9 @@ ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); /* - Flushes all pending output into the compressed file. The parameter flush - is as in the deflate() function. The return value is the zlib error number - (see function gzerror below). gzflush is only permitted when writing. + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. If the flush parameter is Z_FINISH, the remaining data is written and the gzip stream is completed in the output. If gzwrite() is called again, a new @@ -1547,8 +1548,8 @@ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, z_off_t offset, int whence)); - Sets the starting position for the next gzread or gzwrite on the given - compressed file. The offset represents a number of bytes in the + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the uncompressed data stream. The whence parameter is defined as in lseek(2); the value SEEK_END is not supported. @@ -1565,18 +1566,18 @@ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); /* - Rewinds the given file. This function is supported only for reading. + Rewind file. This function is supported only for reading. - gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). */ /* ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); - Returns the starting position for the next gzread or gzwrite on the given - compressed file. 
This position represents a number of bytes in the - uncompressed data stream, and is zero when starting, even if appending or - reading a gzip stream from the middle of a file using gzdopen(). + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) */ @@ -1584,22 +1585,22 @@ ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); /* ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); - Returns the current offset in the file being read or written. This offset - includes the count of bytes that precede the gzip stream, for example when - appending or when using gzdopen() for reading. When reading, the offset - does not include as yet unused buffered input. This information can be used - for a progress indicator. On error, gzoffset() returns -1. + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. */ ZEXTERN int ZEXPORT gzeof OF((gzFile file)); /* - Returns true (1) if the end-of-file indicator has been set while reading, - false (0) otherwise. Note that the end-of-file indicator is set only if the - read tried to go past the end of the input, but came up short. Therefore, - just like feof(), gzeof() may return false even if there is no more data to - read, in the event that the last read request was for the exact number of - bytes remaining in the input file. This will happen if the input file size - is an exact multiple of the buffer size. 
+ Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. If gzeof() returns true, then the read functions will return no more data, unless the end-of-file indicator is reset by gzclearerr() and the input file @@ -1608,7 +1609,7 @@ ZEXTERN int ZEXPORT gzeof OF((gzFile file)); ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); /* - Returns true (1) if file is being copied directly while reading, or false + Return true (1) if file is being copied directly while reading, or false (0) if file is a gzip stream being decompressed. If the input file is empty, gzdirect() will return true, since the input @@ -1629,8 +1630,8 @@ ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); ZEXTERN int ZEXPORT gzclose OF((gzFile file)); /* - Flushes all pending output if necessary, closes the compressed file and - deallocates the (de)compression state. Note that once file is closed, you + Flush all pending output for file, if necessary, close file and + deallocate the (de)compression state. Note that once file is closed, you cannot call gzerror with file, since its structures have been deallocated. gzclose must not be called more than once on the same file, just as free must not be called more than once on the same allocation. @@ -1654,10 +1655,10 @@ ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); /* - Returns the error message for the last error which occurred on the given - compressed file. errnum is set to zlib error number. 
If an error occurred - in the file system and not in the compression library, errnum is set to - Z_ERRNO and the application may consult errno to get the exact error code. + Return the error message for the last error which occurred on file. + errnum is set to zlib error number. If an error occurred in the file system + and not in the compression library, errnum is set to Z_ERRNO and the + application may consult errno to get the exact error code. The application must not modify the returned string. Future calls to this function may invalidate the previously returned string. If file is @@ -1670,7 +1671,7 @@ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); /* - Clears the error and end-of-file flags for file. This is analogous to the + Clear the error and end-of-file flags for file. This is analogous to the clearerr() function in stdio. This is useful for continuing to read a gzip file that is being written concurrently. */ @@ -1688,8 +1689,9 @@ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); /* Update a running Adler-32 checksum with the bytes buf[0..len-1] and - return the updated checksum. If buf is Z_NULL, this function returns the - required initial value for the checksum. + return the updated checksum. An Adler-32 value is in the range of a 32-bit + unsigned integer. If buf is Z_NULL, this function returns the required + initial value for the checksum. An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed much faster. @@ -1722,12 +1724,13 @@ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, negative, the result has no meaning or utility. 
*/ -ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); /* Update a running CRC-32 with the bytes buf[0..len-1] and return the - updated CRC-32. If buf is Z_NULL, this function returns the required - initial value for the crc. Pre- and post-conditioning (one's complement) is - performed within this function so it shouldn't be done by the application. + updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. + If buf is Z_NULL, this function returns the required initial value for the + crc. Pre- and post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the application. Usage example: @@ -1739,7 +1742,7 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); if (crc != original_crc) error(); */ -ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, +ZEXTERN uLong ZEXPORT crc32_z OF((uLong crc, const Bytef *buf, z_size_t len)); /* Same as crc32(), but with a size_t length. @@ -1755,6 +1758,20 @@ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); len2. */ +/* +ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t len2)); + + Return the operator corresponding to length len2, to be used with + crc32_combine_op(). +*/ + +ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op)); +/* + Give the same result as crc32_combine(), using op in place of len2. op is + is generated from len2 by crc32_combine_gen(). This will be faster than + crc32_combine() if the generated op is used more than once. 
+*/ + /* various hacks, don't look :) */ @@ -1842,6 +1859,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off64_t)); #endif #if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) @@ -1852,6 +1870,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ # define z_gzoffset z_gzoffset64 # define z_adler32_combine z_adler32_combine64 # define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 # else # define gzopen gzopen64 # define gzseek gzseek64 @@ -1859,6 +1878,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ # define gzoffset gzoffset64 # define adler32_combine adler32_combine64 # define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 # endif # ifndef Z_LARGE64 ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); @@ -1867,6 +1887,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t)); # endif #else ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); @@ -1875,12 +1896,14 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t)); #endif #else /* Z_SOLO */ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); ZEXTERN uLong ZEXPORT 
crc32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t)); #endif /* !Z_SOLO */ @@ -1893,7 +1916,7 @@ ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int)); ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp)); ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); -#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO) +#if defined(_WIN32) && !defined(Z_SOLO) ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, const char *mode)); #endif diff --git a/3rdparty/zlib/zutil.c b/3rdparty/zlib/zutil.c index 7271667fd7..dcab28a0d5 100644 --- a/3rdparty/zlib/zutil.c +++ b/3rdparty/zlib/zutil.c @@ -137,7 +137,7 @@ const char * ZEXPORT zError(err) } #if defined(_WIN32_WCE) && _WIN32_WCE < 0x800 - /* The Microsoft C Run-Time Library for Windows CE doesn't have + /* The older Microsoft C Run-Time Library for Windows CE doesn't have * errno. We define it as a global variable to simplify porting. * Its value is always 0 and should not be used. 
*/ diff --git a/3rdparty/zlib/zutil.h b/3rdparty/zlib/zutil.h index 4774fc3235..d9a20ae1bf 100644 --- a/3rdparty/zlib/zutil.h +++ b/3rdparty/zlib/zutil.h @@ -1,5 +1,5 @@ /* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -29,10 +29,6 @@ # include #endif -#ifdef Z_SOLO - typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ -#endif - #ifndef local # define local static #endif @@ -46,6 +42,17 @@ typedef unsigned short ush; typedef ush FAR ushf; typedef unsigned long ulg; +#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC) +# include +# if (ULONG_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned long +# elif (ULLONG_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned long long +# elif (UINT_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned +# endif +#endif + extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ @@ -169,13 +176,7 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX # if defined(_WIN32_WCE) -# if _WIN32_WCE < 0x800 -# define fdopen(fd,mode) NULL /* No fdopen() */ -# ifndef _PTRDIFF_T_DEFINED - typedef int ptrdiff_t; -# define _PTRDIFF_T_DEFINED -# endif -# endif +# define fdopen(fd,mode) NULL /* No fdopen() */ # else # define fdopen(fd,type) _fdopen(fd,type) # endif From 08d44f588f51c4427966968c8697becffd1063b9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 4 Apr 2022 22:02:40 +0000 Subject: [PATCH 67/84] dnn(test): update OpenVINO tests 2022.1.0 (OpenCV 4.x) --- modules/dnn/test/test_darknet_importer.cpp | 12 +- modules/dnn/test/test_model.cpp | 107 +++++++-- ...conformance_layer_filter__openvino.inl.hpp | 35 +++ 
modules/dnn/test/test_onnx_importer.cpp | 218 ++++++++++-------- 4 files changed, 260 insertions(+), 112 deletions(-) diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 59a709c1e4..75942b4f10 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -554,10 +554,20 @@ TEST_P(Test_Darknet_nets_async, Accuracy) l1 = 0.001; lInf = 0.005; } + if (INF_ENGINE_VER_MAJOR_EQ(2021040000) && targetId == DNN_TARGET_OPENCL_FP16 && prefix == "yolov4-tiny") // FIXIT: 4.x only, 3.4 branch works well + { + l1 = 0.001; + lInf = 0.005; + } + if (INF_ENGINE_VER_MAJOR_EQ(2022010000) && targetId == DNN_TARGET_OPENCL_FP16 && prefix == "yolov4-tiny") // FIXIT: 4.x only, 3.4 branch works well + { + l1 = 0.001; + lInf = 0.005; + } if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") { l1 = 0.005; - lInf = 1.5f; // |ref| = 0.95431125164031982 + lInf = 1.6f; // |ref| = 0.95431125164031982 } } #endif diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 4f5922182a..c5a0f2fa7f 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -110,6 +110,9 @@ public: model.setInputSize(size).setInputMean(mean).setInputScale(scale) .setInputSwapRB(swapRB).setInputCrop(crop); + model.setPreferableBackend(backend); + model.setPreferableTarget(target); + model.segment(frame, mask); normAssert(mask, exp, "", norm, norm); } @@ -287,20 +290,20 @@ TEST_P(Test_Model, DetectRegion) CV_TEST_TAG_MEMORY_2GB ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // 
nGraph compilation failure +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); @@ -347,20 +350,20 @@ TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) CV_TEST_TAG_MEMORY_2GB ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif 
defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); #endif @@ -403,13 +406,28 @@ TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) TEST_P(Test_Model, DetectionOutput) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: + // While validating node 'v1::Reshape bbox_pred_reshape (ave_bbox_pred_rois[0]:f32{1,8,1,1}, Constant_388[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': + // Requested output shape {1,300,8,1} is incompatible with input shape {1, 8, 1, 1} + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); @@ -613,13 +631,47 @@ TEST_P(Test_Model, Segmentation) CV_TEST_TAG_MEMORY_2GB ); + float norm = 0; + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + { + norm = 25.0f; // depends on OS/OpenCL version + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) + // Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + { + norm = 2.0f; // l1 = 0.01 lInf = 2 + } + std::string inp = _tf("dog416.png"); std::string weights_file = _tf("fcn8s-heavy-pascal.prototxt"); std::string config_file = _tf("fcn8s-heavy-pascal.caffemodel", false); std::string exp = _tf("segmentation_exp.png"); Size size{128, 128}; - float norm = 0; double scale = 1.0; Scalar mean = Scalar(); bool swapRB = false; @@ -629,7 +681,16 @@ TEST_P(Test_Model, Segmentation) TEST_P(Test_Model, TextRecognition) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // FIXIT: dnn/src/ie_ngraph.cpp:494: error: (-215:Assertion failed) !inps.empty() in function 'createNet' + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Node Transpose_79 was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, @@ -653,7 +714,13 @@ TEST_P(Test_Model, TextRecognition) TEST_P(Test_Model, TextRecognitionWithCTCPrefixBeamSearch) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Node Transpose_79 was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index 284dfb75fa..cad914d05a 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -1394,6 +1394,13 @@ CASE(test_reduce_prod_keepdims_random) default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 } #endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_prod_negative_axes_keepdims_example) // no filter CASE(test_reduce_prod_negative_axes_keepdims_random) @@ -1404,6 +1411,13 @@ CASE(test_reduce_prod_negative_axes_keepdims_random) default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 } #endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_sum_default_axes_keepdims_example) // no filter CASE(test_reduce_sum_default_axes_keepdims_random) @@ -1441,6 +1455,13 @@ CASE(test_reduce_sum_square_do_not_keepdims_random) default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 } #endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00723048 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201416 vs 0.02 + } +#endif CASE(test_reduce_sum_square_keepdims_example) // no filter 
CASE(test_reduce_sum_square_keepdims_random) @@ -1451,6 +1472,13 @@ CASE(test_reduce_sum_square_keepdims_random) default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 } #endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reduce_sum_square_negative_axes_keepdims_example) // no filter CASE(test_reduce_sum_square_negative_axes_keepdims_random) @@ -1461,6 +1489,13 @@ CASE(test_reduce_sum_square_negative_axes_keepdims_random) default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 } #endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reflect_pad) // no filter CASE(test_relu) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 20d8abec4b..11bf91b868 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -439,17 +439,25 @@ TEST_P(Test_ONNX_layers, ArgLayer) TEST_P(Test_ONNX_layers, Scale) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy (inf/nan) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, 
CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: mkldnn_node.cpp:238 Ngraph operation Reshape with name ReduceMean_0 has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // Ngraph operation Reshape with name ReduceMean_0 has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - // accuracy - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif testONNXModels("scale"); } @@ -560,7 +568,19 @@ TEST_P(Test_ONNX_layers, Elementwise_Sqrt) TEST_P(Test_ONNX_layers, Elementwise_not) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) @@ -571,145 +591,151 @@ TEST_P(Test_ONNX_layers, Elementwise_not) TEST_P(Test_ONNX_layers, Compare_EQ) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("equal"); } TEST_P(Test_ONNX_layers, Compare_GT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("greater"); } TEST_P(Test_ONNX_layers, Compare_LT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! 
- if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("less"); } TEST_P(Test_ONNX_layers, CompareSameDims_EQ) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("equal_same_dims", npy, 0, 0, false, true, 2); } TEST_P(Test_ONNX_layers, CompareSameDims_GT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("greater_same_dims", npy, 0, 0, false, true, 2); } TEST_P(Test_ONNX_layers, CompareSameDims_LT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("less_same_dims", npy, 0, 0, false, true, 2); } @@ -1198,6 +1224,16 @@ TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) TEST_P(Test_ONNX_layers, GRU) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! 
+ if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Node GRU_22 was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif testONNXModels("gru", npy, 0, 0, false, false); } From 271f7df3435c619ceba9261f88dcfbb0714b0b0d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 1 Apr 2022 18:02:14 +0000 Subject: [PATCH 68/84] videoio(ffmpeg): avoid memory leaks --- modules/videoio/src/cap_ffmpeg_impl.hpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 91a0f710aa..b7fa0b745c 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -2496,17 +2496,13 @@ double CvVideoWriter_FFMPEG::getProperty(int propId) const /// close video output stream and free associated memory void CvVideoWriter_FFMPEG::close() { - // nothing to do if already released - if ( !picture ) - return; - /* no more frame to compress. The codec has a latency of a few frames if using B frames, so we get the last frames by passing the same picture again */ // TODO -- do we need to account for latency here? 
/* write the trailer, if any */ - if(ok && oc) + if (picture && ok && oc) { #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0) if (!(oc->oformat->flags & AVFMT_RAWPICTURE)) @@ -2529,7 +2525,7 @@ void CvVideoWriter_FFMPEG::close() } // free pictures - if( context->pix_fmt != input_pix_fmt) + if (picture && context && context->pix_fmt != input_pix_fmt) { if(picture->data[0]) free(picture->data[0]); @@ -2540,8 +2536,14 @@ void CvVideoWriter_FFMPEG::close() if (input_picture) av_free(input_picture); +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#else /* close codec */ - avcodec_close(context); + if (context) // fixed after https://github.com/FFmpeg/FFmpeg/commit/3e1f507f3e8f16b716aa115552d243b48ae809bd + avcodec_close(context); + context = NULL; +#endif av_free(outbuf); @@ -2935,10 +2937,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, #endif #ifdef CV_FFMPEG_CODECPAR - if (context) - { - avcodec_free_context(&context); - } + avcodec_free_context(&context); #endif context = icv_configure_video_stream_FFMPEG(oc, video_st, codec, width, height, (int) (bitrate + 0.5), From d793ec2ffe4f8dbdd654a9b8dd8ba28d8d751974 Mon Sep 17 00:00:00 2001 From: eplankin Date: Tue, 5 Apr 2022 18:58:15 +0300 Subject: [PATCH 69/84] Merge pull request #21779 from eplankin:fix_11303 Fixed out-of-bounds read in parallel version of ippGaussianBlur() * Fixed out-of-memory read in parallel version of ippGaussianBlur() * Fixed check * Revert changes in CMakeLists.txt --- modules/imgproc/src/smooth.dispatch.cpp | 2 +- modules/imgproc/test/test_filter.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index 69d07580f2..b001a37f89 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -566,7 +566,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, if 
(IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4)) return false; - if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { + if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1 && iwSrc.m_size.height/(threads * 4) >= ksize.height/2) { bool ok; ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index 3e28a2caad..6de6cf4f87 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -2355,5 +2355,16 @@ TEST(Imgproc, filter_empty_src_16857) EXPECT_TRUE(dst2.empty()); } +TEST(Imgproc_GaussianBlur, regression_11303) +{ + cv::Mat dst; + int width = 2115; + int height = 211; + double sigma = 8.64421; + cv::Mat src(cv::Size(width, height), CV_32F, 1); + cv::GaussianBlur(src, dst, cv::Size(), sigma, sigma); + EXPECT_LE(cv::norm(src, dst, NORM_L2), 1e-3); +} + }} // namespace From 7ed557497dfacaab494959ac686891ce9e463dfa Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 5 Apr 2022 21:00:32 +0300 Subject: [PATCH 70/84] Merge pull request #21504 from smirnov-alexey:as/oak_infer [GAPI] Support basic inference in OAK backend * Combined commit which enables basic inference and other extra capabilities of OAK backend * Remove unnecessary target options from the cmakelist --- modules/gapi/CMakeLists.txt | 12 +- .../gapi/include/opencv2/gapi/oak/infer.hpp | 66 ++ modules/gapi/include/opencv2/gapi/oak/oak.hpp | 29 +- modules/gapi/samples/oak_basic_infer.cpp | 122 ++++ modules/gapi/samples/oak_copy.cpp | 48 ++ .../gapi/samples/oak_rgb_camera_encoding.cpp | 10 - .../samples/oak_small_hetero_pipeline.cpp | 11 - modules/gapi/src/backends/oak/goak.cpp | 15 +- ...a_adapter.cpp => goak_memory_adapters.cpp} | 32 +- modules/gapi/src/backends/oak/goakbackend.cpp | 662 ++++++++++++++---- ...ia_adapter.hpp => oak_memory_adapters.hpp} | 22 + modules/gapi/src/compiler/gmodel.hpp | 2 +- 12 files changed, 871 
insertions(+), 160 deletions(-) create mode 100644 modules/gapi/include/opencv2/gapi/oak/infer.hpp create mode 100644 modules/gapi/samples/oak_basic_infer.cpp create mode 100644 modules/gapi/samples/oak_copy.cpp rename modules/gapi/src/backends/oak/{goak_media_adapter.cpp => goak_memory_adapters.cpp} (54%) rename modules/gapi/src/backends/oak/{oak_media_adapter.hpp => oak_memory_adapters.hpp} (56%) diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 29036c4e26..579bcfc1be 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -138,7 +138,7 @@ set(gapi_srcs # OAK Backend (optional) src/backends/oak/goak.cpp src/backends/oak/goakbackend.cpp - src/backends/oak/goak_media_adapter.cpp + src/backends/oak/goak_memory_adapters.cpp # OCL Backend (currently built-in) src/backends/ocl/goclbackend.cpp @@ -375,13 +375,3 @@ if(HAVE_GAPI_ONEVPL) endif() endif() endif() - -if(HAVE_OAK) - # FIXME: consider better solution - if(TARGET example_gapi_oak_rgb_camera_encoding) - ocv_target_compile_definitions(example_gapi_oak_rgb_camera_encoding PRIVATE -DHAVE_OAK) - endif() - if(TARGET example_gapi_oak_small_hetero_pipeline) - ocv_target_compile_definitions(example_gapi_oak_small_hetero_pipeline PRIVATE -DHAVE_OAK) - endif() -endif() diff --git a/modules/gapi/include/opencv2/gapi/oak/infer.hpp b/modules/gapi/include/opencv2/gapi/oak/infer.hpp new file mode 100644 index 0000000000..4a1b9f6db6 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/oak/infer.hpp @@ -0,0 +1,66 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2022 Intel Corporation + +#ifndef OPENCV_GAPI_OAK_INFER_HPP +#define OPENCV_GAPI_OAK_INFER_HPP + +#include +#include +#include +#include + +#include +#include + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace oak { + +namespace detail { +/** +* @brief This structure contains description of inference parameters +* which is specific to OAK models. +*/ +struct ParamDesc { + std::string blob_file; +}; +} // namespace detail + +/** + * Contains description of inference parameters and kit of functions that + * fill this parameters. + */ +template class Params { +public: + /** @brief Class constructor. + + Constructs Params based on model information and sets default values for other + inference description parameters. + + @param model Path to model (.blob file) + */ + explicit Params(const std::string &model) { + desc.blob_file = model; + }; + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::oak::backend(); } + std::string tag() const { return Net::tag(); } + cv::util::any params() const { return { desc }; } + // END(G-API's network parametrization API) + +protected: + detail::ParamDesc desc; +}; + +} // namespace oak +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_OAK_INFER_HPP diff --git a/modules/gapi/include/opencv2/gapi/oak/oak.hpp b/modules/gapi/include/opencv2/gapi/oak/oak.hpp index 05fb09946f..ba1ea46cc7 100644 --- a/modules/gapi/include/opencv2/gapi/oak/oak.hpp +++ b/modules/gapi/include/opencv2/gapi/oak/oak.hpp @@ -89,28 +89,55 @@ G_API_OP(GSobelXY, , "org.opencv } }; +G_API_OP(GCopy, , "org.opencv.oak.copy") { + static GFrameDesc outMeta(const GFrameDesc& in) { + return in; + } +}; + +// FIXME: add documentation on operations below + GAPI_EXPORTS GArray encode(const GFrame& in, const EncoderConfig&); GAPI_EXPORTS GFrame sobelXY(const GFrame& in, const cv::Mat& hk, const cv::Mat& vk); +GAPI_EXPORTS GFrame copy(const GFrame& 
in); + // OAK backend & kernels //////////////////////////////////////////////////////// GAPI_EXPORTS cv::gapi::GBackend backend(); GAPI_EXPORTS cv::gapi::GKernelPackage kernels(); // Camera object /////////////////////////////////////////////////////////////// -struct GAPI_EXPORTS ColorCameraParams {}; +struct GAPI_EXPORTS ColorCameraParams { + /** + * Format of the frame one gets from the camera + */ + bool interleaved = false; + + // FIXME: extend + enum class BoardSocket: int { RGB, BGR }; + + BoardSocket board_socket = BoardSocket::RGB; + + // FIXME: extend + enum class Resolution: int { THE_1080_P }; + + Resolution resolution = Resolution::THE_1080_P; +}; class GAPI_EXPORTS ColorCamera: public cv::gapi::wip::IStreamSource { cv::MediaFrame m_dummy; + ColorCameraParams m_params; virtual bool pull(cv::gapi::wip::Data &data) override; virtual GMetaArg descr_of() const override; public: ColorCamera(); + explicit ColorCamera(const ColorCameraParams& params); }; } // namespace oak diff --git a/modules/gapi/samples/oak_basic_infer.cpp b/modules/gapi/samples/oak_basic_infer.cpp new file mode 100644 index 0000000000..ee234e3806 --- /dev/null +++ b/modules/gapi/samples/oak_basic_infer.cpp @@ -0,0 +1,122 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +const std::string keys = + "{ h help | | Print this help message }" + "{ detector | | Path to compiled .blob face detector model }" + "{ duration | 100 | Number of frames to pull from camera and run inference on }"; + +namespace custom { + +G_API_NET(FaceDetector, , "sample.custom.face-detector"); + +using GDetections = cv::GArray; +using GSize = cv::GOpaque; +using GPrims = cv::GArray; + +G_API_OP(BBoxes, , "sample.custom.b-boxes") { + static cv::GArrayDesc outMeta(const cv::GArrayDesc &) { + return cv::empty_array_desc(); + } +}; + +GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { + // This kernel converts the rectangles 
into G-API's + // rendering primitives + static void run(const std::vector &in_face_rcs, + std::vector &out_prims) { + out_prims.clear(); + const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) { + return cv::gapi::wip::draw::Rect(rc, clr, 2); + }; + for (auto &&rc : in_face_rcs) { + out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green + } + } +}; + +} // namespace custom + +int main(int argc, char *argv[]) { + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const auto det_name = cmd.get("detector"); + const auto duration = cmd.get("duration"); + + if (det_name.empty()) { + std::cerr << "FATAL: path to detection model is not provided for the sample." + << "Please specify it with --detector options." + << std::endl; + return 1; + } + + // Prepare G-API kernels and networks packages: + auto detector = cv::gapi::oak::Params(det_name); + auto networks = cv::gapi::networks(detector); + + auto kernels = cv::gapi::combine( + cv::gapi::kernels(), + cv::gapi::oak::kernels()); + + auto args = cv::compile_args(kernels, networks); + + // Initialize graph structure + cv::GFrame in; + cv::GFrame copy = cv::gapi::oak::copy(in); // NV12 transfered to host + passthrough copy for infer + cv::GOpaque sz = cv::gapi::streaming::size(copy); + + // infer is not affected by the actual copy here + cv::GMat blob = cv::gapi::infer(copy); + // FIXME: OAK infer detects faces slightly out of frame bounds + cv::GArray rcs = cv::gapi::parseSSD(blob, sz, 0.5f, true, false); + auto rendered = cv::gapi::wip::draw::renderFrame(copy, custom::BBoxes::on(rcs)); + // on-the-fly conversion NV12->BGR + cv::GMat out = cv::gapi::streaming::BGR(rendered); + + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out, rcs)) + .compileStreaming(std::move(args)); + + // Graph execution + pipeline.setSource(cv::gapi::wip::make_src()); + pipeline.start(); + + cv::Mat out_mat; + std::vector out_dets; + int frames = 0; + while 
(pipeline.pull(cv::gout(out_mat, out_dets))) { + std::string name = "oak_infer_frame_" + std::to_string(frames) + ".png"; + + cv::imwrite(name, out_mat); + + if (!out_dets.empty()) { + std::cout << "Got " << out_dets.size() << " detections on frame #" << frames << std::endl; + } + + ++frames; + if (frames == duration) { + pipeline.stop(); + break; + } + } + std::cout << "Pipeline finished. Processed " << frames << " frames" << std::endl; + return 0; +} diff --git a/modules/gapi/samples/oak_copy.cpp b/modules/gapi/samples/oak_copy.cpp new file mode 100644 index 0000000000..3c4d455f52 --- /dev/null +++ b/modules/gapi/samples/oak_copy.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +#include +#include // BGR accessor + +#include // CommandLineParser + +const std::string keys = + "{ h help | | Print this help message }" + "{ output | output.png | Path to the output file }"; + +int main(int argc, char *argv[]) { + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const std::string output_name = cmd.get("output"); + + cv::GFrame in; + // Actually transfers data to host + cv::GFrame copy = cv::gapi::oak::copy(in); + // Default camera works only with nv12 format + cv::GMat out = cv::gapi::streaming::Y(copy); + + auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, + cv::gapi::oak::kernels()); + + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)).compileStreaming(std::move(args)); + + // Graph execution ///////////////////////////////////////////////////////// + cv::Mat out_mat(1920, 1080, CV_8UC1); + + pipeline.setSource(cv::gapi::wip::make_src()); + pipeline.start(); + + // pull 1 frame + pipeline.pull(cv::gout(out_mat)); + + cv::imwrite(output_name, out_mat); + + std::cout << "Pipeline finished: " << output_name << " file has been written." 
<< std::endl; +} diff --git a/modules/gapi/samples/oak_rgb_camera_encoding.cpp b/modules/gapi/samples/oak_rgb_camera_encoding.cpp index ac6b5cc5f0..ee07ef09e4 100644 --- a/modules/gapi/samples/oak_rgb_camera_encoding.cpp +++ b/modules/gapi/samples/oak_rgb_camera_encoding.cpp @@ -13,8 +13,6 @@ const std::string keys = "{ h help | | Print this help message }" "{ output | output.h265 | Path to the output .h265 video file }"; -#ifdef HAVE_OAK - int main(int argc, char *argv[]) { cv::CommandLineParser cmd(argc, argv, keys); if (cmd.has("help")) { @@ -60,11 +58,3 @@ int main(int argc, char *argv[]) { std::cout << "Pipeline finished: " << output_name << " file has been written." << std::endl; } -#else // HAVE_OAK - -int main() { - GAPI_Assert(false && "Built without OAK support"); - return -1; -} - -#endif // HAVE_OAK diff --git a/modules/gapi/samples/oak_small_hetero_pipeline.cpp b/modules/gapi/samples/oak_small_hetero_pipeline.cpp index dadb9d0f3b..5df3ba5da1 100644 --- a/modules/gapi/samples/oak_small_hetero_pipeline.cpp +++ b/modules/gapi/samples/oak_small_hetero_pipeline.cpp @@ -13,8 +13,6 @@ const std::string keys = "{ h help | | Print this help message }" "{ output | output.png | Path to the output file }"; -#ifdef HAVE_OAK - int main(int argc, char *argv[]) { cv::CommandLineParser cmd(argc, argv, keys); if (cmd.has("help")) { @@ -58,12 +56,3 @@ int main(int argc, char *argv[]) { std::cout << "Pipeline finished: " << output_name << " file has been written." 
<< std::endl; } - -#else // HAVE_OAK - -int main() { - GAPI_Assert(false && "Built without OAK support"); - return -1; -} - -#endif // HAVE_OAK diff --git a/modules/gapi/src/backends/oak/goak.cpp b/modules/gapi/src/backends/oak/goak.cpp index 6d9044aefa..022f8f596f 100644 --- a/modules/gapi/src/backends/oak/goak.cpp +++ b/modules/gapi/src/backends/oak/goak.cpp @@ -7,7 +7,7 @@ #include #include -#include "oak_media_adapter.hpp" +#include "oak_memory_adapters.hpp" #include #include @@ -24,6 +24,10 @@ GFrame sobelXY(const GFrame& in, const cv::Mat& hk, const cv::Mat& vk) { return GSobelXY::on(in, hk, vk); } +GFrame copy(const GFrame& in) { + return GCopy::on(in); +} + // This is a dummy oak::ColorCamera class that just makes our pipelining // machinery work. The real data comes from the physical camera which // is handled by DepthAI library. @@ -31,6 +35,11 @@ ColorCamera::ColorCamera() : m_dummy(cv::MediaFrame::Create()) { } +ColorCamera::ColorCamera(const ColorCameraParams& params) + : m_dummy(cv::MediaFrame::Create()), + m_params(params) { +} + bool ColorCamera::pull(cv::gapi::wip::Data &data) { // FIXME: Avoid passing this formal frame to the pipeline std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -39,7 +48,9 @@ bool ColorCamera::pull(cv::gapi::wip::Data &data) { } cv::GMetaArg ColorCamera::descr_of() const { - return cv::GMetaArg{cv::descr_of(m_dummy)}; + // FIXME: support other resolutions + GAPI_Assert(m_params.resolution == ColorCameraParams::Resolution::THE_1080_P); + return cv::GMetaArg{cv::GFrameDesc{cv::MediaFormat::NV12, cv::Size{1920, 1080}}}; } } // namespace oak diff --git a/modules/gapi/src/backends/oak/goak_media_adapter.cpp b/modules/gapi/src/backends/oak/goak_memory_adapters.cpp similarity index 54% rename from modules/gapi/src/backends/oak/goak_media_adapter.cpp rename to modules/gapi/src/backends/oak/goak_memory_adapters.cpp index c8e6bbb59b..5805f7933f 100644 --- a/modules/gapi/src/backends/oak/goak_media_adapter.cpp +++ 
b/modules/gapi/src/backends/oak/goak_memory_adapters.cpp @@ -4,17 +4,15 @@ // // Copyright (C) 2021 Intel Corporation -#include "oak_media_adapter.hpp" +#include "oak_memory_adapters.hpp" namespace cv { namespace gapi { namespace oak { -OAKMediaAdapter::OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector&& buffer) { +OAKMediaAdapter::OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector&& buffer) +: m_sz(sz), m_fmt(fmt), m_buffer(buffer) { GAPI_Assert(fmt == cv::MediaFormat::NV12 && "OAKMediaAdapter only supports NV12 format for now"); - m_sz = sz; - m_fmt = fmt; - m_buffer = buffer; } MediaFrame::View OAKMediaAdapter::OAKMediaAdapter::access(MediaFrame::Access) { @@ -27,6 +25,30 @@ MediaFrame::View OAKMediaAdapter::OAKMediaAdapter::access(MediaFrame::Access) { cv::GFrameDesc OAKMediaAdapter::OAKMediaAdapter::meta() const { return {m_fmt, m_sz}; } +OAKRMatAdapter::OAKRMatAdapter(const cv::Size& size, + int precision, + std::vector&& buffer) + : m_size(size), m_precision(precision), m_buffer(buffer) { + GAPI_Assert(m_precision == CV_16F); + + std::vector wrapped_dims{1, 1, m_size.width, m_size.height}; + + // FIXME: check layout and add strides + m_desc = cv::GMatDesc(m_precision, wrapped_dims); + m_mat = cv::Mat(static_cast(wrapped_dims.size()), + wrapped_dims.data(), + CV_16FC1, // FIXME: cover other precisions + m_buffer.data()); +} + +cv::GMatDesc OAKRMatAdapter::desc() const { + return m_desc; +} + +cv::RMat::View OAKRMatAdapter::access(cv::RMat::Access) { + return cv::RMat::View{m_desc, m_mat.data}; +} + } // namespace oak } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/oak/goakbackend.cpp b/modules/gapi/src/backends/oak/goakbackend.cpp index dc0daaead3..83a2ca88d2 100644 --- a/modules/gapi/src/backends/oak/goakbackend.cpp +++ b/modules/gapi/src/backends/oak/goakbackend.cpp @@ -2,9 +2,10 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at 
http://opencv.org/license.html. // -// Copyright (C) 2021 Intel Corporation +// Copyright (C) 2021-2022 Intel Corporation #include // GKernelPackage +#include // kernels() #ifdef HAVE_OAK @@ -18,21 +19,24 @@ #include #include +#include // GInferBase #include // streaming::meta_tag #include "depthai/depthai.hpp" -#include -#include "oak_media_adapter.hpp" +#include "oak_memory_adapters.hpp" + +#include // infer params namespace cv { namespace gimpl { // Forward declaration class GOAKContext; -struct OAKNodeInfo; +class OAKKernelParams; class GOAKExecutable final: public GIslandExecutable { friend class GOAKContext; + friend class OAKKernelParams; virtual void run(std::vector&&, std::vector&&) override { GAPI_Assert(false && "Not implemented"); @@ -41,7 +45,8 @@ class GOAKExecutable final: public GIslandExecutable { virtual void run(GIslandExecutable::IInput &in, GIslandExecutable::IOutput &out) override; - void LinkToParents(ade::NodeHandle handle); + void linkToParent(ade::NodeHandle handle); + void linkCopy(ade::NodeHandle handle); class ExtractTypeHelper : protected dai::Node { public: @@ -61,6 +66,7 @@ class GOAKExecutable final: public GIslandExecutable { std::shared_ptr xlink_output; std::shared_ptr out_queue; std::string out_queue_name; + size_t gapi_out_data_index; }; cv::GArg packInArg(const GArg &arg, std::vector& oak_ins); @@ -79,11 +85,16 @@ class GOAKExecutable final: public GIslandExecutable { cv::Size m_camera_size; // Backend outputs - std::vector m_out_queues; + std::unordered_map> m_out_queues; // Backend inputs std::vector> m_in_queues; + std::unordered_set> m_passthrough_copy_nodes; + // Note: dai::Pipeline should be the only one for the whole pipeline, // so there is no way to insert any non-OAK node in graph between other OAK nodes. 
// The only heterogeneous case possible is if we insert other backends after or before @@ -91,6 +102,14 @@ class GOAKExecutable final: public GIslandExecutable { std::unique_ptr m_device; std::unique_ptr m_pipeline; + // Camera config + cv::gapi::oak::ColorCameraParams m_ccp; + + // Infer info + std::unordered_map> m_oak_infer_info; + public: GOAKExecutable(const ade::Graph& g, const cv::GCompileArgs& args, @@ -122,6 +141,12 @@ public: std::vector& args, std::vector& results); + GOAKContext(const std::unique_ptr& pipeline, + const cv::Size& camera_size, + const cv::gapi::oak::detail::ParamDesc& infer_info, + std::vector& args, + std::vector& results); + // Generic accessor API template T& inArg(int input) { return m_args.at(input).get(); } @@ -130,12 +155,14 @@ public: InputPtr& in(int input); OutputPtr& out(int output); - const std::unique_ptr& pipeline(); + const std::unique_ptr& pipeline() const; const cv::Size& camera_size() const; + const cv::gapi::oak::detail::ParamDesc& ii() const; private: const std::unique_ptr& m_pipeline; - const cv::Size& m_camera_size; + const cv::Size m_camera_size; + const cv::gapi::oak::detail::ParamDesc m_infer_info; std::vector& m_args; std::vector& m_outputs; }; @@ -144,9 +171,18 @@ GOAKContext::GOAKContext(const std::unique_ptr& pipeline, const cv::Size& camera_size, std::vector& args, std::vector& results) - : m_pipeline(pipeline), m_camera_size(camera_size), m_args(args), m_outputs(results) {} + : m_pipeline(pipeline), m_camera_size(camera_size), + m_args(args), m_outputs(results) {} -const std::unique_ptr& GOAKContext::pipeline() { +GOAKContext::GOAKContext(const std::unique_ptr& pipeline, + const cv::Size& camera_size, + const cv::gapi::oak::detail::ParamDesc& infer_info, + std::vector& args, + std::vector& results) + : m_pipeline(pipeline), m_camera_size(camera_size), + m_infer_info(infer_info), m_args(args), m_outputs(results) {} + +const std::unique_ptr& GOAKContext::pipeline() const { return m_pipeline; } @@ -154,6 
+190,10 @@ const cv::Size& GOAKContext::camera_size() const { return m_camera_size; } +const cv::gapi::oak::detail::ParamDesc& GOAKContext::ii() const { + return m_infer_info; +} + GOAKContext::InputPtr& GOAKContext::in(int input) { return inArg>(input).get(); } @@ -162,6 +202,14 @@ GOAKContext::OutputPtr& GOAKContext::out(int output) { return m_outputs.at(output); } +class OAKKernelParams { +public: + const std::unique_ptr& pipeline; + const cv::Size& camera_size; + const cv::gapi::oak::detail::ParamDesc& infer_info; + std::vector>& in_queues; +}; + namespace detail { template struct get_in; template<> struct get_in { @@ -179,13 +227,10 @@ template<> struct get_out { template struct get_out> { static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); } }; -// FIXME: add support of other types - -struct OAKKernelParams { - const std::unique_ptr& pipeline; - const cv::Size& camera_size; - std::vector>& m_in_queues; +template<> struct get_out { + static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); } }; +// FIXME: add support of other types template struct OAKCallHelper; @@ -200,6 +245,7 @@ struct OAKCallHelper, std::tuple > { , cv::detail::Seq) { return Impl::put(OAKKernelParams{ctx.pipeline(), ctx.camera_size(), + ctx.ii(), in_queues_params}, get_in::get(ctx, IIs)..., get_out::get(ctx, OIs)...); @@ -229,23 +275,84 @@ struct OAKComponent static const char *name() { return "OAK Component"; } GOAKKernel k; }; - -}} // namespace gimpl // namespace cv +} // namespace gimpl +} // namespace cv using OAKGraph = ade::TypedGraph - < cv::gimpl::OAKComponent - // FIXME: extend + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + // OAK specific + , cv::gimpl::OAKComponent >; using ConstOAKGraph = ade::ConstTypedGraph - < cv::gimpl::OAKComponent - // FIXME: extend + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction 
+ // OAK specific + , cv::gimpl::OAKComponent >; -// This function links dai operation nodes - parent's output to child's input. +namespace +{ +std::pair +parseDaiInferMeta(const cv::gapi::oak::detail::ParamDesc& pd) { + dai::OpenVINO::Blob blob(pd.blob_file); + + GAPI_Assert(blob.networkInputs.size() == 1); + GAPI_Assert(blob.networkOutputs.size() == 1); + + return {blob.networkInputs.begin()->second, + blob.networkOutputs.begin()->second}; +} + +std::string +getDaiInferOutLayerName(const cv::gapi::oak::detail::ParamDesc& pd) { + dai::OpenVINO::Blob blob(pd.blob_file); + + GAPI_Assert(blob.networkInputs.size() == 1); + GAPI_Assert(blob.networkOutputs.size() == 1); + + return blob.networkOutputs.begin()->first; +} +} // anonymous namespace + +// Custom meta function for OAK backend for infer +static cv::GMetaArgs customOutMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &/*in_metas*/, + const cv::GArgs &/*in_args*/) { + cv::GMetaArgs result; + const auto &np = ConstOAKGraph(gr).metadata(nh).get(); + const auto &pd = cv::util::any_cast(np.opaque); + + // FIXME: Infer kernel and backend does rather the same + auto in_out_tensor_info = parseDaiInferMeta(pd); + + GAPI_Assert(in_out_tensor_info.second.dataType == + dai::TensorInfo::DataType::FP16); + + // FIXME: add proper layout converter here + GAPI_Assert(in_out_tensor_info.second.order == + dai::TensorInfo::StorageOrder::NCHW); + + // FIXME: DAI returns vector, remove workaround + std::vector wrapped_dims; + for (const auto& d : in_out_tensor_info.second.dims) { + wrapped_dims.push_back(d); + } + result = {cv::GMetaArg{cv::GMatDesc(CV_16F, 1, cv::Size(wrapped_dims[1], wrapped_dims[0]), false)}}; + + return result; +} + +// This function links DAI operation nodes - parent's output to child's input. // It utilizes G-API graph to search for operation's node it's previous operation in graph -// when links them in dai graph. 
-void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) +// when links them in DAI graph. +void cv::gimpl::GOAKExecutable::linkToParent(ade::NodeHandle handle) { ade::NodeHandle parent; for (const auto& data_nh : handle.get()->inNodes()) { @@ -253,6 +360,13 @@ void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) GAPI_Assert(data_nh.get()->inNodes().size() == 1); parent = data_nh.get()->inNodes().front(); + // Don't link if parent is copy - the case is handled differently + // in linkCopy + const auto& op = m_gm.metadata(parent).get(); + if (op.k.name == "org.opencv.oak.copy") { + continue; + } + // Assuming that OAK nodes are aligned for linking. // FIXME: potential rework might be needed then // counterexample is found. @@ -269,6 +383,89 @@ void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) } } +// This function links DAI operations for Copy OP in G-API graph +void cv::gimpl::GOAKExecutable::linkCopy(ade::NodeHandle handle) { + // 1. Check that there are no back-to-back Copy OPs in graph + auto copy_out = handle.get()->outNodes(); + GAPI_Assert(copy_out.size() == 1); + for (const auto& copy_next_op : copy_out.front().get()->outNodes()) { + const auto& op = m_gm.metadata(copy_next_op).get(); + if (op.k.name == "org.opencv.oak.copy") { + GAPI_Assert(false && "Back-to-back Copy operations are not supported in graph"); + } + } + + // 2. 
Link passthrough case + if (m_passthrough_copy_nodes.find(handle) != m_passthrough_copy_nodes.end()) { + ExtractTypeHelper::OutputPtr parent; + bool parent_is_camera = false; + // Copy has only 1 input data + GAPI_Assert(handle.get()->inNodes().size() == 1); + auto in_ops = handle.get()->inNodes().front().get()->inNodes(); + if (in_ops.size() == 0) { + // No parent nodes - parent = camera + parent = &m_camera_input->video; + parent_is_camera = true; + } else { + // Data has only 1 input + GAPI_Assert(in_ops.size() == 1); + auto node = m_oak_nodes.at(in_ops.front()); + // Should only have 1 output + GAPI_Assert(node.outputs.size() == 1); + parent = node.outputs[0]; + } + + // Now link DAI parent output to Copy's child's inputs ignoring the Copy operation + // FIXME: simplify this loop + auto copy_out_data = handle.get()->outNodes(); + // Copy has only 1 output + GAPI_Assert(copy_out_data.size() == 1); + for (const auto& copy_next_op : copy_out_data.front().get()->outNodes()) { + if (m_oak_nodes.find(copy_next_op) != m_oak_nodes.end()) { + // FIXME: consider a better approach + if (parent_is_camera) { + if (m_oak_infer_info.find(copy_next_op) != m_oak_infer_info.end()) { + parent = &m_camera_input->preview; + } else { + parent = &m_camera_input->video; + } + } + // Found next Copy OP which needs to be linked to Copy's parent + GAPI_Assert(m_oak_nodes.at(copy_next_op).inputs.size() == 1 && + "Internal OAK nodes are not aligned for linking (Copy operation)"); + parent->link(*(m_oak_nodes.at(copy_next_op).inputs.front())); + } + } + } + + // 3. 
Link output Copy case + if (m_out_queues.find(handle) != m_out_queues.end()) { + // DAI XLinkOutput node + auto xout = m_out_queues[handle].xlink_output->input; + + // Find parent node + // FIXME: copypasted from case 2 above + ExtractTypeHelper::OutputPtr parent; + // Copy has only 1 input data + GAPI_Assert(handle.get()->inNodes().size() == 1); + auto in_ops = handle.get()->inNodes().front().get()->inNodes(); + if (in_ops.size() == 0) { + // No parent nodes - parent = camera + parent = &m_camera_input->video; + } else { + // Data has only 1 input + GAPI_Assert(in_ops.size() == 1); + auto node = m_oak_nodes.at(in_ops.front()); + // Should only have 1 output + GAPI_Assert(node.outputs.size() == 1); + parent = node.outputs[0]; + } + + // Link parent to xout + parent->link(xout); + } +} + cv::GArg cv::gimpl::GOAKExecutable::packInArg(const GArg &arg, std::vector& oak_ins) { @@ -298,9 +495,8 @@ void cv::gimpl::GOAKExecutable::packOutArg(const RcDesc &rc, std::vector& oak_outs) { switch (rc.shape) { case GShape::GFRAME: - oak_outs.push_back(nullptr); - break; case GShape::GARRAY: + case GShape::GMAT: oak_outs.push_back(nullptr); break; default: @@ -309,6 +505,33 @@ void cv::gimpl::GOAKExecutable::packOutArg(const RcDesc &rc, } } +namespace { +static dai::CameraBoardSocket extractCameraBoardSocket(cv::gapi::oak::ColorCameraParams ccp) { + switch (ccp.board_socket) { + case cv::gapi::oak::ColorCameraParams::BoardSocket::RGB: + return dai::CameraBoardSocket::RGB; + // FIXME: extend + default: + // basically unreachable + GAPI_Assert("Unsupported camera board socket"); + return {}; + } +} + +static dai::ColorCameraProperties::SensorResolution +extractCameraResolution(cv::gapi::oak::ColorCameraParams ccp) { + switch (ccp.resolution) { + case cv::gapi::oak::ColorCameraParams::Resolution::THE_1080_P: + return dai::ColorCameraProperties::SensorResolution::THE_1080_P; + // FIXME: extend + default: + // basically unreachable + GAPI_Assert("Unsupported camera board socket"); + 
return {}; + } +} +} // anonymous namespace + cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, const cv::GCompileArgs &args, const std::vector& nodes, @@ -344,16 +567,52 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, } } + m_ccp = cv::gimpl::getCompileArg(args) + .value_or(cv::gapi::oak::ColorCameraParams{}); + // FIXME: change the hard-coded behavior (XLinkIn path) auto camRgb = m_pipeline->create(); // FIXME: extract camera compile arguments here and properly convert them for dai - camRgb->setBoardSocket(dai::CameraBoardSocket::RGB); - camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); + camRgb->setBoardSocket(extractCameraBoardSocket(m_ccp)); + camRgb->setResolution(extractCameraResolution(m_ccp)); + camRgb->setInterleaved(m_ccp.interleaved); + + // Extract infer params + for (const auto& nh : nodes) { + if (m_gm.metadata(nh).get().t == NodeType::OP) { + if (ConstOAKGraph(m_g).metadata(nh).contains()) { + const auto &np = ConstOAKGraph(m_g).metadata(nh).get(); + const auto &pp = cv::util::any_cast(np.opaque); + m_oak_infer_info[nh] = pp; + break; + } + } + } + + // FIXME: handle multiple infers + if (!m_oak_infer_info.empty()) { + GAPI_Assert(m_oak_infer_info.size() == 1); + // FIXME: move to infer node? + auto in_out_tensor_info = parseDaiInferMeta(m_oak_infer_info.begin()->second); + + if (in_out_tensor_info.first.dataType == + dai::TensorInfo::DataType::FP16 || + in_out_tensor_info.first.dataType == + dai::TensorInfo::DataType::FP32) { + camRgb->setFp16(true); + } else { + camRgb->setFp16(false); + } + + // FIXME: add proper layout converter here + GAPI_Assert(in_out_tensor_info.first.order == + dai::TensorInfo::StorageOrder::NCHW); + camRgb->setPreviewSize(in_out_tensor_info.first.dims[0], in_out_tensor_info.first.dims[1]); + } - // Set camera output. 
Fixme: consider working with other camera outputs m_camera_input = camRgb; // FIXME: change when other camera censors are introduced - std::tuple video_size = camRgb->getVideoSize(); + std::tuple video_size = m_camera_input->getVideoSize(); m_camera_size = cv::Size{std::get<0>(video_size), std::get<1>(video_size)}; // Prepare XLinkOut nodes for each output object in graph @@ -361,7 +620,23 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, auto xout = m_pipeline->create(); std::string xout_name = "xout" + std::to_string(i); xout->setStreamName(xout_name); - m_out_queues.push_back({xout, nullptr, xout_name}); + + // Find parent OP's nh + ade::NodeHandle parent_op_nh; + for (const auto& nh : nodes) { + for (const auto& outdata : nh.get()->outNodes()) { + if (m_gm.metadata(outdata).get().t == NodeType::DATA) { + auto rc = m_gm.metadata(outdata).get().rc; + auto shape = m_gm.metadata(outdata).get().shape; + // Match outs_data with the actual operation + if (rc == outs_data[i].rc && shape == outs_data[i].shape) { + parent_op_nh = nh; + } + } + } + } + + m_out_queues[parent_op_nh] = {xout, nullptr, xout_name, i}; } // Create OAK node for each node in this backend @@ -375,33 +650,66 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, m_oak_nodes.at(nh).inputs.reserve(op.args.size()); m_oak_nodes.at(nh).outputs.reserve(op.outs.size()); + // Copy operation in graph can fall into 3 cases: + // 1) Copy is an output of the island - + // in that case we link it to XLinkOut node from m_out_queues + // 2) Copy is between other two operations in the same OAK island - + // in that case we link its parent operation (could be camera) to + // the child one (those copy operations are placed in m_passthrough_copy_nodes) + // 3) Copy can fall into cases 1) and 2) at the same time + + // Prepare passthrough Copy operations + if (op.k.name == "org.opencv.oak.copy") { + // Copy has only 1 output + auto copy_out = nh.get()->outNodes(); + 
GAPI_Assert(copy_out.size() == 1); + for (const auto& copy_next_op : copy_out.front().get()->outNodes()) { + // Check that copy is a passthrough OP + if (std::find(nodes.begin(), nodes.end(), copy_next_op) != nodes.end()) { + m_passthrough_copy_nodes.insert(nh); + break; + } + } + } + std::vector in_ctx_args; in_ctx_args.reserve(op.args.size()); for (auto &op_arg : op.args) in_ctx_args.push_back(packInArg(op_arg, - m_oak_nodes.at(nh).inputs)); + m_oak_nodes.at(nh).inputs)); for (auto &&op_out : op.outs) packOutArg(op_out, m_oak_nodes.at(nh).outputs); GAPI_Assert(!m_oak_nodes.at(nh).inputs.empty()); GAPI_Assert(!m_oak_nodes.at(nh).outputs.empty()); - GOAKContext ctx(m_pipeline, m_camera_size, in_ctx_args, m_oak_nodes.at(nh).outputs); - m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); - GAPI_Assert(m_oak_nodes.at(nh).node != nullptr); + if (ConstOAKGraph(m_g).metadata(nh).contains()) { + GOAKContext ctx(m_pipeline, m_camera_size, m_oak_infer_info[nh], + in_ctx_args, m_oak_nodes.at(nh).outputs); + m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); + } else { + GOAKContext ctx(m_pipeline, m_camera_size, + in_ctx_args, m_oak_nodes.at(nh).outputs); + m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); + } // Check that all inputs and outputs are properly filled after constructing kernels // to then link it together // FIXME: add more logging - const auto& node = m_oak_nodes.at(nh); - if (std::any_of(node.inputs.cbegin(), node.inputs.cend(), - [](ExtractTypeHelper::InputPtr ptr) { - return ptr == nullptr; - })) { - GAPI_Assert(false && "DAI input are not set"); - } - if (std::any_of(node.outputs.cbegin(), node.outputs.cend(), - [](ExtractTypeHelper::OutputPtr ptr) { - return ptr == nullptr; - })) { - GAPI_Assert(false && "DAI outputs are not set"); + const auto& node_info = m_oak_nodes.at(nh); + // Copy operations don't set their inputs/outputs properly + if (op.k.name != "org.opencv.oak.copy") { + GAPI_Assert(node_info.node != nullptr); + if 
(std::any_of(node_info.inputs.cbegin(), node_info.inputs.cend(), + [](ExtractTypeHelper::InputPtr ptr) { + return ptr == nullptr; + })) { + GAPI_Assert(false && "DAI input are not set"); + } + + if (std::any_of(node_info.outputs.cbegin(), node_info.outputs.cend(), + [](ExtractTypeHelper::OutputPtr ptr) { + return ptr == nullptr; + })) { + GAPI_Assert(false && "DAI outputs are not set"); + } } } } @@ -413,15 +721,26 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, ade::HandleHasher> out_nodes; std::unordered_set> inter_nodes; + std::unordered_set> copy_nodes; // TODO: optimize this loop for (const auto& node : m_oak_nodes) { auto nh = node.first; + // Check if it's a Copy OP - will be handled differently when linking + GAPI_Assert(m_gm.metadata(nh).get().t == NodeType::OP); + const auto& op = m_gm.metadata(nh).get(); + if (op.k.name == "org.opencv.oak.copy") { + copy_nodes.insert(nh); + continue; + } + // Fill input op nodes for (const auto& d : ins_data) { for (const auto& indata : nh.get()->inNodes()) { auto rc = m_gm.metadata(indata).get().rc; - if (rc == d.rc) { + auto shape = m_gm.metadata(indata).get().shape; + if (rc == d.rc && shape == d.shape) { in_nodes.insert(nh); } } @@ -430,7 +749,8 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, for (const auto& d : outs_data) { for (const auto& outdata : nh.get()->outNodes()) { auto rc = m_gm.metadata(outdata).get().rc; - if (rc == d.rc) { + auto shape = m_gm.metadata(outdata).get().shape; + if (rc == d.rc && shape == d.shape) { out_nodes.insert(nh); } } @@ -446,42 +766,47 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, // 1. 
Link input nodes to camera for (const auto& nh : in_nodes) { GAPI_Assert(m_oak_nodes.at(nh).inputs.size() == 1); - // FIXME: covert other camera outputs - m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0])); + // FIXME: cover other camera outputs + // Link preview to infer, video to all other nodes + if (m_oak_infer_info.find(nh) == m_oak_infer_info.end()) { + m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0])); + } else { + m_camera_input->preview.link(*(m_oak_nodes.at(nh).inputs[0])); + } } // 2. Link output nodes to XLinkOut nodes - size_t out_counter = 0; for (const auto& nh : out_nodes) { - GAPI_Assert(out_counter + m_oak_nodes.at(nh).outputs.size() <= m_out_queues.size()); for (const auto& out : m_oak_nodes.at(nh).outputs) { - out->link(m_out_queues[out_counter++].xlink_output->input); + out->link(m_out_queues[nh].xlink_output->input); } // Input nodes in OAK doesn't have parent operation - just camera (for now) if (in_nodes.find(nh) == in_nodes.end()) { - LinkToParents(nh); + linkToParent(nh); } } // 3. Link internal nodes to their parents for (const auto& nh : inter_nodes) { - // Input nodes in OAK doesn't have parent operation - just camera (for now) - if (in_nodes.find(nh) == in_nodes.end()) { - LinkToParents(nh); - } + linkToParent(nh); + } + + // 4. 
Link copy nodes + for (const auto& nh : copy_nodes) { + linkCopy(nh); } m_device = std::unique_ptr(new dai::Device(*m_pipeline)); // Prepare OAK output queues GAPI_Assert(m_out_queues.size() == outs_data.size()); - for (const auto out_it : ade::util::indexed(outs_data)) + for (const auto out_it : ade::util::indexed(m_out_queues)) { - auto& q = m_out_queues[ade::util::index(out_it)]; + auto& q = ade::util::value(out_it).second; GAPI_Assert(q.out_queue == nullptr); // shouldn't be not filled till this point // FIXME: add queue parameters - // Currently: 30 - max DAI queue capacity, true - blocking queue - q.out_queue = m_device->getOutputQueue(q.out_queue_name, 30, true); + // Currently: 4 - max DAI queue capacity, true - blocking queue + q.out_queue = m_device->getOutputQueue(q.out_queue_name, 4, true); } } @@ -507,17 +832,25 @@ void cv::gimpl::GOAKExecutable::run(GIslandExecutable::IInput &in, q->send(in_q.second); } - for (size_t i = 0; i < m_out_queues.size(); ++i) { - auto q = m_out_queues[i].out_queue; - // TODO: support other DAI types if needed - // Note: we utilize getData() method that returns std::vector of data - // on which we gain ownership - auto oak_frame = q->get(); + for (size_t i = 0; i < m_in_queues.size(); ++i) { + auto q = m_device->getInputQueue(m_in_queues[i].first); + q->send(m_in_queues[i].second); + } - auto out_arg = out.get(i); + for (const auto el : m_out_queues) { + const auto out_q = el.second; + auto& q = out_q.out_queue; + + auto out_arg = out.get(out_q.gapi_out_data_index); + + // FIXME: misc info to be utilized in switch below + cv::GRunArg::Meta meta; + std::shared_ptr oak_frame; switch(out_arg.index()) { case cv::GRunArgP::index_of(): + { + oak_frame = q->get(); // FIXME: hard-coded NV12 *cv::util::get(out_arg) = cv::MediaFrame::Create( @@ -525,58 +858,63 @@ void cv::gimpl::GOAKExecutable::run(GIslandExecutable::IInput &in, static_cast(oak_frame->getHeight())), cv::MediaFormat::NV12, std::move(oak_frame->getData())); + + using 
namespace cv::gapi::streaming::meta_tag; + meta[timestamp] = oak_frame->getTimestamp(); + meta[seq_id] = oak_frame->getSequenceNum(); + break; + } case cv::GRunArgP::index_of(): + { + oak_frame = q->get(); cv::util::get(out_arg).wref() = std::move(oak_frame->getData()); + + using namespace cv::gapi::streaming::meta_tag; + meta[timestamp] = oak_frame->getTimestamp(); + meta[seq_id] = oak_frame->getSequenceNum(); + break; + } + case cv::GRunArgP::index_of(): // only supported for infer + { + auto nn_data = q->get(); + + auto out_layer_name = getDaiInferOutLayerName(m_oak_infer_info.begin()->second); + auto in_out_tensor_info = parseDaiInferMeta(m_oak_infer_info.begin()->second); + + auto layer = std::move(nn_data->getLayerFp16(out_layer_name)); + + // FIXME: add proper layout converter here + GAPI_Assert(in_out_tensor_info.second.order == + dai::TensorInfo::StorageOrder::NCHW); + // FIMXE: only 1-channel data is supported for now + GAPI_Assert(in_out_tensor_info.second.dims[2] == 1); + + *cv::util::get(out_arg) = + cv::make_rmat( + cv::Size(in_out_tensor_info.second.dims[1], + in_out_tensor_info.second.dims[0]), + CV_16F, // FIXME: cover other precisions + std::move(layer) + ); + + using namespace cv::gapi::streaming::meta_tag; + meta[timestamp] = nn_data->getTimestamp(); + meta[seq_id] = nn_data->getSequenceNum(); + + break; + } // FIXME: Add support for remaining types default: GAPI_Assert(false && "Unsupported type in OAK backend"); } - using namespace cv::gapi::streaming::meta_tag; - cv::GRunArg::Meta meta; - meta[timestamp] = oak_frame->getTimestamp(); - meta[seq_id] = oak_frame->getSequenceNum(); - out.meta(out_arg, meta); out.post(std::move(out_arg)); } } -// Built-in kernels for OAK ///////////////////////////////////////////////////// - -class GOAKBackendImpl final : public cv::gapi::GBackend::Priv { - virtual void unpackKernel(ade::Graph &graph, - const ade::NodeHandle &op_node, - const cv::GKernelImpl &impl) override { - OAKGraph gm(graph); - - const auto 
&kimpl = cv::util::any_cast(impl.opaque); - gm.metadata(op_node).set(cv::gimpl::OAKComponent{kimpl}); - } - - virtual EPtr compile(const ade::Graph &graph, - const cv::GCompileArgs &args, - const std::vector &nodes, - const std::vector& ins_data, - const std::vector& outs_data) const override { - cv::gimpl::GModel::ConstGraph gm(graph); - // FIXME: pass streaming/non-streaming option to support non-camera case - // NB: how could we have non-OAK source in streaming mode, then OAK backend in - // streaming mode but without camera input? - if (!gm.metadata().contains()) { - GAPI_Assert(false && "OAK backend only supports Streaming mode for now"); - } - return EPtr{new cv::gimpl::GOAKExecutable(graph, args, nodes, ins_data, outs_data)}; - } -}; - -cv::gapi::GBackend cv::gapi::oak::backend() { - static cv::gapi::GBackend this_backend(std::make_shared()); - return this_backend; -} - namespace cv { namespace gimpl { namespace oak { @@ -604,10 +942,13 @@ static dai::VideoEncoderProperties::Profile convertEncProfile(cv::gapi::oak::Enc // Kernels /////////////////////////////////////////////////////////////// -template -class GOAKKernelImpl: public detail::OAKCallHelper +// FIXME: consider a better solution - hard-coded API +// Is there a way to extract API from somewhereelse/utilize structs +// like in streaming/infer backends (mainly infer and copy operations) +template +class GOAKKernelImpl: public detail::OAKCallHelper , public cv::detail::KernelTag { - using P = detail::OAKCallHelper; + using P = detail::OAKCallHelper; public: using API = K; static cv::gapi::GBackend backend() { return cv::gapi::oak::backend(); } @@ -617,17 +958,51 @@ public: #define GAPI_OAK_KERNEL(Name, API) \ struct Name: public cv::gimpl::oak::GOAKKernelImpl +#define GAPI_OAK_FIXED_API_KERNEL(Name, API, InArgs, OutArgs) \ + struct Name: public cv::gimpl::oak::GOAKKernelImpl + namespace { +GAPI_OAK_FIXED_API_KERNEL(GOAKInfer, cv::GInferBase, std::tuple, std::tuple) { + static std::shared_ptr 
put(const cv::gimpl::OAKKernelParams& params, + GOAKContext::InputPtr& in, + GOAKContext::OutputPtr& out) { + auto nn = params.pipeline->create(); + + nn->input.setBlocking(true); + nn->input.setQueueSize(1); + + // FIXME: add G-API built-in preproc here (currently it's only setPreviewSize() on the camera node) + // Note: for some reason currently it leads to: + // "Fatal error. Please report to developers. Log: 'ImageManipHelper' '61'" + + nn->setBlobPath(params.infer_info.blob_file); + + in = &(nn->input); + out = &(nn->out); + + return nn; + } +}; + +GAPI_OAK_KERNEL(GOAKCopy, cv::gapi::oak::GCopy) { + static std::shared_ptr put(const cv::gimpl::OAKKernelParams&, + GOAKContext::InputPtr&, + GOAKContext::OutputPtr&) { + // Do nothing in Copy OP since it's either already represented + // by XLinkOut node (bonded to output queues) or it's a passthrough OP + return nullptr; + } +}; + GAPI_OAK_KERNEL(GOAKEncFrame, cv::gapi::oak::GEncFrame) { - static std::shared_ptr put(const cv::gimpl::detail::OAKKernelParams& params, + static std::shared_ptr put(const cv::gimpl::OAKKernelParams& params, GOAKContext::InputPtr& in, const cv::gapi::oak::EncoderConfig& cfg, GOAKContext::OutputPtr& out) { auto videoEnc = params.pipeline->create(); // FIXME: convert all the parameters to dai - videoEnc->setDefaultProfilePreset(cfg.width, cfg.height, - cfg.frameRate, + videoEnc->setDefaultProfilePreset(cfg.frameRate, convertEncProfile(cfg.profile)); in = &(videoEnc->input); @@ -638,7 +1013,7 @@ GAPI_OAK_KERNEL(GOAKEncFrame, cv::gapi::oak::GEncFrame) { }; GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { - static std::shared_ptr put(const cv::gimpl::detail::OAKKernelParams& params, + static std::shared_ptr put(const cv::gimpl::OAKKernelParams& params, GOAKContext::InputPtr& in, const cv::Mat& hk, const cv::Mat& vk, @@ -664,7 +1039,7 @@ GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { xinEdgeCfg->out.link(edgeDetector->inputConfig); - params.m_in_queues.push_back({"sobel_cfg", 
cfg}); + params.in_queues.push_back({"sobel_cfg", cfg}); in = &(edgeDetector->inputImage); out = &(edgeDetector->outputImage); @@ -672,11 +1047,55 @@ GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { return edgeDetector; } }; + } // anonymous namespace } // namespace oak } // namespace gimpl } // namespace cv +class GOAKBackendImpl final : public cv::gapi::GBackend::Priv { + virtual void unpackKernel(ade::Graph &graph, + const ade::NodeHandle &op_node, + const cv::GKernelImpl &impl) override { + using namespace cv::gimpl; + + OAKGraph gm(graph); + + const auto &kimpl = cv::util::any_cast(impl.opaque); + gm.metadata(op_node).set(OAKComponent{kimpl}); + + // Set custom meta for infer + if (gm.metadata(op_node).contains()) { + gm.metadata(op_node).set(CustomMetaFunction{customOutMeta}); + } + } + + virtual EPtr compile(const ade::Graph &graph, + const cv::GCompileArgs &args, + const std::vector &nodes, + const std::vector& ins_data, + const std::vector& outs_data) const override { + cv::gimpl::GModel::ConstGraph gm(graph); + // FIXME: pass streaming/non-streaming option to support non-camera case + // NB: how could we have non-OAK source in streaming mode, then OAK backend in + // streaming mode but without camera input? 
+ if (!gm.metadata().contains()) { + GAPI_Assert(false && "OAK backend only supports Streaming mode for now"); + } + return EPtr{new cv::gimpl::GOAKExecutable(graph, args, nodes, ins_data, outs_data)}; + } + + virtual cv::GKernelPackage auxiliaryKernels() const override { + return cv::gapi::kernels< cv::gimpl::oak::GOAKInfer + >(); + } +}; + +cv::gapi::GBackend cv::gapi::oak::backend() { + static cv::gapi::GBackend this_backend(std::make_shared()); + return this_backend; +} + namespace cv { namespace gapi { namespace oak { @@ -684,6 +1103,7 @@ namespace oak { cv::gapi::GKernelPackage kernels() { return cv::gapi::kernels< cv::gimpl::oak::GOAKEncFrame , cv::gimpl::oak::GOAKSobelXY + , cv::gimpl::oak::GOAKCopy >(); } @@ -697,13 +1117,17 @@ namespace cv { namespace gapi { namespace oak { -cv::gapi::GKernelPackage kernels(); - cv::gapi::GKernelPackage kernels() { GAPI_Assert(false && "Built without OAK support"); return {}; } +cv::gapi::GBackend backend() { + GAPI_Assert(false && "Built without OAK support"); + static cv::gapi::GBackend this_backend(nullptr); + return this_backend; +} + } // namespace oak } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/oak/oak_media_adapter.hpp b/modules/gapi/src/backends/oak/oak_memory_adapters.hpp similarity index 56% rename from modules/gapi/src/backends/oak/oak_media_adapter.hpp rename to modules/gapi/src/backends/oak/oak_memory_adapters.hpp index 9c81f5a953..69677978d4 100644 --- a/modules/gapi/src/backends/oak/oak_media_adapter.hpp +++ b/modules/gapi/src/backends/oak/oak_memory_adapters.hpp @@ -10,11 +10,15 @@ #include #include +#include namespace cv { namespace gapi { namespace oak { +// Used for OAK backends outputs only. +// Filled from DepthAI's ImgFrame type and owns the memory. +// Used mainly for CV operations. 
class GAPI_EXPORTS OAKMediaAdapter final : public cv::MediaFrame::IAdapter { public: OAKMediaAdapter() = default; @@ -28,6 +32,24 @@ private: std::vector m_buffer; }; +// Used for OAK backends outputs only. +// Filled from DepthAI's NNData type and owns the memory. +// Used only for infer operations. +class GAPI_EXPORTS OAKRMatAdapter final : public cv::RMat::Adapter { +public: + OAKRMatAdapter() = default; + OAKRMatAdapter(const cv::Size& size, int precision, std::vector&& buffer); + cv::GMatDesc desc() const override; + cv::RMat::View access(cv::RMat::Access) override; + ~OAKRMatAdapter() = default; +private: + cv::Size m_size; + int m_precision; + std::vector m_buffer; + cv::GMatDesc m_desc; + cv::Mat m_mat; +}; + } // namespace oak } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/compiler/gmodel.hpp b/modules/gapi/src/compiler/gmodel.hpp index d016766fb5..592678dae7 100644 --- a/modules/gapi/src/compiler/gmodel.hpp +++ b/modules/gapi/src/compiler/gmodel.hpp @@ -70,7 +70,7 @@ struct Data // FIXME: This is a _pure_ duplication of RcDesc now! (except storage) GShape shape; // FIXME: Probably to be replaced by GMetaArg? 
- int rc; + int rc; // rc is unique but local to shape GMetaArg meta; HostCtor ctor; // T-specific helper to deal with unknown types in our code cv::detail::OpaqueKind kind; // FIXME: is needed to store GArray/GOpaque type From e36948cfbc7d2f7eb04f9e88dee2ba746c8bac45 Mon Sep 17 00:00:00 2001 From: zihaomu Date: Thu, 7 Apr 2022 15:32:12 +0800 Subject: [PATCH 71/84] add ONNX OP sign, shrink and reciprocal --- .../dnn/include/opencv2/dnn/all_layers.hpp | 20 +++ modules/dnn/src/cuda/activations.cu | 21 ++++ modules/dnn/src/cuda/functors.hpp | 44 +++++++ .../dnn/src/cuda4dnn/kernels/activations.hpp | 8 ++ .../src/cuda4dnn/primitives/activation.hpp | 46 +++++++ modules/dnn/src/init.cpp | 3 + modules/dnn/src/layers/elementwise_layers.cpp | 117 ++++++++++++++++++ modules/dnn/src/onnx/onnx_importer.cpp | 4 +- modules/dnn/src/opencl/activations.cl | 23 ++++ ..._conformance_layer_parser_denylist.inl.hpp | 5 - 10 files changed, 284 insertions(+), 7 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 2acb41076d..dae8701970 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -794,6 +794,26 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS SignLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ShrinkLayer : public ActivationLayer + { + public: + float bias; + float lambd; + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ReciprocalLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + /* Layers used in semantic segmentation */ class CV_EXPORTS CropLayer : public Layer diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index f5dafcea7f..ed34d57e0b 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ 
-248,6 +248,21 @@ void selu(const Stream& stream, Span output, View input, T alpha, T gamma) generic_op>(stream, output, input, {alpha, gamma}); } +template +void sign(const Stream& stream, Span output, View input) { + generic_op>(stream, output, input); +} + +template +void shrink(const Stream& stream, Span output, View input, T bias, T lambd) { + generic_op>(stream, output, input, {bias, lambd}); +} + +template +void reciprocal(const Stream& stream, Span output, View input) { + generic_op>(stream, output, input); +} + template void thresholdedrelu(const Stream& stream, Span output, View input, T alpha) { generic_op>(stream, output, input, {alpha}); @@ -312,6 +327,9 @@ template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __ template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void sign<__half>(const Stream&, Span<__half>, View<__half>); +template void shrink<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void reciprocal<__half>(const Stream&, Span<__half>, View<__half>); #endif @@ -351,6 +369,9 @@ template void selu(const Stream&, Span, View, float, float) template void thresholdedrelu(const Stream&, Span, View, float); template void power(const Stream&, Span, View, float, float, float); template void exp(const Stream&, Span, View, float, float); +template void sign(const Stream&, Span, View); +template void shrink(const Stream&, Span, View, float, float); +template void reciprocal(const Stream&, Span, View); template static void launch_vectorized_axiswise_relu(const Stream& stream, Span output, View input, std::size_t inner_size, View slope) { diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 640c7c8ad6..378df82f26 100644 --- 
a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -726,6 +726,50 @@ struct DivFunctor { CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; } }; +template +struct SignFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() {} + }; + + CUDA4DNN_DEVICE SignFunctor() : SignFunctor(Params{}) { } + + CUDA4DNN_DEVICE T operator()(T value) { + return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0)); + } +}; + +template +struct ShrinkFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : bias(0), lambd(0.5) { } + CUDA4DNN_HOST_DEVICE Params(T bias_, T lambd_) : bias(bias_), lambd(lambd_) { } + T bias, lambd; + }; + + CUDA4DNN_DEVICE ShrinkFunctor() : bias(0), lambd(0.5) { } + CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { } + + CUDA4DNN_DEVICE T operator()(T value) { + return value > lambd ? value - bias : (value < -lambd ? value + bias : T(0)); + } + + T bias, lambd; +}; + +template +struct ReciprocalFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() {} + }; + + CUDA4DNN_DEVICE ReciprocalFunctor() : ReciprocalFunctor(Params{}) { } + + CUDA4DNN_DEVICE T operator()(T value) { + return T(1.0f)/value; + } +}; + }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp index ef1f6da3e6..6958b93d5e 100644 --- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp @@ -123,6 +123,14 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void exp(const csl::Stream& stream, csl::Span output, csl::View input, T normScale, T normShift); + template + void sign(const csl::Stream& stream, csl::Span output, csl::View input); + + template + void shrink(const csl::Stream& stream, csl::Span output, csl::View input, T bias, T lambd); + + template + void 
reciprocal(const csl::Stream& stream, csl::Span output, csl::View input); }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ACTIVATIONS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp index 39ebf513a7..564202e8c0 100644 --- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp @@ -584,6 +584,52 @@ namespace cv { namespace dnn { namespace cuda4dnn { const T normScale, normShift; }; + template + class ShrinkOp final : public BaseOp { + public: + ShrinkOp(csl::Stream stream_, T bias_, T lambd_) + : stream(std::move(stream_)), bias{ bias_ }, lambd{ lambd_ } { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::shrink(stream, output, input, bias, lambd); + } + + private: + csl::Stream stream; + const T bias, lambd; + }; + + template + class SignOp final : public BaseOp { + public: + SignOp(csl::Stream stream_) + : stream(std::move(stream_)) { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::sign(stream, output, input); + } + + private: + csl::Stream stream; + }; + + template + class ReciprocalOp final : public BaseOp { + public: + ReciprocalOp(csl::Stream stream_) + : stream(std::move(stream_)) { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::reciprocal(stream, output, input); + } + + private: + csl::Stream stream; + }; + }}} /* namespace cv::dnn::cuda4dnn */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ACTIVATION_HPP */ diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 86ceba382e..6979d1864d 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -130,6 +130,8 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(HardSwish, HardSwishLayer); CV_DNN_REGISTER_LAYER_CLASS(Sin, SinLayer); CV_DNN_REGISTER_LAYER_CLASS(Sinh, SinhLayer); + 
CV_DNN_REGISTER_LAYER_CLASS(Sign, SignLayer); + CV_DNN_REGISTER_LAYER_CLASS(Shrink, ShrinkLayer); CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer); CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer); CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer); @@ -144,6 +146,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Silence, BlankLayer); CV_DNN_REGISTER_LAYER_CLASS(Const, ConstLayer); CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer); + CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer); CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer); CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 0accbe0fbb..eb2ecce3ce 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -2270,6 +2270,96 @@ struct ChannelsPReLUFunctor : public BaseFunctor int64 getFLOPSPerElement() const { return 1; } }; +struct SignFunctor : public BaseDefaultFunctor +{ + typedef SignLayer Layer; + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return x > 0 ? 1 : (x < 0 ? -1 : 0); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const SignFunctor::BaseDefaultFunctor::ocl_kernel_name = "SignForward"; + + +struct ShrinkFunctor : public BaseDefaultFunctor +{ + typedef ShrinkLayer Layer; + float bias; + float lambd; + + explicit ShrinkFunctor(float bias_ = 0.0f, float lambd_ = 0.5f) : bias(bias_), lambd(lambd_) {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return x > lambd ? x - bias : (x < -lambd ? 
x + bias : 0); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const ShrinkFunctor::BaseDefaultFunctor::ocl_kernel_name = "ShrinkForward"; + +struct ReciprocalFunctor : public BaseDefaultFunctor +{ + typedef ReciprocalLayer Layer; + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return 1.0/x; + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const ReciprocalFunctor::BaseDefaultFunctor::ocl_kernel_name = "ReciprocalForward"; + + #define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \ Ptr<_Layer> _Layer::create() { \ return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); } @@ -2611,5 +2701,32 @@ Ptr ChannelsPReLULayer::create(const LayerParams& params) return l; } +Ptr SignLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr ReciprocalLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr ShrinkLayer::create(const LayerParams& params) +{ + float bias = params.get("bias", 0.f); + float lambd = params.get("lambd", 0.5f); + Ptr l(new ElementWiseLayer(ShrinkFunctor(bias, lambd))); + l->setParamsFrom(params); + l->bias = bias; + l->lambd = lambd; + + return l; +} } } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 6cf0fe8f57..2a440a1284 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -3675,8 +3675,8 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) 
std::vector simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos", "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish", - "Identity", "Log", "Round", "Selu", "Sigmoid", "Sin", "Sinh", "Softmax", - "Softplus", "Softsign", "Sqrt", "Tan", "ThresholdedRelu"}; + "Identity", "Log", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh", "Softmax", + "Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu"}; for (const auto& name : simpleLayers) { dispatch[name] = &ONNXImporter::parseSimpleLayers; diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index 3e99f18570..d105623403 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -306,3 +306,26 @@ __kernel void ThresholdedReluForward(const int n, __global T* in, __global T* ou if(index < n) out[index] = (in[index] > alpha ? in[index] : 0.f); } + +__kernel void ShrinkForward(const int n, __global T* in, __global T* out, + const KERNEL_ARG_DTYPE bias, + const KERNEL_ARG_DTYPE lambd) +{ + int index = get_global_id(0); + if(index < n) + out[index] = in[index] < -lambd ? in[index] + bias : (in[index] > lambd ? in[index] - bias : 0.f); +} + +__kernel void SignForward(const int n, __global T* in, __global T* out) +{ + int index = get_global_id(0); + if(index < n) + out[index] = in[index] > 0.f ? 1.0f : (in[index] < 0.f) ? 
-1.0f : 0.0f;
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + Net net = readNetFromCaffe(proto, model); Mat img = imread(findDataFile("gpu/lbpcascade/er.png")); Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); @@ -660,6 +663,9 @@ TEST_P(opencv_face_detector, issue_15106) std::string model = findDataFile(get<0>(GetParam()), false); dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam()); + if (targetId == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + Net net = readNetFromCaffe(proto, model); Mat img = imread(findDataFile("cv/shared/lena.png")); img = img.rowRange(img.rows / 4, 3 * img.rows / 4).colRange(img.cols / 4, 3 * img.cols / 4); @@ -673,13 +679,13 @@ TEST_P(opencv_face_detector, issue_15106) // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] Mat out = net.forward(); Mat ref = (Mat_(1, 7) << 0, 1, 0.9149431, 0.30424616, 0.26964942, 0.88733053, 0.99815309); - normAssertDetections(ref, out, "", 0.2, 6e-5, 1e-4); + normAssertDetections(ref, out, "", 0.89, 6e-5, 1e-4); } INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, Combine( Values("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector_fp16.caffemodel"), - Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL) + testing::ValuesIn(getAvailableTargets(DNN_BACKEND_OPENCV)) ) ); diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 1f9ae6c2a6..f2f00b08c2 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -906,7 +906,7 @@ TEST_P(Test_Int8_nets, opencv_face_detector) 0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494, 0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801); - float confThreshold = 0.5, scoreDiff = 0.002, iouDiff = 0.21; + float confThreshold = 0.5, scoreDiff = 0.002, iouDiff = 0.4; testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); } From 
e3a55af33657eb73064f7b34b75123a3de0d397f Mon Sep 17 00:00:00 2001 From: OpenCV Developers Date: Mon, 11 Apr 2022 19:14:33 +0000 Subject: [PATCH 73/84] dnn(test): update opencv_face_detector checks original commit: be4a432bea6f714bf14c8d9a4883b56d0effb4a0 --- modules/dnn/test/test_caffe_importer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index d1ea09a3bf..f56512e932 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -606,6 +606,9 @@ TEST_P(opencv_face_detector, Accuracy) std::string model = findDataFile(get<0>(GetParam()), false); dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam()); + if (targetId == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + Net net = readNetFromCaffe(proto, model); Mat img = imread(findDataFile("gpu/lbpcascade/er.png")); Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); @@ -633,6 +636,9 @@ TEST_P(opencv_face_detector, issue_15106) std::string model = findDataFile(get<0>(GetParam()), false); dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam()); + if (targetId == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + Net net = readNetFromCaffe(proto, model); Mat img = imread(findDataFile("cv/shared/lena.png")); img = img.rowRange(img.rows / 4, 3 * img.rows / 4).colRange(img.cols / 4, 3 * img.cols / 4); @@ -646,13 +652,13 @@ TEST_P(opencv_face_detector, issue_15106) // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] Mat out = net.forward(); Mat ref = (Mat_(1, 7) << 0, 1, 0.9149431, 0.30424616, 0.26964942, 0.88733053, 0.99815309); - normAssertDetections(ref, out, "", 0.2, 6e-5, 1e-4); + normAssertDetections(ref, out, "", 0.89, 6e-5, 1e-4); } INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, Combine( 
Values("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector_fp16.caffemodel"), - Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL) + testing::ValuesIn(getAvailableTargets(DNN_BACKEND_OPENCV)) ) ); From 554d08c3a10551646e892d6a5a05f92bcdc050e2 Mon Sep 17 00:00:00 2001 From: luzpaz Date: Wed, 13 Apr 2022 13:06:37 -0400 Subject: [PATCH 74/84] Merge pull request #21775 from luzpaz:typos/gapi * GAPI: fix various gapi related typos Fixes source comments and documentation related to gapi code. * Fix source typos * Fixed typos requested * Follow-up typo fix --- modules/gapi/doc/slides/gapi_overview.org | 2 +- modules/gapi/include/opencv2/gapi/garray.hpp | 2 +- modules/gapi/include/opencv2/gapi/gframe.hpp | 2 +- modules/gapi/include/opencv2/gapi/gscalar.hpp | 2 +- modules/gapi/include/opencv2/gapi/gstreaming.hpp | 2 +- modules/gapi/include/opencv2/gapi/gtype_traits.hpp | 2 +- modules/gapi/include/opencv2/gapi/imgproc.hpp | 4 ++-- modules/gapi/include/opencv2/gapi/infer/onnx.hpp | 2 +- modules/gapi/include/opencv2/gapi/infer/parsers.hpp | 4 ++-- modules/gapi/include/opencv2/gapi/oak/oak.hpp | 2 +- modules/gapi/include/opencv2/gapi/s11n.hpp | 2 +- modules/gapi/include/opencv2/gapi/streaming/desync.hpp | 2 +- .../include/opencv2/gapi/streaming/onevpl/cfg_params.hpp | 4 ++-- .../gapi/streaming/onevpl/data_provider_interface.hpp | 2 +- modules/gapi/misc/python/package/gapi/__init__.py | 2 +- modules/gapi/misc/python/pyopencv_gapi.hpp | 4 ++-- .../gapi/misc/python/test/test_gapi_sample_pipelines.py | 6 +++--- modules/gapi/misc/python/test/test_gapi_streaming.py | 2 +- modules/gapi/samples/face_detection_mtcnn.cpp | 8 ++++---- modules/gapi/samples/infer_single_roi.cpp | 2 +- modules/gapi/samples/onevpl_infer_single_roi.cpp | 6 +++--- .../gapi/samples/pipeline_modeling_tool/dummy_source.hpp | 2 +- modules/gapi/src/api/kernels_streaming.cpp | 2 +- modules/gapi/src/backends/common/gbackend.hpp | 2 +- modules/gapi/src/backends/common/serialization.hpp | 2 +- 
modules/gapi/src/backends/ie/giebackend.cpp | 4 ++-- modules/gapi/src/backends/oak/goakbackend.cpp | 2 +- modules/gapi/src/backends/onnx/gonnxbackend.cpp | 2 +- modules/gapi/src/backends/render/ft_render.cpp | 2 +- modules/gapi/src/compiler/gislandmodel.cpp | 4 ++-- modules/gapi/src/compiler/passes/intrin.cpp | 2 +- modules/gapi/src/compiler/passes/kernels.cpp | 2 +- modules/gapi/src/executor/gexecutor.cpp | 2 +- modules/gapi/src/executor/gstreamingexecutor.cpp | 4 ++-- modules/gapi/src/executor/gtbbexecutor.cpp | 2 +- .../src/streaming/gstreamer/gstreamer_pipeline_facade.cpp | 4 ++-- .../onevpl/accelerators/accel_policy_interface.hpp | 2 +- .../src/streaming/onevpl/accelerators/surface/surface.hpp | 2 +- .../onevpl/accelerators/utils/elastic_barrier.hpp | 2 +- .../onevpl/engine/decode/decode_engine_legacy.cpp | 8 ++++---- .../streaming/onevpl/engine/preproc/preproc_engine.cpp | 4 ++-- .../onevpl/engine/transcode/transcode_engine_legacy.cpp | 4 ++-- modules/gapi/src/streaming/onevpl/source_priv.cpp | 8 ++++---- modules/gapi/src/streaming/onevpl/utils.cpp | 2 +- modules/gapi/src/utils/itt.hpp | 2 +- modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp | 2 +- modules/gapi/test/gapi_sample_pipelines.cpp | 2 +- .../test/streaming/gapi_streaming_vpp_preproc_test.cpp | 4 ++-- 48 files changed, 72 insertions(+), 72 deletions(-) diff --git a/modules/gapi/doc/slides/gapi_overview.org b/modules/gapi/doc/slides/gapi_overview.org index 676c914b66..7ed85baeca 100644 --- a/modules/gapi/doc/slides/gapi_overview.org +++ b/modules/gapi/doc/slides/gapi_overview.org @@ -592,7 +592,7 @@ subgraph cluster_3 {style=filled;color=azure2; C}; *** But how does it run? 
-- Since ~infer~ is an *Operation*, backends may provide *Kernels* implenting it; +- Since ~infer~ is an *Operation*, backends may provide *Kernels* implementing it; - The only publicly available inference backend now is *OpenVINOâ„¢*: - Brings its ~infer~ kernel atop of the Inference Engine; - NN model data is passed through G-API compile arguments (like kernels); diff --git a/modules/gapi/include/opencv2/gapi/garray.hpp b/modules/gapi/include/opencv2/gapi/garray.hpp index 17b03332e0..55f4d11b12 100644 --- a/modules/gapi/include/opencv2/gapi/garray.hpp +++ b/modules/gapi/include/opencv2/gapi/garray.hpp @@ -381,7 +381,7 @@ public: * * @note The value of `cv::GArray` may be overwritten by assigning some * other `cv::GArray` to the object using `operator=` -- on the - * assigment, the old association or value is discarded. + * assignment, the old association or value is discarded. * * @param v a std::vector to associate with this * `cv::GArray` object. Vector data is copied into the diff --git a/modules/gapi/include/opencv2/gapi/gframe.hpp b/modules/gapi/include/opencv2/gapi/gframe.hpp index af5fc6bee5..54fb30789e 100644 --- a/modules/gapi/include/opencv2/gapi/gframe.hpp +++ b/modules/gapi/include/opencv2/gapi/gframe.hpp @@ -48,7 +48,7 @@ struct GOrigin; * operations like BGR(), Y(), UV() -- these operations provide * access to frame's data in the familiar cv::GMat form, which can be * used with the majority of the existing G-API operations. These - * accessor functions may perform color space converion on the fly if + * accessor functions may perform color space conversion on the fly if * the image format of the GFrame they are applied to differs from the * operation's semantic (e.g. the BGR() accessor is called on an NV12 * image frame). 
diff --git a/modules/gapi/include/opencv2/gapi/gscalar.hpp b/modules/gapi/include/opencv2/gapi/gscalar.hpp index 7ebededcf0..d8a47c8ea8 100644 --- a/modules/gapi/include/opencv2/gapi/gscalar.hpp +++ b/modules/gapi/include/opencv2/gapi/gscalar.hpp @@ -63,7 +63,7 @@ public: * * @note The value of GScalar may be overwritten by assigning some * other GScalar to the object using `operator=` -- on the - * assigment, the old GScalar value is discarded. + * assignment, the old GScalar value is discarded. * * @param s a cv::Scalar value to associate with this GScalar object. */ diff --git a/modules/gapi/include/opencv2/gapi/gstreaming.hpp b/modules/gapi/include/opencv2/gapi/gstreaming.hpp index 890eb584fb..5677768a96 100644 --- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp +++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp @@ -225,7 +225,7 @@ public: * setSource() to run the graph on a new video stream. * * @overload - * @param args arguments used to contruct and initialize a stream + * @param args arguments used to construct and initialize a stream * source. 
*/ template diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index 2b43421907..b56175788f 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -204,7 +204,7 @@ namespace detail { static_assert(!(cv::detail::has_gshape>::value || cv::detail::contains::type, GAPI_OWN_TYPES_LIST>::value), - "gin/gout must not be used with G* classses or cv::gapi::own::*"); + "gin/gout must not be used with G* classes or cv::gapi::own::*"); return GTypeTraits::wrap_out(u); } }; diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp index de6f3bcdb9..44f0528153 100644 --- a/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp @@ -1241,7 +1241,7 @@ or column if there are N channels, or have N columns if there is a single channe @param src Input set of 2D points stored in one of possible containers: Mat, std::vector, std::vector, std::vector. @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER -and @ref DIST_C are not suppored. +and @ref DIST_C are not supported. @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value is chosen. @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the @@ -1313,7 +1313,7 @@ or column if there are N channels, or have N columns if there is a single channe @param src Input set of 3D points stored in one of possible containers: Mat, std::vector, std::vector, std::vector. @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER -and @ref DIST_C are not suppored. +and @ref DIST_C are not supported. @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value is chosen. 
@param reps Sufficient accuracy for the radius (distance between the coordinate origin and the diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp index bb5ef6c59e..16fc42eb63 100644 --- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp @@ -126,7 +126,7 @@ public: The function is used to associate data of graph outputs with output layers of network topology. If a network has only one output layer, there is no need to call it - as the layer is associated with ouput automatically but this doesn't prevent + as the layer is associated with output automatically but this doesn't prevent you from doing it yourself. Count of names has to match to number of network outputs or you can set your own output but for this case you have to additionally use @ref cfgPostProc function. diff --git a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp index c7308dd39f..e39d6fd4c6 100644 --- a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp @@ -95,7 +95,7 @@ GAPI_EXPORTS_W GArray parseSSD(const GMat& in, /** @brief Parses output of Yolo network. Extracts detection information (box, confidence, label) from Yolo output, -filters it by given confidence and performs non-maximum supression for overlapping boxes. +filters it by given confidence and performs non-maximum suppression for overlapping boxes. @note Function textual ID is "org.opencv.nn.parsers.parseYolo" @@ -105,7 +105,7 @@ where num_classes - a number of classes Yolo network was trained with. @param inSz Size to project detected boxes to (size of the input image). @param confidenceThreshold If confidence of the detection is smaller than confidence threshold, detection is rejected. 
-@param nmsThreshold Non-maximum supression threshold which controls minimum +@param nmsThreshold Non-maximum suppression threshold which controls minimum relative box intersection area required for rejecting the box with a smaller confidence. If 1.f, nms is not performed and no boxes are rejected. @param anchors Anchors Yolo network was trained with. diff --git a/modules/gapi/include/opencv2/gapi/oak/oak.hpp b/modules/gapi/include/opencv2/gapi/oak/oak.hpp index ba1ea46cc7..8b56b8a365 100644 --- a/modules/gapi/include/opencv2/gapi/oak/oak.hpp +++ b/modules/gapi/include/opencv2/gapi/oak/oak.hpp @@ -27,7 +27,7 @@ struct EncoderConfig { */ enum class Profile: int { H264_BASELINE, H264_HIGH, H264_MAIN, H265_MAIN, MJPEG }; /** - * Specifies prefered bitrate (kb) of compressed output bitstream + * Specifies preferred bitrate (kb) of compressed output bitstream */ std::int32_t bitrate = 8000; /** diff --git a/modules/gapi/include/opencv2/gapi/s11n.hpp b/modules/gapi/include/opencv2/gapi/s11n.hpp index 6863a5ecab..adbcfdbdeb 100644 --- a/modules/gapi/include/opencv2/gapi/s11n.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n.hpp @@ -241,7 +241,7 @@ GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Scalar &s); GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::Mat &m); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Mat &m); -// FIXME: for GRunArgs serailization +// FIXME: for GRunArgs serialization #if !defined(GAPI_STANDALONE) GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::UMat & um); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::UMat & um); diff --git a/modules/gapi/include/opencv2/gapi/streaming/desync.hpp b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp index 1ed6e24b49..9e927872a3 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/desync.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp @@ -67,7 +67,7 @@ G desync(const G &g) { * always produce their full output vectors. 
* * This operation only makes sense when a GComputation is compiled in - * straming mode with cv::GComputation::compileStreaming(). If this + * streaming mode with cv::GComputation::compileStreaming(). If this * operation is used and there are desynchronized outputs, the user * should use a special version of cv::GStreamingCompiled::pull() * which produces an array of cv::util::optional<> objects. diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp index d93b4c561d..25573afd4c 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp @@ -22,14 +22,14 @@ namespace onevpl { /** * @brief Public class is using for creation of onevpl::GSource instances. * - * Class members availaible through methods @ref CfgParam::get_name() and @ref CfgParam::get_value() are used by + * Class members available through methods @ref CfgParam::get_name() and @ref CfgParam::get_value() are used by * onevpl::GSource inner logic to create or find oneVPL particular implementation * (software/hardware, specific API version and etc.). * * @note Because oneVPL may provide several implementations which are satisfying with multiple (or single one) @ref CfgParam * criteria therefore it is possible to configure `preferred` parameters. This kind of CfgParams are created * using `is_major = false` argument in @ref CfgParam::create method and are not used by creating oneVPL particular implementations. - * Instead they fill out a "score table" to select preferrable implementation from available list. Implementation are satisfying + * Instead they fill out a "score table" to select preferable implementation from available list. Implementation are satisfying * with most of these optional params would be chosen. 
* If no one optional CfgParam params were present then first of available oneVPL implementation would be applied. * Please get on https://spec.oneapi.io/versions/latest/elements/oneVPL/source/API_ref/VPL_disp_api_func.html?highlight=mfxcreateconfig#mfxsetconfigfilterproperty diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp index c70e3db0ac..ec683a7527 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp @@ -27,7 +27,7 @@ private: }; struct GAPI_EXPORTS DataProviderSystemErrorException final : public DataProviderException { - DataProviderSystemErrorException(int error_code, const std::string& desription = std::string()); + DataProviderSystemErrorException(int error_code, const std::string& description = std::string()); ~DataProviderSystemErrorException() = default; }; diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index a2983e30ff..87ad9e2086 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -218,7 +218,7 @@ def op(op_id, in_types, out_types): for i, t in enumerate(out_types): if t not in [cv.GMat, cv.GScalar, *garray_types, *gopaque_types]: - raise Exception('{} unsupported output type: {} in possition: {}' + raise Exception('{} unsupported output type: {} in position: {}' .format(cls.__name__, t.__name__, i)) def on(*args): diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index b4be0048d0..7b760920e7 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -701,7 +701,7 @@ static cv::GRunArgs run_py_kernel(cv::detail::PyObjectHolder kernel, PyErr_Clear(); throw 
std::logic_error("Python kernel failed with error!"); } - // NB: In fact it's impossible situation, becase errors were handled above. + // NB: In fact it's impossible situation, because errors were handled above. GAPI_Assert(result.get() && "Python kernel returned NULL!"); if (out_info.size() == 1) @@ -811,7 +811,7 @@ static GMetaArgs run_py_meta(cv::detail::PyObjectHolder out_meta, PyErr_Clear(); throw std::logic_error("Python outMeta failed with error!"); } - // NB: In fact it's impossible situation, becase errors were handled above. + // NB: In fact it's impossible situation, because errors were handled above. GAPI_Assert(result.get() && "Python outMeta returned NULL!"); out_metas = PyTuple_Check(result.get()) ? get_meta_args(result.get()) diff --git a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py index 34a07b535b..7763579ebf 100644 --- a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py +++ b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py @@ -187,7 +187,7 @@ try: blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) # NB: The operation output is cv::GArray, so it should be mapped - # to python paramaters like this: [(1.2, 3.4), (5.2, 3.2)], because the cv::Point2f + # to python parameters like this: [(1.2, 3.4), (5.2, 3.2)], because the cv::Point2f # according to opencv rules mapped to the tuple and cv::GArray<> mapped to the list. # OpenCV returns np.array with shape (n_features, 1, 2), so let's to convert it to list # tuples with size == n_features. @@ -203,7 +203,7 @@ try: @staticmethod def outMeta(desc): - raise NotImplementedError("outMeta isn't imlemented") + raise NotImplementedError("outMeta isn't implemented") return Op @@ -605,7 +605,7 @@ try: img1 = np.array([1, 2, 3]) # FIXME: Cause Bad variant access. - # Need to provide more descriptive error messsage. + # Need to provide more descriptive error message. 
with self.assertRaises(Exception): comp.apply(cv.gin(img0, img1), args=cv.gapi.compile_args( cv.gapi.kernels(GAddImpl))) diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py index d06447d791..e4c5926204 100644 --- a/modules/gapi/misc/python/test/test_gapi_streaming.py +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -454,7 +454,7 @@ try: else: raise unittest.SkipTest(str(e)) except SystemError as e: - raise unittest.SkipTest(str(e) + ", casued by " + str(e.__cause__)) + raise unittest.SkipTest(str(e) + ", caused by " + str(e.__cause__)) def test_gst_multiple_sources(self): diff --git a/modules/gapi/samples/face_detection_mtcnn.cpp b/modules/gapi/samples/face_detection_mtcnn.cpp index 50cb666a90..583219b51d 100644 --- a/modules/gapi/samples/face_detection_mtcnn.cpp +++ b/modules/gapi/samples/face_detection_mtcnn.cpp @@ -33,7 +33,7 @@ const std::string keys = "{ thrr | 0.7 | MTCNN R confidence threshold}" "{ thro | 0.7 | MTCNN O confidence threshold}" "{ half_scale | false | MTCNN P use half scale pyramid}" -"{ queue_capacity | 1 | Streaming executor queue capacity. Calculated automaticaly if 0}" +"{ queue_capacity | 1 | Streaming executor queue capacity. 
Calculated automatically if 0}" ; namespace { @@ -488,8 +488,8 @@ static inline std::string get_pnet_level_name(const cv::Size &in_size) { } int calculate_scales(const cv::Size &input_size, std::vector &out_scales, std::vector &out_sizes ) { - //calculate multi - scale and limit the maxinum side to 1000 - //pr_scale: limit the maxinum side to 1000, < 1.0 + //calculate multi - scale and limit the maximum side to 1000 + //pr_scale: limit the maximum side to 1000, < 1.0 double pr_scale = 1.0; double h = static_cast(input_size.height); double w = static_cast(input_size.width); @@ -602,7 +602,7 @@ int main(int argc, char* argv[]) { cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true); cv::GArray final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); total_faces[0] = custom::RunNMS::on(final_faces_pnet0, 0.5f, false); - //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]] + //The rest PNet pyramid layers to accumulate all layers result in total_faces[PYRAMID_LEVELS - 1]] for (int i = 1; i < pyramid_levels; ++i) { std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_transposedRGB, get_pnet_level_name(level_size[i])); diff --git a/modules/gapi/samples/infer_single_roi.cpp b/modules/gapi/samples/infer_single_roi.cpp index e9c26a9b63..15af442a9d 100644 --- a/modules/gapi/samples/infer_single_roi.cpp +++ b/modules/gapi/samples/infer_single_roi.cpp @@ -150,7 +150,7 @@ int main(int argc, char *argv[]) auto networks = cv::gapi::networks(face_net); // Now build the graph. 
The graph structure may vary - // pased on the input parameters + // passed on the input parameters cv::GStreamingCompiled pipeline; auto inputs = cv::gin(cv::gapi::wip::make_src(input)); diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp index 7f0da6070c..a89cd5071b 100644 --- a/modules/gapi/samples/onevpl_infer_single_roi.cpp +++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -43,7 +43,7 @@ const std::string keys = "{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }" "{ faced | AUTO | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }" "{ cfg_params | :;: | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" - "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automaticaly if 0 }" + "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automatically if 0 }" "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" "{ roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. 
Identified automatically when not set }"; @@ -281,7 +281,7 @@ int main(int argc, char *argv[]) { const auto source_vpp_queue_capacity = cmd.get("vpp_frames_pool_size"); const auto device_id = cmd.get("faced"); - // check ouput file extension + // check output file extension if (!output.empty()) { auto ext = output.find_last_of("."); if (ext == std::string::npos || (output.substr(ext + 1) != "avi")) { @@ -421,7 +421,7 @@ int main(int argc, char *argv[]) { } else { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs); } - std::cout << "oneVPL source desription: " << cap->descr_of() << std::endl; + std::cout << "oneVPL source description: " << cap->descr_of() << std::endl; } catch (const std::exception& ex) { std::cerr << "Cannot create source: " << ex.what() << std::endl; return -1; diff --git a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp index 3079b99204..d77e120081 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp @@ -70,7 +70,7 @@ bool DummySource::pull(cv::gapi::wip::Data& data) { * update current seq_id correspondingly. * * if drop_frames is enabled, wait for the next tick, otherwise - * return last writen frame (+2 at the picture above) immediately. + * return last written frame (+2 at the picture above) immediately. 
*/ int64_t num_frames = static_cast((curr_ts - m_next_tick_ts) / m_latency); diff --git a/modules/gapi/src/api/kernels_streaming.cpp b/modules/gapi/src/api/kernels_streaming.cpp index 2c50551f4e..55834ffb9a 100644 --- a/modules/gapi/src/api/kernels_streaming.cpp +++ b/modules/gapi/src/api/kernels_streaming.cpp @@ -39,7 +39,7 @@ cv::GMat cv::gapi::streaming::desync(const cv::GMat &g) { // // At the same time, generally, every island in the streaming // graph gets its individual input as a queue (so normally, a - // writer pushes the same output MULTIPLE TIMES if it has mutliple + // writer pushes the same output MULTIPLE TIMES if it has multiple // readers): // // LWV diff --git a/modules/gapi/src/backends/common/gbackend.hpp b/modules/gapi/src/backends/common/gbackend.hpp index 99b8f5dd37..b05d8e2c36 100644 --- a/modules/gapi/src/backends/common/gbackend.hpp +++ b/modules/gapi/src/backends/common/gbackend.hpp @@ -173,7 +173,7 @@ namespace magazine // without utilizing magazine at all void GAPI_EXPORTS bindInArg (Mag& mag, const RcDesc &rc, const GRunArg &arg, HandleRMat handleRMat = HandleRMat::BIND); - // Extracts a memory object reference fro GRunArgP, stores it in appropriate slot in a magazine + // Extracts a memory object reference from GRunArgP, stores it in appropriate slot in a magazine // Note on RMat handling from bindInArg above is also applied here void GAPI_EXPORTS bindOutArg(Mag& mag, const RcDesc &rc, const GRunArgP &arg, HandleRMat handleRMat = HandleRMat::BIND); diff --git a/modules/gapi/src/backends/common/serialization.hpp b/modules/gapi/src/backends/common/serialization.hpp index 529fdc635d..3ba2e83581 100644 --- a/modules/gapi/src/backends/common/serialization.hpp +++ b/modules/gapi/src/backends/common/serialization.hpp @@ -164,7 +164,7 @@ GAPI_EXPORTS void serialize( IOStream& os GAPI_EXPORTS GSerialized deserialize(IIStream& is); GAPI_EXPORTS void reconstruct(const GSerialized &s, ade::Graph &g); -// FIXME: Basic Stream implementaions 
////////////////////////////////////////// +// FIXME: Basic Stream implementations ///////////////////////////////////////// // Basic in-memory stream implementations. class GAPI_EXPORTS ByteMemoryOutStream final: public IOStream { diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 6d851d3547..6647e484b1 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -460,7 +460,7 @@ public: const IEUnit &uu; cv::gimpl::GIslandExecutable::IOutput &out; - // NB: Need to gurantee that MediaFrame::View doesn't die until request is over. + // NB: Need to guarantee that MediaFrame::View doesn't die until request is over. using Views = std::vector>; Views views; @@ -963,7 +963,7 @@ cv::gimpl::ie::GIEExecutable::GIEExecutable(const ade::Graph &g, void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in, cv::gimpl::GIslandExecutable::IOutput &out) { - // General alghoritm: + // General algorithm: // 1. Collect island inputs/outputs. // 2. Create kernel context. (Every kernel has his own context). // 3. If the EndOfStream message is recieved, wait until all passed task are done. diff --git a/modules/gapi/src/backends/oak/goakbackend.cpp b/modules/gapi/src/backends/oak/goakbackend.cpp index 83a2ca88d2..e159160ba9 100644 --- a/modules/gapi/src/backends/oak/goakbackend.cpp +++ b/modules/gapi/src/backends/oak/goakbackend.cpp @@ -766,7 +766,7 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, // 1. 
Link input nodes to camera for (const auto& nh : in_nodes) { GAPI_Assert(m_oak_nodes.at(nh).inputs.size() == 1); - // FIXME: cover other camera outputs + // FIXME: convert other camera outputs // Link preview to infer, video to all other nodes if (m_oak_infer_info.find(nh) == m_oak_infer_info.end()) { m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0])); diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp index 2ab6610378..af1f7f8948 100644 --- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp +++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp @@ -1102,7 +1102,7 @@ struct InferList2: public cv::detail::KernelTag { } else { GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); } - // }}} (Preapre input) + // }}} (Prepare input) } // }}} (For every input of the net) std::vector out_mats(uu.oc->numOutputs()); diff --git a/modules/gapi/src/backends/render/ft_render.cpp b/modules/gapi/src/backends/render/ft_render.cpp index fcf84713ff..3c4ae2077f 100644 --- a/modules/gapi/src/backends/render/ft_render.cpp +++ b/modules/gapi/src/backends/render/ft_render.cpp @@ -80,7 +80,7 @@ cv::Size cv::gapi::wip::draw::FTTextRender::Priv::getTextSize(const std::wstring // See (1) on picture. // // 4) As we can see the last pen position is isn't horizontal size yet. 
- // We need to check if the glyph goes beyound the last position of the pen + // We need to check if the glyph goes beyond the last position of the pen // To do this we can: // a) Return to the previous position -advance // b) Shift on left value +left diff --git a/modules/gapi/src/compiler/gislandmodel.cpp b/modules/gapi/src/compiler/gislandmodel.cpp index 920fd700fc..0567a90e3a 100644 --- a/modules/gapi/src/compiler/gislandmodel.cpp +++ b/modules/gapi/src/compiler/gislandmodel.cpp @@ -346,9 +346,9 @@ std::string GIslandModel::traceIslandName(const ade::NodeHandle& island_nh, cons auto& backend_impl = island_ptr->backend().priv(); std::string backend_impl_type_name = typeid(backend_impl).name(); - // NOTE: Major part of already existing backends implementaion classes are called using + // NOTE: Major part of already existing backends implementation classes are called using // "*G[Name]BackendImpl*" pattern. - // We are trying to match against this pattern and retrive just [Name] part. + // We are trying to match against this pattern and retrieve just [Name] part. // If matching isn't successful, full mangled class name will be used. // // To match we use following algorithm: diff --git a/modules/gapi/src/compiler/passes/intrin.cpp b/modules/gapi/src/compiler/passes/intrin.cpp index 56f2db69e0..8920be6d4e 100644 --- a/modules/gapi/src/compiler/passes/intrin.cpp +++ b/modules/gapi/src/compiler/passes/intrin.cpp @@ -254,7 +254,7 @@ void apply(cv::gimpl::GModel::Graph &g) { } // Probably the simplest case: desync makes no sense in the regular -// compilation process, so just drop all its occurences in the graph, +// compilation process, so just drop all its occurrences in the graph, // reconnecting nodes properly. 
void drop(cv::gimpl::GModel::Graph &g) { // FIXME: LOG here that we're dropping the desync operations as diff --git a/modules/gapi/src/compiler/passes/kernels.cpp b/modules/gapi/src/compiler/passes/kernels.cpp index 2776c60359..22b791650f 100644 --- a/modules/gapi/src/compiler/passes/kernels.cpp +++ b/modules/gapi/src/compiler/passes/kernels.cpp @@ -212,7 +212,7 @@ void cv::gimpl::passes::resolveKernels(ade::passes::PassContext &ctx, GAPI_Assert(op.k.outMeta == nullptr); const_cast(op.k.outMeta) = selected_impl.outMeta; } else { - // Sanity check: the metadata funciton must be present + // Sanity check: the metadata function must be present GAPI_Assert(op.k.outMeta != nullptr); } } diff --git a/modules/gapi/src/executor/gexecutor.cpp b/modules/gapi/src/executor/gexecutor.cpp index a8abde27b1..472abaaa14 100644 --- a/modules/gapi/src/executor/gexecutor.cpp +++ b/modules/gapi/src/executor/gexecutor.cpp @@ -153,7 +153,7 @@ void writeBackExec(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) { case GRunArgP::index_of() : { // If there is a copy intrinsic at the end of the graph - // we need to actualy copy the data to the user buffer + // we need to actually copy the data to the user buffer // since output runarg was optimized to simply point // to the input of the copy kernel // FIXME: diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 6c8c56852c..557e5ceee4 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -1520,7 +1520,7 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && cv::gimpl::GStreamingExecutor::~GStreamingExecutor() { - // FIXME: this is a temporary try-catch exception hadling. + // FIXME: this is a temporary try-catch exception handling. 
// Need to eliminate throwings from stop() try { if (state == State::READY || state == State::RUNNING) @@ -1619,7 +1619,7 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) case T::index_of(): #if !defined(GAPI_STANDALONE) emitter.reset(new VideoEmitter{emit_arg}); - // Currently all video inputs are syncronized if sync policy is to drop, + // Currently all video inputs are synchronized if sync policy is to drop, // there is no different fps branches etc, so all video emitters are registered video_emitters.emplace_back(emit_nh); #else diff --git a/modules/gapi/src/executor/gtbbexecutor.cpp b/modules/gapi/src/executor/gtbbexecutor.cpp index cc6ccf9ef4..64cd9b5313 100644 --- a/modules/gapi/src/executor/gtbbexecutor.cpp +++ b/modules/gapi/src/executor/gtbbexecutor.cpp @@ -353,7 +353,7 @@ namespace graph { } ctx.executed++; - // reset dependecy_count to initial state to simplify re-execution of the same graph + // reset dependency_count to initial state to simplify re-execution of the same graph node->dependency_count = node->dependencies; return result; diff --git a/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp b/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp index cd782537ca..d0f5bd5db2 100644 --- a/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp +++ b/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp @@ -111,11 +111,11 @@ GstElement* GStreamerPipelineFacade::getElementByName(const std::string& element void GStreamerPipelineFacade::completePreroll() { // FIXME: If there are multiple sources in pipeline and one of them is live, then pipeline // will return GST_STATE_CHANGE_NO_PREROLL while pipeline pausing. - // But appsink may not be connected to this live source and only to anothers, + // But appsink may not be connected to this live source and only to another, // not-live ones. So, it is not required to start the playback for appsink to complete // the preroll. 
// Starting of playback for the not-live sources before the first frame pull will lead - // to loosing of some amount of frames and pulling of the first frame can return frame + // to losing of some amount of frames and pulling of the first frame can return frame // which is far from the first. // // Need to handle this case or forbid to mix multiples sources of different diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp index a2a4845db2..c955b7529a 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp @@ -54,7 +54,7 @@ struct VPLAccelerationPolicy virtual void init(session_t session) = 0; virtual void deinit(session_t session) = 0; - // Limitation: cannot give guarantee in succesful memory realloccation + // Limitation: cannot give guarantee in successful memory reallocation // for existing workspace in existing pool (see realloc) // thus it is not implemented, // PLEASE provide initial memory area large enough diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp index 4f93312e24..68d7a697e7 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp @@ -59,7 +59,7 @@ public: /** * Extract value thread-safe lock counter (see @ref Surface description). * It's usual situation that counter may be instantly decreased in other thread after this method called. - * We need instantaneous value. This method syncronized in inter-threading way with @ref Surface::release_lock() + * We need instantaneous value. This method synchronized in inter-threading way with @ref Surface::release_lock() * * @return fetched locks count. 
*/ diff --git a/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp b/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp index 827392f8be..b91554f435 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp @@ -163,7 +163,7 @@ private: * deinitialization called off in `on_unlock` * because new `incoming` request had appeared at here before * `on_unlock` started deinit procedure in another thread. - * So no reinit required because no deinit had happended + * So no reinit required because no deinit had happened * * main `busy-wait` request must break busy-wait state * and become `outgoing` request. diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp index e6afbb92fd..b80ad2e12e 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp @@ -44,7 +44,7 @@ void VPLLegacyDecodeEngine::try_modify_pool_size_request_param(const char* param param_name + ", overflow"); } request.NumFrameSuggested = static_cast(new_frames_count); - GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overriden by user input: " << + GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overridden by user input: " << ", mfxFrameAllocRequest.NumFrameMin: " << request.NumFrameMin << ", mfxFrameAllocRequest.NumFrameSuggested: " << request.NumFrameSuggested << ", mfxFrameAllocRequest.Type: " << request.Type); @@ -152,7 +152,7 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptr ExecutionStatus { return this->process_error(sess.last_status, static_cast(sess)); @@ -177,7 +177,7 @@ VPLLegacyDecodeEngine::SessionParam VPLLegacyDecodeEngine::prepare_session_param mfxVideoParam mfxDecParams {}; mfxDecParams.mfx.CodecId = decoder_id_name; - // set 
memory stream direction accroding to accelearion policy device type + // set memory stream direction according to acceleration policy device type IDeviceSelector::DeviceScoreTable devices = acceleration_policy->get_device_selector()->select_devices(); GAPI_Assert(devices.size() == 1 && "Multiple(or zero) acceleration devices case is unsupported"); AccelType accel_type = devices.begin()->second.get_type(); @@ -252,7 +252,7 @@ VPLLegacyDecodeEngine::SessionParam VPLLegacyDecodeEngine::prepare_session_param acceleration_policy->create_surface_pool(decRequest, mfxDecParams.mfx.FrameInfo); // Input parameters finished, now initialize decode - // create decoder for session accoring to header recovered from source file + // create decoder for session according to header recovered from source file sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); if (MFX_ERR_NONE != sts) { diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp index ec27a6422d..2e9a9bebce 100644 --- a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp @@ -147,7 +147,7 @@ VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& acce } while (MFX_ERR_NONE == sess.last_status && !my_sess.vpp_out_queue.empty()); return ExecutionStatus::Continue; }, - // 2) Falls back on generic status procesing + // 2) Falls back on generic status processing [this] (EngineSession& sess) -> ExecutionStatus { return this->process_error(sess.last_status, static_cast(sess)); @@ -273,7 +273,7 @@ pp_session VPPPreprocEngine::initialize_preproc(const pp_params& initial_frame_p throw std::runtime_error("Cannot execute MFXVideoVPP_QueryIOSurf"); } - // NB: Assing ID as upper limit descendant to distinguish specific VPP allocation + // NB: Assign ID as upper limit descendant to distinguish specific VPP allocation // from decode allocations witch 
started from 0: by local module convention static uint16_t request_id = 0; diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp index 7cb2c2e5f1..6226061925 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp @@ -248,7 +248,7 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptr ExecutionStatus { return this->process_error(sess.last_status, static_cast(sess)); @@ -358,7 +358,7 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, } - // NB: Assing ID as upper limit descendant to distinguish specific VPP allocation + // NB: Assign ID as upper limit descendant to distinguish specific VPP allocation // from decode allocations witch started from 0: by local module convention vppRequests[1].AllocId = std::numeric_limits::max(); diff --git a/modules/gapi/src/streaming/onevpl/source_priv.cpp b/modules/gapi/src/streaming/onevpl/source_priv.cpp index 792110aaf9..7afe1cde56 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.cpp +++ b/modules/gapi/src/streaming/onevpl/source_priv.cpp @@ -129,7 +129,7 @@ GSource::Priv::Priv(std::shared_ptr provider, } // collect optional-preferred input parameters from input params - // which may (optionally) or may not be used to choose the most preferrable + // which may (optionally) or may not be used to choose the most preferable // VPL implementation (for example, specific API version or Debug/Release VPL build) std::vector preferred_params; std::copy_if(cfg_params.begin(), cfg_params.end(), std::back_inserter(preferred_params), @@ -137,7 +137,7 @@ GSource::Priv::Priv(std::shared_ptr provider, std::sort(preferred_params.begin(), preferred_params.end()); GAPI_LOG_DEBUG(nullptr, "Find MFX better implementation from handle: " << mfx_handle << - " is satisfying 
preferrable params count: " << preferred_params.size()); + " is satisfying preferable params count: " << preferred_params.size()); int i = 0; mfxImplDescription *idesc = nullptr; std::vector available_impl_descriptions; @@ -162,7 +162,7 @@ GSource::Priv::Priv(std::shared_ptr provider, GAPI_LOG_INFO(nullptr, "Implementation index: " << i << "\n" << ss.str()); // Only one VPL implementation is required for GSource here. - // Let's find intersection params from available impl with preferrable input params + // Let's find intersection params from available impl with preferable input params // to find best match. // An available VPL implementation with max matching count std::vector impl_params = get_params_from_string(ss.str()); @@ -178,7 +178,7 @@ GSource::Priv::Priv(std::shared_ptr provider, // in case of no input preferrance we consider all params are matched // for the first available VPL implementation. It will be a chosen one matches_count.emplace(impl_params.size(), i++); - GAPI_LOG_DEBUG(nullptr, "No preferrable params, use the first one implementation"); + GAPI_LOG_DEBUG(nullptr, "No preferable params, use the first one implementation"); break; } else { GAPI_LOG_DEBUG(nullptr, "Equal param intersection count: " << matched_params.size()); diff --git a/modules/gapi/src/streaming/onevpl/utils.cpp b/modules/gapi/src/streaming/onevpl/utils.cpp index 37b4074209..549ff597cf 100644 --- a/modules/gapi/src/streaming/onevpl/utils.cpp +++ b/modules/gapi/src/streaming/onevpl/utils.cpp @@ -401,7 +401,7 @@ std::string ext_mem_frame_type_to_cstr(int type) { std::stringstream ss; APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_DECODER_TARGET); APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_PROCESSOR_TARGET); - // NB: accoring to VPL source the commented MFX_* constane below are belong to the + // NB: according to VPL source the commented MFX_* constants below belong to the // same actual integral value as condition abobe. 
So it is impossible // to distinct them in condition branch. Just put this comment and possible // constans here... diff --git a/modules/gapi/src/utils/itt.hpp b/modules/gapi/src/utils/itt.hpp index 0b49af7226..4a9cd54d65 100644 --- a/modules/gapi/src/utils/itt.hpp +++ b/modules/gapi/src/utils/itt.hpp @@ -13,7 +13,7 @@ #include -// NOTE: OPENCV_WITH_ITT is only defined if ITT dependecy is built by OpenCV infrastructure. +// NOTE: OPENCV_WITH_ITT is only defined if ITT dependency is built by OpenCV infrastructure. // There will not be such define in G-API standalone mode. // TODO: Consider using OpenCV's trace.hpp #if defined(OPENCV_WITH_ITT) diff --git a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp index 17c4e2f447..b462e701f2 100644 --- a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp +++ b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp @@ -415,7 +415,7 @@ TEST(StatefulKernel, StateIsInitViaCompArgs) // Allowing 1% difference of all pixels between G-API and OpenCV results compareBackSubResults(gapiForeground, ocvForeground, 1); - // Additionally, test the case where state is resetted + // Additionally, test the case where state is reset gapiBackSub.prepareForNewStream(); gapiBackSub(cv::gin(frame), cv::gout(gapiForeground)); pOcvBackSub->apply(frame, ocvForeground); diff --git a/modules/gapi/test/gapi_sample_pipelines.cpp b/modules/gapi/test/gapi_sample_pipelines.cpp index e48d36b3ac..da71cd0ab0 100644 --- a/modules/gapi/test/gapi_sample_pipelines.cpp +++ b/modules/gapi/test/gapi_sample_pipelines.cpp @@ -67,7 +67,7 @@ namespace } }; - // These definitons test the correct macro work if the kernel has multiple output values + // These definitions test the correct macro work if the kernel has multiple output values G_TYPED_KERNEL(GRetGArrayTupleOfGMat2Kernel, >(GMat, Scalar)>, "org.opencv.test.retarrayoftupleofgmat2kernel") {}; G_TYPED_KERNEL(GRetGArraTupleyOfGMat3Kernel, 
>(GMat)>, "org.opencv.test.retarrayoftupleofgmat3kernel") {}; G_TYPED_KERNEL(GRetGArraTupleyOfGMat4Kernel, >(GMat)>, "org.opencv.test.retarrayoftupleofgmat4kernel") {}; diff --git a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp index 9c0cc9ca4a..f33d967c89 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp @@ -346,9 +346,9 @@ void preproc_function(cv::gapi::wip::IPreprocEngine &preproc_engine, SafeQueue&q // launch pipeline bool in_progress = false; // let's allow counting of preprocessed frames to check this value later: - // Currently, it looks redundant to implement any kind of gracefull shutdown logic + // Currently, it looks redundant to implement any kind of graceful shutdown logic // in this test - so let's apply agreement that media source is processed - // succesfully when preproc_number != 1 in result. + // successfully when preproc_number != 1 in result. 
// Specific validation logic which adhere to explicit counter value may be implemented // in particular test scope preproc_number = 1; From a2b84e989759aabbc260e8db1b56e75e4e9d97fd Mon Sep 17 00:00:00 2001 From: rogday Date: Wed, 13 Apr 2022 15:55:36 +0300 Subject: [PATCH 75/84] add assert to tf graph simplifier to address security concerns --- modules/dnn/src/tensorflow/tf_graph_simplifier.cpp | 2 +- modules/dnn/test/test_tf_importer.cpp | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index 54395504c7..f81ff83ab0 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -1035,7 +1035,7 @@ void sortByExecutionOrder(tensorflow::GraphDef& net) nodesToAdd.pop_back(); permIds.push_back(nodeToAdd); - + CV_Assert(nodeToAdd < edges.size()); for (int i = 0; i < edges[nodeToAdd].size(); ++i) { int consumerId = edges[nodeToAdd][i]; diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index d3ee5d3300..81880fb66c 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -1571,6 +1571,13 @@ TEST_P(Test_TensorFlow_layers, tf2_permute_nhwc_ncwh) runTensorFlowNet("tf2_permute_nhwc_ncwh"); } +// issue #21852 +TEST_P(Test_TensorFlow_layers, tf_graph_simplifier_buffer_overflow) +{ + // This just shouldn't segfault, otherwise it's fine + EXPECT_ANY_THROW(readNetFromTensorflow(path("tf_graph_simplifier_buffer_overflow_net.pb"))); +} + TEST_P(Test_TensorFlow_layers, squeeze) { #if defined(INF_ENGINE_RELEASE) From 7daf84fb440a1201f8e75ecaf2d223d8c0d7d55f Mon Sep 17 00:00:00 2001 From: rogday Date: Wed, 13 Apr 2022 22:45:43 +0300 Subject: [PATCH 76/84] address security concerns in persistence --- modules/core/src/persistence.cpp | 2 +- modules/core/test/test_io.cpp | 21 +++++++++++++++++++++ 2 files changed, 22 
insertions(+), 1 deletion(-) diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 6789c78e9d..09435b5850 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -817,7 +817,7 @@ char *FileStorage::Impl::gets(size_t maxCount) { int delta = (int) strlen(ptr); ofs += delta; maxCount -= delta; - if (ptr[delta - 1] == '\n' || maxCount == 0) + if (delta == 0 || ptr[delta - 1] == '\n' || maxCount == 0) break; if (delta == count) buffer.resize((size_t) (buffer.size() * 1.5)); diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 4218cb9297..04426e58f7 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -3,6 +3,8 @@ // of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" +#include + namespace opencv_test { namespace { static SparseMat cvTsGetRandomSparseMat(int dims, const int* sz, int type, @@ -799,6 +801,25 @@ TEST(Core_InputOutput, filestorage_base64_basic_memory_JSON) test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".json", true, true); } +// issue #21851 +TEST(Core_InputOutput, filestorage_heap_overflow) +{ + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + CV_Assert(test_info); + + std::string name = std::string(test_info->test_case_name()) + "--" + test_info->name(); + const char data[] = {0x00, 0x2f, 0x4a, 0x4a, 0x50, 0x4a, 0x4a }; + + std::ofstream file; + file.open(name, std::ios_base::binary); + assert(file.is_open()); + + file.write(data, sizeof(data)); + file.close(); + + // This just shouldn't segfault, otherwise it's fine + EXPECT_ANY_THROW(FileStorage(name, FileStorage::READ)); +} TEST(Core_InputOutput, filestorage_base64_valid_call) { From 0ee2a58cdc0fd5d078849a35dedafcf950fddcf2 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 15 Apr 2022 14:36:48 +0300 Subject: [PATCH 77/84] Document defaults for JPEG settings. 
--- modules/imgcodecs/include/opencv2/imgcodecs.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index 01d46cba7c..5dbf348463 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -87,8 +87,8 @@ enum ImwriteFlags { IMWRITE_JPEG_PROGRESSIVE = 2, //!< Enable JPEG features, 0 or 1, default is False. IMWRITE_JPEG_OPTIMIZE = 3, //!< Enable JPEG features, 0 or 1, default is False. IMWRITE_JPEG_RST_INTERVAL = 4, //!< JPEG restart interval, 0 - 65535, default is 0 - no restart. - IMWRITE_JPEG_LUMA_QUALITY = 5, //!< Separate luma quality level, 0 - 100, default is 0 - don't use. - IMWRITE_JPEG_CHROMA_QUALITY = 6, //!< Separate chroma quality level, 0 - 100, default is 0 - don't use. + IMWRITE_JPEG_LUMA_QUALITY = 5, //!< Separate luma quality level, 0 - 100, default is -1 - don't use. + IMWRITE_JPEG_CHROMA_QUALITY = 6, //!< Separate chroma quality level, 0 - 100, default is -1 - don't use. IMWRITE_PNG_COMPRESSION = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. If specified, strategy is changed to IMWRITE_PNG_STRATEGY_DEFAULT (Z_DEFAULT_STRATEGY). Default value is 1 (best speed setting). IMWRITE_PNG_STRATEGY = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_RLE. IMWRITE_PNG_BILEVEL = 18, //!< Binary level PNG, 0 or 1, default is 0. 
From 27c15bed601b9dd8e808d0fc1958001a6d123299 Mon Sep 17 00:00:00 2001 From: Victor Date: Sat, 16 Apr 2022 20:59:53 +0300 Subject: [PATCH 78/84] Merge pull request #21834 from victor1234:issue-19138_add_termcriteria_to_fisheye_undistortpoints * Add termination criteria to cv::fisheye::undistortPoints() * Add criteria.type check --- modules/calib3d/include/opencv2/calib3d.hpp | 4 +++- modules/calib3d/src/fisheye.cpp | 25 +++++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index b3709c8cc2..5fe7fb9596 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -3797,10 +3797,12 @@ namespace fisheye @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 1-channel or 1x1 3-channel @param P New camera intrinsic matrix (3x3) or new projection matrix (3x4) + @param criteria Termination criteria @param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . */ CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted, - InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray()); + InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray(), + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)); /** @brief Computes undistortion and rectification maps for image transform by #remap. If D is empty zero distortion is used, if R or P is empty identity matrixes are used. 
diff --git a/modules/calib3d/src/fisheye.cpp b/modules/calib3d/src/fisheye.cpp index 43b679f9e4..9a73132cbe 100644 --- a/modules/calib3d/src/fisheye.cpp +++ b/modules/calib3d/src/fisheye.cpp @@ -318,7 +318,8 @@ void cv::fisheye::distortPoints(InputArray undistorted, OutputArray distorted, I ////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// cv::fisheye::undistortPoints -void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray R, InputArray P) +void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, + InputArray R, InputArray P, TermCriteria criteria) { CV_INSTRUMENT_REGION(); @@ -330,6 +331,8 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted CV_Assert(R.empty() || R.size() == Size(3, 3) || R.total() * R.channels() == 3); CV_Assert(D.total() == 4 && K.size() == Size(3, 3) && (K.depth() == CV_32F || K.depth() == CV_64F)); + CV_Assert(criteria.isValid()); + cv::Vec2d f, c; if (K.depth() == CV_32F) { @@ -372,6 +375,15 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted size_t n = distorted.total(); int sdepth = distorted.depth(); + const bool isEps = criteria.type & TermCriteria::EPS; + + /* Define max count for solver iterations */ + int maxCount = std::numeric_limits::max(); + if (criteria.type & TermCriteria::MAX_ITER) { + maxCount = criteria.maxCount; + } + + for(size_t i = 0; i < n; i++ ) { Vec2d pi = sdepth == CV_32F ? 
(Vec2d)srcf[i] : srcd[i]; // image point @@ -389,13 +401,11 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted double scale = 0.0; - if (fabs(theta_d) > 1e-8) + if (!isEps || fabs(theta_d) > criteria.epsilon) { // compensate distortion iteratively - const double EPS = 1e-8; // or std::numeric_limits::epsilon(); - - for (int j = 0; j < 10; j++) + for (int j = 0; j < maxCount; j++) { double theta2 = theta*theta, theta4 = theta2*theta2, theta6 = theta4*theta2, theta8 = theta6*theta2; double k0_theta2 = k[0] * theta2, k1_theta4 = k[1] * theta4, k2_theta6 = k[2] * theta6, k3_theta8 = k[3] * theta8; @@ -403,7 +413,8 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted double theta_fix = (theta * (1 + k0_theta2 + k1_theta4 + k2_theta6 + k3_theta8) - theta_d) / (1 + 3*k0_theta2 + 5*k1_theta4 + 7*k2_theta6 + 9*k3_theta8); theta = theta - theta_fix; - if (fabs(theta_fix) < EPS) + + if (isEps && (fabs(theta_fix) < criteria.epsilon)) { converged = true; break; @@ -422,7 +433,7 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted // so we can check whether theta has changed the sign during the optimization bool theta_flipped = ((theta_d < 0 && theta > 0) || (theta_d > 0 && theta < 0)); - if (converged && !theta_flipped) + if ((converged || !isEps) && !theta_flipped) { Vec2d pu = pw * scale; //undistorted point From 9cd5a0a1e675b3a95c796b692cbcb041481e91c6 Mon Sep 17 00:00:00 2001 From: rogday Date: Tue, 19 Apr 2022 19:40:25 +0300 Subject: [PATCH 79/84] Merge pull request #21884 from rogday:cuda_cleanup Fix CUDA compilation issues and adjust thresholds. * Fix CUDA compilation issues and adjust thresholds. 
* add conformance tests to denylist --- modules/dnn/src/cuda/activations.cu | 2 +- modules/dnn/src/cuda/functors.hpp | 10 ++++++---- modules/dnn/src/layers/convolution_layer.cpp | 1 + modules/dnn/src/layers/elementwise_layers.cpp | 8 ++++---- modules/dnn/src/opencl/activations.cl | 2 +- modules/dnn/test/test_backends.cpp | 2 +- modules/dnn/test/test_misc.cpp | 5 +++-- modules/dnn/test/test_model.cpp | 5 +++-- modules/dnn/test/test_onnx_conformance.cpp | 2 +- ...formance_layer_filter__cuda_denylist.inl.hpp | 9 +++++++++ modules/dnn/test/test_onnx_importer.cpp | 17 +++++++++++------ 11 files changed, 41 insertions(+), 22 deletions(-) diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index ed34d57e0b..e12457a164 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ -260,7 +260,7 @@ void shrink(const Stream& stream, Span output, View input, T bias, T lambd template void reciprocal(const Stream& stream, Span output, View input) { - generic_op>(stream, output, input); + generic_op>(stream, output, input); } template diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 378df82f26..83a949f8e7 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -732,7 +732,8 @@ struct SignFunctor { CUDA4DNN_HOST_DEVICE Params() {} }; - CUDA4DNN_DEVICE SignFunctor() : SignFunctor(Params{}) { } + CUDA4DNN_DEVICE SignFunctor() { } + CUDA4DNN_DEVICE SignFunctor(const Params& params) { } CUDA4DNN_DEVICE T operator()(T value) { return value > T(0) ? T(1) : (value < T(0) ? 
T(-1) : T(0)); @@ -747,7 +748,7 @@ struct ShrinkFunctor { T bias, lambd; }; - CUDA4DNN_DEVICE ShrinkFunctor() : bias(0), lambd(0.5) { } + CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { } CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { } CUDA4DNN_DEVICE T operator()(T value) { @@ -763,10 +764,11 @@ struct ReciprocalFunctor { CUDA4DNN_HOST_DEVICE Params() {} }; - CUDA4DNN_DEVICE ReciprocalFunctor() : ReciprocalFunctor(Params{}) { } + CUDA4DNN_DEVICE ReciprocalFunctor() { } + CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { } CUDA4DNN_DEVICE T operator()(T value) { - return T(1.0f)/value; + return T(1.f)/value; } }; diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 4e377b9f7e..0bf39f93b3 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -2080,6 +2080,7 @@ public: { auto context = reinterpret_cast(context_); + // TODO: extract bias from inputs and pass it CV_Assert(inputs.size() == 1 || inputs.size() == 2); auto input_wrapper = inputs[0].dynamicCast(); auto input_shape = input_wrapper->getShape(); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index eb2ecce3ce..353ce8c0b4 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -2282,7 +2282,7 @@ struct SignFunctor : public BaseDefaultFunctor inline float calculate(float x) const { - return x > 0 ? 1 : (x < 0 ? -1 : 0); + return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f); } #ifdef HAVE_CUDA @@ -2315,13 +2315,13 @@ struct ShrinkFunctor : public BaseDefaultFunctor inline float calculate(float x) const { - return x > lambd ? x - bias : (x < -lambd ? x + bias : 0); + return x > lambd ? x - bias : (x < -lambd ? 
x + bias : 0.f); } #ifdef HAVE_CUDA Ptr initCUDA(int target, csl::Stream stream) { - return make_cuda_node(target, stream); + return make_cuda_node(target, stream, bias, lambd); } #endif @@ -2343,7 +2343,7 @@ struct ReciprocalFunctor : public BaseDefaultFunctor inline float calculate(float x) const { - return 1.0/x; + return 1.f/x; } #ifdef HAVE_CUDA diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index d105623403..0624f48e19 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -320,7 +320,7 @@ __kernel void SignForward(const int n, __global T* in, __global T* out) { int index = get_global_id(0); if(index < n) - out[index] = in[index] > 0.f ? 1.0f : (in[index] < 0.f) ? -1.0f : 0.0f); + out[index] = in[index] > 0.f ? 1.0f : ((in[index] < 0.f) ? -1.0f : 0.0f); } __kernel void ReciprocalForward(const int n, __global T* in, __global T* out) diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 8a673ba0d8..f2c6f1e5a0 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -516,7 +516,7 @@ TEST_P(DNNTestNetwork, DenseNet_121) else if (target == DNN_TARGET_CUDA_FP16) { l1 = 0.008; - lInf = 0.05; + lInf = 0.06; } processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf); if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 108131bd9d..0fab7551a5 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -844,8 +844,9 @@ TEST_P(Test_two_inputs, basic) Mat ref; addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F); - double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
0.06 : 1e-6; - double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5; + double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.06 : 1e-6; + double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.3 : 1e-5; + normAssert(out, ref, "", l1, lInf); if (cvtest::debugLevel > 0 || HasFailure()) diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index c5a0f2fa7f..25d1a18d52 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -512,7 +512,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 0.002; + scoreDiff = 0.0021; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -661,7 +661,8 @@ TEST_P(Test_Model, Segmentation) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)) { norm = 2.0f; // l1 = 0.01 lInf = 2 } diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp index 0e912ede54..e9bc0e4187 100644 --- a/modules/dnn/test/test_onnx_conformance.cpp +++ b/modules/dnn/test/test_onnx_conformance.cpp @@ -954,7 +954,7 @@ public: if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) { - default_l1 = 4e-3; + default_l1 = 7e-3; default_lInf = 2e-2; } else diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp index 0f5f387132..c18ced0c59 100644 --- 
a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp @@ -66,6 +66,15 @@ "test_maxunpool_export_with_output_shape", "test_mul_bcast", "test_mul_uint8", +"test_reduce_prod_default_axes_keepdims_example", // FP16 only +"test_reduce_prod_default_axes_keepdims_random", // FP16 only +"test_reduce_prod_do_not_keepdims_random", // FP16 only +"test_reduce_prod_keepdims_random", // FP16 only +"test_reduce_prod_negative_axes_keepdims_random", // FP16 only +"test_reduce_sum_square_default_axes_keepdims_random", // FP16 only +"test_reduce_sum_square_do_not_keepdims_random", // FP16 only +"test_reduce_sum_square_keepdims_random", // FP16 only +"test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only "test_softmax_default_axis", "test_softmax_large_number", // FP16 only "test_softmax_large_number_expanded", // FP16 only diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 11bf91b868..8503f55c25 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -169,16 +169,17 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); - if (backend == DNN_BACKEND_CUDA) - applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported - if (backend == DNN_BACKEND_VKCOM) - applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU && getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // supports only <= 2 inputs + + if 
(backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported + String basename = "conv_variable_wb"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); ASSERT_FALSE(net.empty()); @@ -464,11 +465,15 @@ TEST_P(Test_ONNX_layers, Scale) TEST_P(Test_ONNX_layers, Scale_broadcast) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3); } TEST_P(Test_ONNX_layers, Scale_broadcast_mid) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2); } @@ -2131,7 +2136,7 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus) double lInf = default_lInf; // Output values are in range [-2.011, 2.111] - if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (target == DNN_TARGET_CUDA_FP16)) l1 = 0.007; else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16) { From 5cc154147f749c0d9ac7a32e4b12aa7469b817c3 Mon Sep 17 00:00:00 2001 From: Victor Date: Tue, 19 Apr 2022 21:07:34 +0300 Subject: [PATCH 80/84] Merge pull request #21841 from victor1234:calib3d-undistortPoints-tests Add distort/undistort test for fisheye::undistortPoints() * Add distort/undistort test for fisheye::undistortPoints() Lack of test has allowed error described in 19138 to be unnoticed. 
In addition to random points, four corners and principal center added to point set * Add random distortion coefficients set * Move undistortPoints test to google test, refactor * Add fisheye::undistortPoints() perf test * Add negative distortion coefficients to undistortPoints test, increase value * Move to theRNG() * Change test check from cvtest::norm(L2) to EXPECT_MAT_NEAR() * Layout fix * Add points number parameters, comments --- modules/calib3d/perf/perf_undistort.cpp | 36 +++++++++ modules/calib3d/test/test_fisheye.cpp | 49 ++++++++++++ .../calib3d/test/test_undistort_points.cpp | 80 +++++++++---------- 3 files changed, 124 insertions(+), 41 deletions(-) diff --git a/modules/calib3d/perf/perf_undistort.cpp b/modules/calib3d/perf/perf_undistort.cpp index e15d2aefe3..86f622a92f 100644 --- a/modules/calib3d/perf/perf_undistort.cpp +++ b/modules/calib3d/perf/perf_undistort.cpp @@ -27,4 +27,40 @@ PERF_TEST(Undistort, DISABLED_InitInverseRectificationMap) SANITY_CHECK_NOTHING(); } +using PerfIntType = perf::TestBaseWithParam>; +PERF_TEST_P(PerfIntType, fisheye_undistortPoints, + (testing::Values(1e2, 1e3, 1e4))) +{ + const cv::Size imageSize(1280, 800); + + /* Set camera matrix */ + const cv::Matx33d K(558.478087865323, 0, 620.458515360843, + 0, 560.506767351568, 381.939424848348, + 0, 0, 1); + + /* Set distortion coefficients */ + Mat D(1, 4, CV_64F); + theRNG().fill(D, RNG::UNIFORM, -1.e-5, 1.e-5); + + int pointsNumber = std::get<0>(GetParam()); + + /* Create two-channel points matrix */ + cv::Mat xy[2] = {}; + xy[0].create(pointsNumber, 1, CV_64F); + theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, imageSize.width); // x + xy[1].create(pointsNumber, 1, CV_64F); + theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, imageSize.height); // y + + cv::Mat points; + merge(xy, 2, points); + + /* Set fixed iteration number to check only c++ code, not algo convergence */ + TermCriteria termCriteria(TermCriteria::MAX_ITER, 10, 0); + + Mat undistortedPoints; + TEST_CYCLE() 
fisheye::undistortPoints(points, undistortedPoints, K, D, noArray(), noArray(), termCriteria); + + SANITY_CHECK_NOTHING(); +} + } // namespace diff --git a/modules/calib3d/test/test_fisheye.cpp b/modules/calib3d/test/test_fisheye.cpp index 310804d233..ad3b066a6a 100644 --- a/modules/calib3d/test/test_fisheye.cpp +++ b/modules/calib3d/test/test_fisheye.cpp @@ -101,6 +101,55 @@ TEST_F(fisheyeTest, projectPoints) EXPECT_MAT_NEAR(distorted0, distorted2, 1e-10); } +TEST_F(fisheyeTest, distortUndistortPoints) +{ + int width = imageSize.width; + int height = imageSize.height; + + /* Create test points */ + std::vector points0Vector; + cv::Mat principalPoints = (cv::Mat_(5, 2) << K(0, 2), K(1, 2), // (cx, cy) + /* Image corners */ + 0, 0, + 0, height, + width, 0, + width, height + ); + + /* Random points inside image */ + cv::Mat xy[2] = {}; + xy[0].create(100, 1, CV_64F); + theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x + xy[1].create(100, 1, CV_64F); + theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y + + cv::Mat randomPoints; + merge(xy, 2, randomPoints); + + cv::Mat points0; + cv::vconcat(principalPoints.reshape(2), randomPoints, points0); + + /* Test with random D set */ + for (size_t i = 0; i < 10; ++i) { + cv::Mat D(1, 4, CV_64F); + theRNG().fill(D, cv::RNG::UNIFORM, -0.00001, 0.00001); + + /* Distort -> Undistort */ + cv::Mat distortedPoints; + cv::fisheye::distortPoints(points0, distortedPoints, K, D); + cv::Mat undistortedPoints; + cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, D); + + EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8); + + /* Undistort -> Distort */ + cv::fisheye::undistortPoints(points0, undistortedPoints, K, D); + cv::fisheye::distortPoints(undistortedPoints, distortedPoints, K, D); + + EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8); + } +} + TEST_F(fisheyeTest, undistortImage) { cv::Matx33d theK = this->K; diff --git a/modules/calib3d/test/test_undistort_points.cpp 
b/modules/calib3d/test/test_undistort_points.cpp index 8765e2c5eb..1ac18dedba 100644 --- a/modules/calib3d/test/test_undistort_points.cpp +++ b/modules/calib3d/test/test_undistort_points.cpp @@ -1,34 +1,24 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. + +#include // EXPECT_MAT_NEAR #include "test_precomp.hpp" namespace opencv_test { namespace { -class CV_UndistortTest : public cvtest::BaseTest +class UndistortPointsTest : public ::testing::Test { -public: - CV_UndistortTest(); - ~CV_UndistortTest(); protected: - void run(int); -private: void generate3DPointCloud(vector& points, Point3f pmin = Point3f(-1, -1, 5), Point3f pmax = Point3f(1, 1, 10)); void generateCameraMatrix(Mat& cameraMatrix); void generateDistCoeffs(Mat& distCoeffs, int count); - double thresh; - RNG rng; + double thresh = 1.0e-2; }; -CV_UndistortTest::CV_UndistortTest() -{ - thresh = 1.0e-2; -} -CV_UndistortTest::~CV_UndistortTest() {} - -void CV_UndistortTest::generate3DPointCloud(vector& points, Point3f pmin, Point3f pmax) +void UndistortPointsTest::generate3DPointCloud(vector& points, Point3f pmin, Point3f pmax) { RNG rng_Point = cv::theRNG(); // fix the seed to use "fixed" input 3D points for (size_t i = 0; i < points.size(); i++) @@ -39,31 +29,35 @@ void CV_UndistortTest::generate3DPointCloud(vector& points, Point3f pmi points[i] = Point3f(_x, _y, _z); } } -void CV_UndistortTest::generateCameraMatrix(Mat& cameraMatrix) + +void UndistortPointsTest::generateCameraMatrix(Mat& cameraMatrix) { const double fcMinVal = 1e-3; const double fcMaxVal = 100; cameraMatrix.create(3, 3, CV_64FC1); cameraMatrix.setTo(Scalar(0)); - cameraMatrix.at(0,0) = rng.uniform(fcMinVal, fcMaxVal); - cameraMatrix.at(1,1) = rng.uniform(fcMinVal, fcMaxVal); - cameraMatrix.at(0,2) = rng.uniform(fcMinVal, fcMaxVal); - cameraMatrix.at(1,2) = rng.uniform(fcMinVal, fcMaxVal); + 
cameraMatrix.at(0,0) = theRNG().uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(1,1) = theRNG().uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(0,2) = theRNG().uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(1,2) = theRNG().uniform(fcMinVal, fcMaxVal); cameraMatrix.at(2,2) = 1; } -void CV_UndistortTest::generateDistCoeffs(Mat& distCoeffs, int count) + +void UndistortPointsTest::generateDistCoeffs(Mat& distCoeffs, int count) { distCoeffs = Mat::zeros(count, 1, CV_64FC1); for (int i = 0; i < count; i++) - distCoeffs.at(i,0) = rng.uniform(0.0, 1.0e-3); + distCoeffs.at(i,0) = theRNG().uniform(-0.1, 0.1); } -void CV_UndistortTest::run(int /* start_from */) +TEST_F(UndistortPointsTest, accuracy) { Mat intrinsics, distCoeffs; generateCameraMatrix(intrinsics); + vector points(500); generate3DPointCloud(points); + vector projectedPoints; projectedPoints.resize(points.size()); @@ -71,10 +65,15 @@ void CV_UndistortTest::run(int /* start_from */) for (int idx = 0; idx < 3; idx++) { generateDistCoeffs(distCoeffs, modelMembersCount[idx]); - projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), Mat::zeros(3,1,CV_64FC1), intrinsics, distCoeffs, projectedPoints); + + projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), + Mat::zeros(3,1,CV_64FC1), intrinsics, + distCoeffs, projectedPoints); vector realUndistortedPoints; - projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), Mat::zeros(3,1,CV_64FC1), intrinsics, Mat::zeros(4,1,CV_64FC1), realUndistortedPoints); + projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), + Mat::zeros(3,1,CV_64FC1), intrinsics, + Mat::zeros(4,1,CV_64FC1), realUndistortedPoints); Mat undistortedPoints; undistortPoints(Mat(projectedPoints), undistortedPoints, intrinsics, distCoeffs); @@ -82,44 +81,43 @@ void CV_UndistortTest::run(int /* start_from */) Mat p; perspectiveTransform(undistortedPoints, p, intrinsics); undistortedPoints = p; - double diff = cvtest::norm(Mat(realUndistortedPoints), undistortedPoints, NORM_L2); - if (diff > thresh) - { - 
ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - return; - } - ts->set_failed_test_info(cvtest::TS::OK); + + EXPECT_MAT_NEAR(realUndistortedPoints, undistortedPoints.t(), thresh); } } -TEST(Calib3d_Undistort, accuracy) { CV_UndistortTest test; test.safe_run(); } - -TEST(Calib3d_Undistort, stop_criteria) +TEST_F(UndistortPointsTest, stop_criteria) { Mat cameraMatrix = (Mat_(3,3,CV_64F) << 857.48296979, 0, 968.06224829, 0, 876.71824265, 556.37145899, 0, 0, 1); Mat distCoeffs = (Mat_(5,1,CV_64F) << -2.57614020e-01, 8.77086999e-02, -2.56970803e-04, -5.93390389e-04, -1.52194091e-02); - RNG rng(2); - Point2d pt_distorted(rng.uniform(0.0, 1920.0), rng.uniform(0.0, 1080.0)); + + Point2d pt_distorted(theRNG().uniform(0.0, 1920.0), theRNG().uniform(0.0, 1080.0)); + std::vector pt_distorted_vec; pt_distorted_vec.push_back(pt_distorted); + const double maxError = 1e-6; TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 100, maxError); + std::vector pt_undist_vec; undistortPoints(pt_distorted_vec, pt_undist_vec, cameraMatrix, distCoeffs, noArray(), noArray(), criteria); - std::vector pt_redistorted_vec; std::vector pt_undist_vec_homogeneous; - pt_undist_vec_homogeneous.push_back( Point3d(pt_undist_vec[0].x, pt_undist_vec[0].y, 1.0) ); - projectPoints(pt_undist_vec_homogeneous, Mat::zeros(3,1,CV_64F), Mat::zeros(3,1,CV_64F), cameraMatrix, distCoeffs, pt_redistorted_vec); + pt_undist_vec_homogeneous.emplace_back(pt_undist_vec[0].x, pt_undist_vec[0].y, 1.0 ); + + std::vector pt_redistorted_vec; + projectPoints(pt_undist_vec_homogeneous, Mat::zeros(3,1,CV_64F), + Mat::zeros(3,1,CV_64F), cameraMatrix, distCoeffs, pt_redistorted_vec); + const double obtainedError = sqrt( pow(pt_distorted.x - pt_redistorted_vec[0].x, 2) + pow(pt_distorted.y - pt_redistorted_vec[0].y, 2) ); ASSERT_LE(obtainedError, maxError); } -TEST(undistortPoints, regression_14583) +TEST_F(UndistortPointsTest, regression_14583) { const int col = 720; // const int row = 540; From 
438fe3f9db55880204fd508025e5a58c846d78d9 Mon Sep 17 00:00:00 2001 From: rogday Date: Wed, 20 Apr 2022 07:55:04 +0300 Subject: [PATCH 81/84] Merge pull request #21805 from rogday:pretty_fix Mat pretty printer: fix submatrix indexation * fix submatrix indexation * fix channels --- samples/gdb/mat_pretty_printer.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/samples/gdb/mat_pretty_printer.py b/samples/gdb/mat_pretty_printer.py index e6ad2cbde2..54afd5e1d3 100644 --- a/samples/gdb/mat_pretty_printer.py +++ b/samples/gdb/mat_pretty_printer.py @@ -122,28 +122,38 @@ class Mat: (dtype, ctype) = flags.dtype() elsize = np.dtype(dtype).itemsize - ptr = m['data'] - dataptr = int(ptr) - length = (int(m['dataend']) - dataptr) // elsize - start = (int(m['datastart']) - dataptr) // elsize + shape = size.to_numpy() + steps = np.asarray([int(m['step']['p'][i]) for i in range(len(shape))], dtype=np.int64) - if length == 0: + ptr = m['data'] + # either we are default-constructed or sizes are zero + if int(ptr) == 0 or np.prod(shape * steps) == 0: self.mat = np.array([]) self.view = self.mat return + # we don't want to show excess brackets + if flags.channels() != 1: + shape = np.append(shape, flags.channels()) + steps = np.append(steps, elsize) + + # get the length of contiguous array from data to the last element of the matrix + length = 1 + np.sum((shape - 1) * steps) // elsize + if dtype != np.float16: + # read all elements into self.mat ctype = gdb.lookup_type(ctype) ptr = ptr.cast(ctype.array(length - 1).pointer()).dereference() self.mat = np.array([ptr[i] for i in range(length)], dtype=dtype) else: + # read as uint16_t and then reinterpret the bytes as float16 u16 = gdb.lookup_type('uint16_t') ptr = ptr.cast(u16.array(length - 1).pointer()).dereference() self.mat = np.array([ptr[i] for i in range(length)], dtype=np.uint16) self.mat = self.mat.view(np.float16) - steps = np.asarray([int(m['step']['p'][i]) for i in 
range(size.dims())], dtype=np.int64) - self.view = np.lib.stride_tricks.as_strided(self.mat[start:], shape=size.to_numpy(), strides=steps) + # numpy will do the heavy lifting of strided access + self.view = np.lib.stride_tricks.as_strided(self.mat, shape=shape, strides=steps) def __iter__(self): return iter({'data': stri(self.view)}.items()) From d23142027ff2f66211b28271870459a092221848 Mon Sep 17 00:00:00 2001 From: fengyuentau Date: Thu, 21 Apr 2022 14:48:40 +0800 Subject: [PATCH 82/84] add mirrors for tim-vx and others in opencv_contrib --- cmake/mirrors/custom.cmake | 30 ++++++++++++++++++++++-------- cmake/mirrors/gitcode.cmake | 36 ++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/cmake/mirrors/custom.cmake b/cmake/mirrors/custom.cmake index 2be93d8165..3cdf700e19 100644 --- a/cmake/mirrors/custom.cmake +++ b/cmake/mirrors/custom.cmake @@ -3,27 +3,39 @@ # OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade # from OPENCV_DOWNLOAD_MIRROR ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "") + +###### +# Download via commit id +###### # Tengine ocv_update(TENGINE_PKG_MD5_CUSTOM "") ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e +# NVIDIA_OPTICAL_FLOW +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "") +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) +# TIM-VX +ocv_update(TIM-VX_PKG_MD5_GITCODE "") +ocv_update(TIM-VX_PKG_MD5_ORIGINAL 92619cc4498014ac7a09834d5e33ebd5) +###### +# Download from release page +##### # TBB ocv_update(TBB_RELEASE_CUSTOM "") ocv_update(TBB_PKG_NAME_CUSTOM "") ocv_update(TBB_PKG_MD5_CUSTOM "") ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2 - # ADE ocv_update(ADE_RELEASE_CUSTOM "") ocv_update(ADE_PKG_NAME_CUSTOM "") ocv_update(ADE_PKG_MD5_CUSTOM "") ocv_update(ADE_PKG_MD5_ORIGINAL 
b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f -macro(ocv_download_url_custom_usercontent) +macro(ocv_download_url_custom_usercontent OWNER) string(REPLACE "/" ";" DL_URL_split ${DL_URL}) list(GET DL_URL_split 5 __COMMIT_ID) list(GET DL_URL_split 6 __PKG_NAME) - set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/opencv/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/") + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${OWNER}/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/") endmacro() macro(ocv_download_url_custom_archive_commit_id) if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m") @@ -54,16 +66,18 @@ macro(ocv_download_url_custom_archive_release) string(REPLACE "/" ";" DL_URL_split ${DL_URL}) list(GET DL_URL_split 3 __OWNER) list(GET DL_URL_split 4 __REPO_NAME) - set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_PKG_RELEASE_CUSTOM}/${__REPO_NAME}-") + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_RELEASE_CUSTOM}/${__REPO_NAME}-") set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}") endmacro() if("m${OPENCV_DOWNLOAD_MIRROR_URL}" STREQUAL "m") message(WARNING "ocv_download: specify OPENCV_DOWNLOAD_MIRROR_URL to use custom mirror.") else() - if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV")) - ocv_download_url_custom_usercontent() - elseif(DL_ID STREQUAL "TENGINE") + if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "data") OR (DL_ID STREQUAL "xfeatures2d/boostdesc") OR (DL_ID STREQUAL "xfeatures2d/vgg")) + ocv_download_url_custom_usercontent(opencv) + elseif(DL_ID STREQUAL "wechat_qrcode") + ocv_download_url_gitcode_usercontent(WeChatCV) + elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_custom_archive_commit_id() elseif(DL_ID STREQUAL "TBB") ocv_download_url_custom_archive_release() @@ -74,4 +88,4 @@ else() else() message(STATUS 
"ocv_download: Unknown download ID ${DL_ID} for using mirror ${OPENCV_DOWNLOAD_MIRROR_URL}. Use original source instead.") endif() -endif() \ No newline at end of file +endif() diff --git a/cmake/mirrors/gitcode.cmake b/cmake/mirrors/gitcode.cmake index aafe9635f3..abd7a29be4 100644 --- a/cmake/mirrors/gitcode.cmake +++ b/cmake/mirrors/gitcode.cmake @@ -1,14 +1,25 @@ -# Tengine (Download via commit id) +###### +# Download via commit id +###### +# Tengine ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690) ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e +# NVIDIA_OPTICAL_FLOW +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47) +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) +# TIM-VX +ocv_update(TIM-VX_PKG_MD5_GITCODE 3f2a548b40b170668aaa60d4f60ba40b) +ocv_update(TIM-VX_PKG_MD5_ORIGINAL 92619cc4498014ac7a09834d5e33ebd5) -# TBB (Download from release page) +###### +# Download from release page +##### +# TBB ocv_update(TBB_RELEASE_GITCODE "v2020.2") ocv_update(TBB_PKG_NAME_GITCODE "tbb-${TBB_RELEASE_GITCODE}") ocv_update(TBB_PKG_MD5_GITCODE 4eeafdf16a90cb66e39a31c8d6c6804e) ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2 - -# ADE (Download from release page) +# ADE ocv_update(ADE_RELEASE_GITCODE "v0.1.1f") ocv_update(ADE_PKG_NAME_GITCODE "ade-${ADE_RELEASE_GITCODE}") ocv_update(ADE_PKG_MD5_GITCODE c12909e0ccfa93138c820ba91ff37b3c) @@ -19,11 +30,14 @@ ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_ # 1. Extract repo owner and repo name from DL_URL. # 2. Put repo owner and repo name into the placeholders of new DL_URL. 
# -macro(ocv_download_url_gitcode_usercontent) +macro(ocv_download_url_gitcode_usercontent OWNER) string(REPLACE "/" ";" DL_URL_split ${DL_URL}) list(GET DL_URL_split 5 __COMMIT_ID) list(GET DL_URL_split 6 __PKG_NAME) - set(DL_URL "https://gitcode.net/opencv/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/") + set(DL_URL "https://gitcode.net/${OWNER}/opencv_3rdparty/-/raw/${__COMMIT_ID}/") + if(__PKG_NAME) + set(DL_URL "${DL_URL}${__PKG_NAME}/") + endif() endmacro() # # Replace download links and checksums for archives/releases in other repositories: @@ -37,7 +51,7 @@ macro(ocv_download_url_gitcode_archive_commit_id) string(REPLACE "/" ";" DL_URL_split ${DL_URL}) list(GET DL_URL_split 3 __OWNER) list(GET DL_URL_split 4 __REPO_NAME) - set(DL_URL "https://gitcode.net/${__OWNER}/${__REPO_NAME}/-/archive/") + set(DL_URL "https://gitcode.net/mirrors/${__OWNER}/${__REPO_NAME}/-/archive/") set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}") else() message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.") @@ -55,9 +69,11 @@ macro(ocv_download_url_gitcode_archive_release) endif() endmacro() -if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV")) - ocv_download_url_gitcode_usercontent() -elseif(DL_ID STREQUAL "TENGINE") +if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "data") OR (DL_ID STREQUAL "xfeatures2d/boostdesc") OR (DL_ID STREQUAL "xfeatures2d/vgg")) + ocv_download_url_gitcode_usercontent(opencv) +elseif(DL_ID STREQUAL "wechat_qrcode") + ocv_download_url_gitcode_usercontent(mirrors/WeChatCV) +elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_gitcode_archive_commit_id() elseif(DL_ID STREQUAL "TBB") ocv_download_url_gitcode_archive_release() From a55fa8389e1ef46b64fc00257256c280aadf5bb8 Mon Sep 17 00:00:00 2001 From: Andrey Senyaev <76472231+asenyaev@users.noreply.github.com> Date: Thu, 21 Apr 2022 17:59:56 
+0300 Subject: [PATCH 83/84] Merge pull request #21875 from asenyaev:asen/workflow_only_linux Added workflow for Github Actions to build and test OpenCV on Linux * Added workflow for Github Actions to build and test OpenCV * Merged a build and tests in one job, changed naming * Renamed job names, split workflows by branch, removed and added some cmake flags, save unit tests results as a html file * Split tests by steps, removed workflows for 4.x and 5.x branches --- .github/workflows/PR-3.4-U20.yaml | 167 ++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 .github/workflows/PR-3.4-U20.yaml diff --git a/.github/workflows/PR-3.4-U20.yaml b/.github/workflows/PR-3.4-U20.yaml new file mode 100644 index 0000000000..8c4f0e90e2 --- /dev/null +++ b/.github/workflows/PR-3.4-U20.yaml @@ -0,0 +1,167 @@ +name: PR:3.4 U20 + +on: + pull_request: + branches: + - 3.4 + +env: + EXTRA_CMAKE_OPTIONS: '-DBUILD_DOCS=ON -DPYTHON_DEFAULT_EXECUTABLE=/usr/bin/python3 -DBUILD_EXAMPLES=ON -DOPENCV_ENABLE_NONFREE=ON -DENABLE_CCACHE=OFF' + OPENCV_TEST_DATA_PATH: '/opencv_extra/testdata' + OPENCV_DOCKER_WORKDIR: '/__w/opencv/opencv' + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + ANT_HOME: '/usr/share/ant' + PYTHONPATH: /opencv-build/python_loader:$PYTHONPATH + +jobs: + BuildAndTest: + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash + container: + image: quay.io/asenyaev/opencv-ubuntu:20.04 + steps: + - name: PR info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: Clean + run: find . 
-mindepth 1 -delete + - name: Fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + - name: Merge opencv with ${{ env.SOURCE_BRANCH_NAME }} branch + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }} + git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }} + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" + - name: Clone opencv_extra + run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_extra.git /opencv_extra + - name: Configure OpenCV + run: | + cd /opencv-build + cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} ${{ env.OPENCV_DOCKER_WORKDIR }} + - name: Build OpenCV + run: | + cd /opencv-build + ninja + - name: Accuracy:calib3d + run: cd /opencv-build && xvfb-run -a bin/opencv_test_calib3d + - name: Accuracy:core + run: cd /opencv-build && xvfb-run -a bin/opencv_test_core + - name: Accuracy:dnn + run: cd /opencv-build && xvfb-run -a bin/opencv_test_dnn + - name: Accuracy:features2d + run: cd /opencv-build && xvfb-run -a bin/opencv_test_features2d + - name: Accuracy:flann + run: cd /opencv-build && xvfb-run -a bin/opencv_test_flann + - name: Accuracy:highgui + run: cd /opencv-build && xvfb-run -a bin/opencv_test_highgui + - name: Accuracy:imgcodecs + run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgcodecs + - name: Accuracy:imgproc + run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgproc + - name: Accuracy:ml + run: cd /opencv-build && xvfb-run -a bin/opencv_test_ml + - name: Accuracy:objdetect + run: cd /opencv-build && xvfb-run -a bin/opencv_test_objdetect + - name: Accuracy:photo + run: cd /opencv-build && xvfb-run -a bin/opencv_test_photo + - name: Accuracy:shape + run: cd /opencv-build && xvfb-run -a bin/opencv_test_shape + - name: Accuracy:stitching + run: cd /opencv-build && 
xvfb-run -a bin/opencv_test_stitching + - name: Accuracy:superres + run: cd /opencv-build && xvfb-run -a bin/opencv_test_superres + - name: Accuracy:video + run: cd /opencv-build && xvfb-run -a bin/opencv_test_video + - name: Accuracy:videoio + run: cd /opencv-build && xvfb-run -a bin/opencv_test_videoio + - name: Accuracy:videostab + run: cd /opencv-build && xvfb-run -a bin/opencv_test_videostab + - name: Performance:calib3d + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_calib3d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:core + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_core --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:dnn + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_dnn --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:features2d + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_features2d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:imgcodecs + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgcodecs --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:imgproc + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgproc --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:objdetect + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_objdetect --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:photo + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_photo --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:stitching + run: cd 
/opencv-build && xvfb-run -a bin/opencv_perf_stitching --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:superres + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_superres --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:video + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_video --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:videoio + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_videoio --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Python3 + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/python/test + python3 ./test.py --repo ../../../ -v + - name: Java + run: cd /opencv-build && xvfb-run -a python3 ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/ts/misc/run.py . -a -t java + - name: Save Unit Test Results + uses: actions/upload-artifact@v3 + if: always() + with: + name: junit-html + path: /opencv-build/java_test/testResults/junit-noframes.html + - name: Pylint + run: cd /opencv-build && cmake --build . --config release --target check_pylint -- -j4 + + BuildContrib: + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash + container: + image: quay.io/asenyaev/opencv-ubuntu:20.04 + steps: + - name: PR info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: Clean + run: find . 
-mindepth 1 -delete + - name: Fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + - name: Merge opencv with a test branch + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }} + git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }} + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" + - name: Clone opencv_contrib + run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_contrib.git /opencv_contrib + - name: Configure OpenCV Contrib + run: | + cd /opencv-contrib-build + cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} -DOPENCV_EXTRA_MODULES_PATH=/opencv_contrib/modules ${{ env.OPENCV_DOCKER_WORKDIR }} + - name: Build OpenCV Contrib + run: | + cd /opencv-contrib-build + ninja From 2e41db39f503eaaf919118654c0cadafaf128b25 Mon Sep 17 00:00:00 2001 From: Andrey Senyaev <76472231+asenyaev@users.noreply.github.com> Date: Thu, 21 Apr 2022 18:04:22 +0300 Subject: [PATCH 84/84] Merge pull request #21876 from asenyaev:asen/workflow_only_linux_4x Added workflow for Github Actions to build and test OpenCV on Linux for 4.x * Added workflow for Github Actions to build and test OpenCV * Merged a build and tests jobs into one, split tests by steps, renamed job names --- .github/workflows/PR-4.x-U20.yaml | 163 ++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 .github/workflows/PR-4.x-U20.yaml diff --git a/.github/workflows/PR-4.x-U20.yaml b/.github/workflows/PR-4.x-U20.yaml new file mode 100644 index 0000000000..978a3a4301 --- /dev/null +++ b/.github/workflows/PR-4.x-U20.yaml @@ -0,0 +1,163 @@ +name: PR:4.x U20 + +on: + pull_request: + branches: + - 4.x + +env: + EXTRA_CMAKE_OPTIONS: '-DBUILD_DOCS=ON -DPYTHON_DEFAULT_EXECUTABLE=/usr/bin/python3 -DBUILD_EXAMPLES=ON -DOPENCV_ENABLE_NONFREE=ON 
-DENABLE_CCACHE=OFF' + OPENCV_TEST_DATA_PATH: '/opencv_extra/testdata' + OPENCV_DOCKER_WORKDIR: '/__w/opencv/opencv' + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + ANT_HOME: '/usr/share/ant' + PYTHONPATH: /opencv-build/python_loader:$PYTHONPATH + +jobs: + BuildAndTest: + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash + container: + image: quay.io/asenyaev/opencv-ubuntu:20.04 + steps: + - name: PR info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: Clean + run: find . -mindepth 1 -delete + - name: Fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + - name: Merge opencv with ${{ env.SOURCE_BRANCH_NAME }} branch + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }} + git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }} + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" + - name: Clone opencv_extra + run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_extra.git /opencv_extra + - name: Configure OpenCV + run: | + cd /opencv-build + cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} ${{ env.OPENCV_DOCKER_WORKDIR }} + - name: Build OpenCV + run: | + cd /opencv-build + ninja + - name: Accuracy:calib3d + run: cd /opencv-build && xvfb-run -a bin/opencv_test_calib3d + - name: Accuracy:core + run: cd /opencv-build && xvfb-run -a bin/opencv_test_core + - name: Accuracy:dnn + run: cd /opencv-build && xvfb-run -a bin/opencv_test_dnn + - name: Accuracy:features2d + run: cd /opencv-build && xvfb-run -a bin/opencv_test_features2d + - name: Accuracy:flann + run: cd /opencv-build && 
xvfb-run -a bin/opencv_test_flann + - name: Accuracy:gapi + run: cd /opencv-build && xvfb-run -a bin/opencv_test_gapi + - name: Accuracy:highgui + run: cd /opencv-build && xvfb-run -a bin/opencv_test_highgui + - name: Accuracy:imgcodecs + run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgcodecs + - name: Accuracy:imgproc + run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgproc + - name: Accuracy:ml + run: cd /opencv-build && xvfb-run -a bin/opencv_test_ml + - name: Accuracy:objdetect + run: cd /opencv-build && xvfb-run -a bin/opencv_test_objdetect + - name: Accuracy:photo + run: cd /opencv-build && xvfb-run -a bin/opencv_test_photo + - name: Accuracy:stitching + run: cd /opencv-build && xvfb-run -a bin/opencv_test_stitching + - name: Accuracy:video + run: cd /opencv-build && xvfb-run -a bin/opencv_test_video + - name: Accuracy:videoio + run: cd /opencv-build && xvfb-run -a bin/opencv_test_videoio + - name: Performance:calib3d + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_calib3d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:core + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_core --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:dnn + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_dnn --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:features2d + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_features2d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:gapi + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_gapi --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:imgcodecs + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgcodecs --perf_impl=plain 
--perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:imgproc + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgproc --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:objdetect + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_objdetect --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:photo + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_photo --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:stitching + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_stitching --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:video + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_video --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:videoio + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_videoio --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Python3 + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/python/test + python3 ./test.py --repo ../../../ -v + - name: Java + run: cd /opencv-build && xvfb-run -a python3 ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/ts/misc/run.py . -a -t java + - name: Save Unit Test Results + uses: actions/upload-artifact@v3 + if: always() + with: + name: junit-html + path: /opencv-build/java_test/testResults/junit-noframes.html + - name: Pylint + run: cd /opencv-build && cmake --build . 
--config release --target check_pylint -- -j4 + + BuildContrib: + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash + container: + image: quay.io/asenyaev/opencv-ubuntu:20.04 + steps: + - name: PR info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: Clean + run: find . -mindepth 1 -delete + - name: Fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + - name: Merge opencv with a test branch + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }} + git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }} + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" + - name: Clone opencv_contrib + run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_contrib.git /opencv_contrib + - name: Configure OpenCV Contrib + run: | + cd /opencv-contrib-build + cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} -DOPENCV_EXTRA_MODULES_PATH=/opencv_contrib/modules ${{ env.OPENCV_DOCKER_WORKDIR }} + - name: Build OpenCV Contrib + run: | + cd /opencv-contrib-build + ninja