From b71744082b9d601bde333c2d3743a4694fb1d498 Mon Sep 17 00:00:00 2001 From: David Bradley Date: Thu, 3 Jul 2014 15:15:23 -0400 Subject: [PATCH 01/71] cudabgsegm module was not compiling. Changing all references of opencv2/legacy.hpp to opencv2/cudalegacy.hpp seemed to work. I'm new at this so it would be good to have someone familiar with the build system confirm the change. --- modules/cudabgsegm/perf/perf_bgsegm.cpp | 8 ++++---- modules/cudabgsegm/test/test_bgsegm.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/cudabgsegm/perf/perf_bgsegm.cpp b/modules/cudabgsegm/perf/perf_bgsegm.cpp index 9d3da2927f..02fc9a8ee9 100644 --- a/modules/cudabgsegm/perf/perf_bgsegm.cpp +++ b/modules/cudabgsegm/perf/perf_bgsegm.cpp @@ -42,8 +42,8 @@ #include "perf_precomp.hpp" -#ifdef HAVE_OPENCV_LEGACY -# include "opencv2/legacy.hpp" +#ifdef HAVE_OPENCV_CUDALEGACY +# include "opencv2/cudalegacy.hpp" #endif #ifdef HAVE_OPENCV_CUDAIMGPROC @@ -72,7 +72,7 @@ using namespace perf; #if BUILD_WITH_VIDEO_INPUT_SUPPORT -#ifdef HAVE_OPENCV_LEGACY +#ifdef HAVE_OPENCV_CUDALEGACY namespace cv { @@ -150,7 +150,7 @@ PERF_TEST_P(Video, FGDStatModel, } else { -#ifdef HAVE_OPENCV_LEGACY +#ifdef HAVE_OPENCV_CUDALEGACY IplImage ipl_frame = frame; cv::Ptr model(cvCreateFGDStatModel(&ipl_frame)); diff --git a/modules/cudabgsegm/test/test_bgsegm.cpp b/modules/cudabgsegm/test/test_bgsegm.cpp index 75d6d73a3f..34f3dcc9ab 100644 --- a/modules/cudabgsegm/test/test_bgsegm.cpp +++ b/modules/cudabgsegm/test/test_bgsegm.cpp @@ -42,8 +42,8 @@ #include "test_precomp.hpp" -#ifdef HAVE_OPENCV_LEGACY -# include "opencv2/legacy.hpp" +#ifdef HAVE_OPENCV_CUDALEGACY +# include "opencv2/cudalegacy.hpp" #endif #ifdef HAVE_CUDA @@ -66,7 +66,7 @@ using namespace cvtest; ////////////////////////////////////////////////////// // FGDStatModel -#if BUILD_WITH_VIDEO_INPUT_SUPPORT && defined(HAVE_OPENCV_LEGACY) +#if BUILD_WITH_VIDEO_INPUT_SUPPORT && defined(HAVE_OPENCV_CUDALEGACY) namespace cv { From 68827072c903e4cee6c9bd683f71294521051c09 Mon Sep 17 00:00:00 2001 From: Adi Shavit Date: Mon, 20 Jan 2014 21:44:27 +0200 Subject: [PATCH 02/71] [HighGUI] On Windows: Support Ctrl+C to copy image to clipboard. --- modules/highgui/src/window_w32.cpp | 84 ++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index 8b317dff71..a7aca0ec47 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -1286,6 +1286,10 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) switch(uMsg) { + case WM_COPY: + ::WindowProc(hwnd, uMsg, wParam, lParam); // call highgui proc. There may be a better way to do this. + break; + case WM_DESTROY: icvRemoveWindow(window); @@ -1448,6 +1452,81 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM // Process the message switch(uMsg) { + case WM_COPY: + { + if (!::OpenClipboard(hwnd) ) + break; + + HDC hDC = 0; + HDC memDC = 0; + HBITMAP memBM = 0; + + // We'll use a do-while(0){} scope as a single-run breakable scope + // Upon any error we can jump out of the single-time while scope to clean up the resources. + do + { + if (!::EmptyClipboard()) + break; + + if(!window->image) + break; + + // Get window device context + if (0 == (hDC = ::GetDC(hwnd))) + break; + + // Create another DC compatible with hDC + if (0 == (memDC = ::CreateCompatibleDC( hDC ))) + break; + + // Determine the bitmap's dimensions + int nchannels = 3; + SIZE size = {0,0}; + icvGetBitmapData( window, &size, &nchannels, 0 ); + + // Create bitmap to draw on and it in the new DC + if (0 == (memBM = ::CreateCompatibleBitmap ( hDC, size.cx, size.cy))) + break; + + if (!::SelectObject( memDC, memBM )) + break; + + // Begin drawing to DC + if (!::SetStretchBltMode(memDC, COLORONCOLOR)) + break; + + RGBQUAD table[256]; + if( 1 == nchannels ) + { + for(int i = 0; i < 256; ++i) + { + table[i].rgbBlue = (unsigned char)i; + table[i].rgbGreen = (unsigned char)i; + table[i].rgbRed = (unsigned char)i; + } + if (!::SetDIBColorTable(window->dc, 0, 255, table)) + break; + } + + // The image copied to the clipboard will be in its original size, regardless if the window itself was resized. + + // Render the image to the dc/bitmap (at original size). + if (!::BitBlt( memDC, 0, 0, size.cx, size.cy, window->dc, 0, 0, SRCCOPY )) + break; + + // Finally, set bitmap to clipboard + ::SetClipboardData(CF_BITMAP, memBM); + } while (0,0); // (0,0) instead of (0) to avoid MSVC compiler warning C4127: "conditional expression is constant" + + ////////////////////////////////////////////////////////////////////////// + // if handle is allocated (i.e. != 0) then clean-up. + memBM && ::DeleteObject(memBM); + memDC && ::DeleteDC(memDC); + hDC && ::ReleaseDC(hwnd, hDC); + ::CloseClipboard(); + break; + } + case WM_WINDOWPOSCHANGING: { LPWINDOWPOS pos = (LPWINDOWPOS)lParam; @@ -1798,6 +1877,11 @@ cvWaitKey( int delay ) is_processed = 1; return (int)(message.wParam << 16); } + + // Intercept Ctrl+C for copy to clipboard + if ('C' == message.wParam && (::GetKeyState(VK_CONTROL)>>15)) + ::PostMessage(message.hwnd, WM_COPY, 0, 0); + default: DispatchMessage(&message); is_processed = 1; From d44e3c369d58579013d42c4ea573215385fe51d9 Mon Sep 17 00:00:00 2001 From: Adi Shavit Date: Mon, 20 Jan 2014 22:09:52 +0200 Subject: [PATCH 03/71] Added documentation. --- modules/highgui/doc/user_interface.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/highgui/doc/user_interface.rst b/modules/highgui/doc/user_interface.rst index 0d0ccde946..565ea6d449 100644 --- a/modules/highgui/doc/user_interface.rst +++ b/modules/highgui/doc/user_interface.rst @@ -83,6 +83,9 @@ If window was created with OpenGL support, ``imshow`` also support :ocv:class:`o .. note:: This function should be followed by ``waitKey`` function which displays the image for specified milliseconds. Otherwise, it won't display the image. For example, ``waitKey(0)`` will display the window infinitely until any keypress (it is suitable for image display). ``waitKey(25)`` will display a frame for 25 ms, after which display will be automatically closed. (If you put it in a loop to read videos, it will display the video frame-by-frame) +.. note:: + + [Windows Backend Only] Pressing Ctrl+C will copy the image to the clipboard. namedWindow --------------- From e25dca21d18c1a6af4cf87e697b5612dd34d0175 Mon Sep 17 00:00:00 2001 From: Adi Shavit Date: Mon, 27 Jan 2014 10:14:49 +0200 Subject: [PATCH 04/71] Fixed indentation. --- modules/highgui/src/window_w32.cpp | 84 +++++++++++++++--------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index a7aca0ec47..658bb4f481 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -1287,8 +1287,8 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) switch(uMsg) { case WM_COPY: - ::WindowProc(hwnd, uMsg, wParam, lParam); // call highgui proc. There may be a better way to do this. - break; + ::WindowProc(hwnd, uMsg, wParam, lParam); // call highgui proc. There may be a better way to do this. + break; case WM_DESTROY: @@ -1465,57 +1465,57 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM // Upon any error we can jump out of the single-time while scope to clean up the resources. do { - if (!::EmptyClipboard()) - break; + if (!::EmptyClipboard()) + break; - if(!window->image) - break; + if(!window->image) + break; - // Get window device context - if (0 == (hDC = ::GetDC(hwnd))) - break; + // Get window device context + if (0 == (hDC = ::GetDC(hwnd))) + break; - // Create another DC compatible with hDC - if (0 == (memDC = ::CreateCompatibleDC( hDC ))) - break; + // Create another DC compatible with hDC + if (0 == (memDC = ::CreateCompatibleDC( hDC ))) + break; - // Determine the bitmap's dimensions - int nchannels = 3; - SIZE size = {0,0}; - icvGetBitmapData( window, &size, &nchannels, 0 ); + // Determine the bitmap's dimensions + int nchannels = 3; + SIZE size = {0,0}; + icvGetBitmapData( window, &size, &nchannels, 0 ); - // Create bitmap to draw on and it in the new DC - if (0 == (memBM = ::CreateCompatibleBitmap ( hDC, size.cx, size.cy))) - break; + // Create bitmap to draw on and it in the new DC + if (0 == (memBM = ::CreateCompatibleBitmap ( hDC, size.cx, size.cy))) + break; - if (!::SelectObject( memDC, memBM )) - break; + if (!::SelectObject( memDC, memBM )) + break; - // Begin drawing to DC - if (!::SetStretchBltMode(memDC, COLORONCOLOR)) - break; + // Begin drawing to DC + if (!::SetStretchBltMode(memDC, COLORONCOLOR)) + break; - RGBQUAD table[256]; - if( 1 == nchannels ) - { - for(int i = 0; i < 256; ++i) - { - table[i].rgbBlue = (unsigned char)i; - table[i].rgbGreen = (unsigned char)i; - table[i].rgbRed = (unsigned char)i; - } - if (!::SetDIBColorTable(window->dc, 0, 255, table)) - break; - } + RGBQUAD table[256]; + if( 1 == nchannels ) + { + for(int i = 0; i < 256; ++i) + { + table[i].rgbBlue = (unsigned char)i; + table[i].rgbGreen = (unsigned char)i; + table[i].rgbRed = (unsigned char)i; + } + if (!::SetDIBColorTable(window->dc, 0, 255, table)) + break; + } - // The image copied to the clipboard will be in its original size, regardless if the window itself was resized. + // The image copied to the clipboard will be in its original size, regardless if the window itself was resized. - // Render the image to the dc/bitmap (at original size). - if (!::BitBlt( memDC, 0, 0, size.cx, size.cy, window->dc, 0, 0, SRCCOPY )) - break; + // Render the image to the dc/bitmap (at original size). + if (!::BitBlt( memDC, 0, 0, size.cx, size.cy, window->dc, 0, 0, SRCCOPY )) + break; - // Finally, set bitmap to clipboard - ::SetClipboardData(CF_BITMAP, memBM); + // Finally, set bitmap to clipboard + ::SetClipboardData(CF_BITMAP, memBM); } while (0,0); // (0,0) instead of (0) to avoid MSVC compiler warning C4127: "conditional expression is constant" ////////////////////////////////////////////////////////////////////////// From b449bd5150076535e984805860fbe23484bec840 Mon Sep 17 00:00:00 2001 From: Adi Shavit Date: Thu, 3 Jul 2014 22:45:11 +0300 Subject: [PATCH 05/71] Clarified code. --- modules/highgui/src/window_w32.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index 658bb4f481..bcf1bae8be 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -1520,9 +1520,9 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM ////////////////////////////////////////////////////////////////////////// // if handle is allocated (i.e. != 0) then clean-up. - memBM && ::DeleteObject(memBM); - memDC && ::DeleteDC(memDC); - hDC && ::ReleaseDC(hwnd, hDC); + if (memBM) ::DeleteObject(memBM); + if (memDC) ::DeleteDC(memDC); + if (hDC) ::ReleaseDC(hwnd, hDC); ::CloseClipboard(); break; } From 7a77cf584a64a5a929c1928bc2da3e442d2be083 Mon Sep 17 00:00:00 2001 From: Petr Glotov Date: Fri, 16 May 2014 22:52:07 -0700 Subject: [PATCH 06/71] added timelapse --- .../opencv2/stitching/detail/matchers.hpp | 14 +++ .../opencv2/stitching/detail/timelapsers.hpp | 86 ++++++++++++++ .../include/opencv2/stitching/detail/util.hpp | 1 + modules/stitching/src/matchers.cpp | 34 ++++++ modules/stitching/src/precomp.hpp | 1 + modules/stitching/src/timelapsers.cpp | 107 ++++++++++++++++++ modules/stitching/src/util.cpp | 15 +++ samples/cpp/stitching_detailed.cpp | 74 ++++++++++-- 8 files changed, 322 insertions(+), 10 deletions(-) create mode 100644 modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp create mode 100644 modules/stitching/src/timelapsers.cpp diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp index 8c3284a466..d87a1fff44 100644 --- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp @@ -183,6 +183,20 @@ protected: Ptr impl_; }; +class CV_EXPORTS BestOf2NearestRangeMatcher : public BestOf2NearestMatcher +{ +public: + BestOf2NearestRangeMatcher(int range_width = 5, bool try_use_gpu = false, float match_conf = 0.3f, + int num_matches_thresh1 = 6, int num_matches_thresh2 = 6); + + void operator ()(const std::vector &features, std::vector &pairwise_matches, + const cv::UMat &mask = cv::UMat()); + + +protected: + int range_width_; +}; + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp b/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp new file mode 100644 index 0000000000..f881a9b934 --- /dev/null +++ b/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp @@ -0,0 +1,86 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#ifndef __OPENCV_STITCHING_TIMELAPSERS_HPP__ +#define __OPENCV_STITCHING_TIMELAPSERS_HPP__ + +#include "opencv2/core.hpp" + +namespace cv { +namespace detail { + +// Base Timelapser class, takes a sequence of images, applies appropriate shift, stores result in dst_. + +class CV_EXPORTS Timelapser +{ +public: + + enum {AS_IS, CROP}; + + virtual ~Timelapser() {} + + static Ptr createDefault(int type); + + virtual void initialize(const std::vector &corners, const std::vector &sizes); + virtual void process(InputArray img, InputArray mask, Point tl); + virtual const UMat& getDst() {return dst_;} + +protected: + + virtual bool test_point(Point pt); + + UMat dst_; + Rect dst_roi_; +}; + + +class CV_EXPORTS TimelapserCrop : public Timelapser +{ +public: + virtual void initialize(const std::vector &corners, const std::vector &sizes); +}; + +} // namespace detail +} // namespace cv + +#endif // __OPENCV_STITCHING_TIMELAPSERS_HPP__ diff --git a/modules/stitching/include/opencv2/stitching/detail/util.hpp b/modules/stitching/include/opencv2/stitching/detail/util.hpp index 6b1c5f34f3..23c413d5c2 100644 --- a/modules/stitching/include/opencv2/stitching/detail/util.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/util.hpp @@ -148,6 +148,7 @@ private: CV_EXPORTS bool overlapRoi(Point tl1, Point tl2, Size sz1, Size sz2, Rect &roi); CV_EXPORTS Rect resultRoi(const std::vector &corners, const std::vector &images); CV_EXPORTS Rect resultRoi(const std::vector &corners, const std::vector &sizes); +CV_EXPORTS Rect resultRoiIntersection(const std::vector &corners, const std::vector &sizes); CV_EXPORTS Point resultTl(const std::vector &corners); // Returns random 'count' element subset of the {0,1,...,size-1} set diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index c303c4aaba..a512e35254 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -646,5 +646,39 @@ void BestOf2NearestMatcher::collectGarbage() impl_->collectGarbage(); } + +BestOf2NearestRangeMatcher::BestOf2NearestRangeMatcher(int range_width, bool try_use_gpu, float match_conf, int num_matches_thresh1, int num_matches_thresh2): BestOf2NearestMatcher(try_use_gpu, match_conf, num_matches_thresh1, num_matches_thresh2) +{ + range_width_ = range_width; +} + + +void BestOf2NearestRangeMatcher::operator ()(const std::vector &features, std::vector &pairwise_matches, + const UMat &mask) +{ + const int num_images = static_cast(features.size()); + + CV_Assert(mask.empty() || (mask.type() == CV_8U && mask.cols == num_images && mask.rows)); + Mat_ mask_(mask.getMat(ACCESS_READ)); + if (mask_.empty()) + mask_ = Mat::ones(num_images, num_images, CV_8U); + + std::vector > near_pairs; + for (int i = 0; i < num_images - 1; ++i) + for (int j = i + 1; j < std::min(num_images, i + range_width_); ++j) + if (features[i].keypoints.size() > 0 && features[j].keypoints.size() > 0 && mask_(i, j)) + near_pairs.push_back(std::make_pair(i, j)); + + pairwise_matches.resize(num_images * num_images); + MatchPairsBody body(*this, features, pairwise_matches, near_pairs); + + if (is_thread_safe_) + parallel_for_(Range(0, static_cast(near_pairs.size())), body); + else + body(Range(0, static_cast(near_pairs.size()))); + LOGLN_CHAT(""); +} + + } // namespace detail } // namespace cv diff --git a/modules/stitching/src/precomp.hpp b/modules/stitching/src/precomp.hpp index 18ce413094..759d03635d 100644 --- a/modules/stitching/src/precomp.hpp +++ b/modules/stitching/src/precomp.hpp @@ -59,6 +59,7 @@ #include "opencv2/stitching.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" +#include "opencv2/stitching/detail/timelapsers.hpp" #include "opencv2/stitching/detail/camera.hpp" #include "opencv2/stitching/detail/exposure_compensate.hpp" #include "opencv2/stitching/detail/matchers.hpp" diff --git a/modules/stitching/src/timelapsers.cpp b/modules/stitching/src/timelapsers.cpp new file mode 100644 index 0000000000..d78ad86a57 --- /dev/null +++ b/modules/stitching/src/timelapsers.cpp @@ -0,0 +1,107 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencl_kernels.hpp" + +namespace cv { +namespace detail { + +Ptr Timelapser::createDefault(int type) +{ + if (type == AS_IS) + return makePtr(); + if (type == CROP) + return makePtr(); + CV_Error(Error::StsBadArg, "unsupported timelapsing method"); + return Ptr(); +} + + +void Timelapser::initialize(const std::vector &corners, const std::vector &sizes) +{ + dst_roi_ = resultRoi(corners, sizes); + dst_.create(dst_roi_.size(), CV_16SC3); +} + +void Timelapser::process(InputArray _img, InputArray /*_mask*/, Point tl) +{ + dst_.setTo(Scalar::all(0)); + + Mat img = _img.getMat(); + Mat dst = dst_.getMat(ACCESS_RW); + + CV_Assert(img.type() == CV_16SC3); + int dx = tl.x - dst_roi_.x; + int dy = tl.y - dst_roi_.y; + + for (int y = 0; y < img.rows; ++y) + { + const Point3_ *src_row = img.ptr >(y); + + for (int x = 0; x < img.cols; ++x) + { + if (test_point(Point(tl.x + x, tl.y + y))) + { + Point3_ *dst_row = dst.ptr >(dy + y); + dst_row[dx + x] = src_row[x]; + } + } + } +} + + +bool Timelapser::test_point(Point pt) +{ + return dst_roi_.contains(pt); +} + + +void TimelapserCrop::initialize(const std::vector &corners, const std::vector &sizes) +{ + dst_roi_ = resultRoiIntersection(corners, sizes); + dst_.create(dst_roi_.size(), CV_16SC3); +} + + +} // namespace detail +} // namespace cv diff --git a/modules/stitching/src/util.cpp b/modules/stitching/src/util.cpp index 5e026f0989..ce36d9df3e 100644 --- a/modules/stitching/src/util.cpp +++ b/modules/stitching/src/util.cpp @@ -137,6 +137,21 @@ Rect resultRoi(const std::vector &corners, const std::vector &sizes return Rect(tl, br); } +Rect resultRoiIntersection(const std::vector &corners, const std::vector &sizes) +{ + CV_Assert(sizes.size() == corners.size()); + Point tl(std::numeric_limits::min(), std::numeric_limits::min()); + Point br(std::numeric_limits::max(), std::numeric_limits::max()); + for (size_t i = 0; i < corners.size(); ++i) + { + tl.x = std::max(tl.x, corners[i].x); + tl.y = std::max(tl.y, corners[i].y); + br.x = std::min(br.x, corners[i].x + sizes[i].width); + br.y = std::min(br.y, corners[i].y + sizes[i].height); + } + return Rect(tl, br); +} + Point resultTl(const std::vector &corners) { diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp index df0a9abc84..bc66694cc7 100644 --- a/samples/cpp/stitching_detailed.cpp +++ b/samples/cpp/stitching_detailed.cpp @@ -49,6 +49,7 @@ #include "opencv2/highgui.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" +#include "opencv2/stitching/detail/timelapsers.hpp" #include "opencv2/stitching/detail/camera.hpp" #include "opencv2/stitching/detail/exposure_compensate.hpp" #include "opencv2/stitching/detail/matchers.hpp" @@ -116,7 +117,9 @@ static void printUsage() " --blend_strength \n" " Blending strength from [0,100] range. The default is 5.\n" " --output \n" - " The default is 'result.jpg'.\n"; + " The default is 'result.jpg'.\n" + " --timelapse (as_is|crop) (range_width)\n" + " Output warped images separately as frames of a time lapse movie, with 'fixed_' prepended to input file names.\n"; } @@ -140,8 +143,12 @@ int expos_comp_type = ExposureCompensator::GAIN_BLOCKS; float match_conf = 0.3f; string seam_find_type = "gc_color"; int blend_type = Blender::MULTI_BAND; +int timelapse_type = Timelapser::AS_IS; float blend_strength = 5; string result_name = "result.jpg"; +bool timelapse = false; +int timelapse_range = 5; + static int parseCmdArgs(int argc, char** argv) { @@ -304,6 +311,24 @@ static int parseCmdArgs(int argc, char** argv) } i++; } + else if (string(argv[i]) == "--timelapse") + { + timelapse = true; + + if (string(argv[i + 1]) == "as_is") + timelapse_type = Timelapser::AS_IS; + else if (string(argv[i + 1]) == "crop") + timelapse_type = Timelapser::CROP; + else + { + cout << "Bad timelapse method\n"; + return -1; + } + i++; + + timelapse_range = atoi(argv[i + 1]); + i++; + } else if (string(argv[i]) == "--blend_strength") { blend_strength = static_cast(atof(argv[i + 1])); @@ -432,9 +457,19 @@ int main(int argc, char* argv[]) t = getTickCount(); #endif vector pairwise_matches; - BestOf2NearestMatcher matcher(try_cuda, match_conf); - matcher(features, pairwise_matches); - matcher.collectGarbage(); + if (!timelapse) + { + BestOf2NearestMatcher matcher(try_cuda, match_conf); + matcher(features, pairwise_matches); + matcher.collectGarbage(); + } + else + { + BestOf2NearestRangeMatcher matcher(timelapse_range, try_cuda, match_conf); + matcher(features, pairwise_matches); + matcher.collectGarbage(); + } + LOGLN("Pairwise matching, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); // Check if we should save matches graph @@ -679,6 +714,7 @@ int main(int argc, char* argv[]) Mat img_warped, img_warped_s; Mat dilated_mask, seam_mask, mask, mask_warped; Ptr blender; + Ptr timelapser; //double compose_seam_aspect = 1; double compose_work_aspect = 1; @@ -755,7 +791,7 @@ int main(int argc, char* argv[]) resize(dilated_mask, seam_mask, mask_warped.size()); mask_warped = seam_mask & mask_warped; - if (!blender) + if (!blender && !timelapse) { blender = Blender::createDefault(blend_type, try_cuda); Size dst_sz = resultRoi(corners, sizes).size(); @@ -776,17 +812,35 @@ int main(int argc, char* argv[]) } blender->prepare(corners, sizes); } + else if (!timelapser) + { + CV_Assert(timelapse); + timelapser = Timelapser::createDefault(timelapse_type); + timelapser->initialize(corners, sizes); + } // Blend the current image - blender->feed(img_warped_s, mask_warped, corners[img_idx]); + if (timelapse) + { + timelapser->process(img_warped_s, Mat::ones(img_warped_s.size(), CV_8UC1), corners[img_idx]); + + imwrite("fixed_" + img_names[img_idx], timelapser->getDst()); + } + else + { + blender->feed(img_warped_s, mask_warped, corners[img_idx]); + } } - Mat result, result_mask; - blender->blend(result, result_mask); + if (!timelapse) + { + Mat result, result_mask; + blender->blend(result, result_mask); - LOGLN("Compositing, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); + LOGLN("Compositing, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); - imwrite(result_name, result); + imwrite(result_name, result); + } LOGLN("Finished, total time: " << ((getTickCount() - app_start_time) / getTickFrequency()) << " sec"); return 0; From d848704b35318c2498b4950ddd82e341c1dd28ca Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 23 Jun 2014 13:55:09 -0700 Subject: [PATCH 07/71] cuda::DisparityBilateralFilter no longer uses constant memory for parameters Now multiple filters can be used in the same context without stepping on each other. --- .../src/cuda/disparity_bilateral_filter.cu | 44 +++++-------------- .../src/disparity_bilateral_filter.cpp | 8 ++-- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu b/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu index b5de989ae7..5b16f8c108 100644 --- a/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu +++ b/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu @@ -49,30 +49,6 @@ namespace cv { namespace cuda { namespace device { namespace disp_bilateral_filter { - __constant__ float* ctable_color; - __constant__ float* ctable_space; - __constant__ size_t ctable_space_step; - - __constant__ int cndisp; - __constant__ int cradius; - - __constant__ short cedge_disc; - __constant__ short cmax_disc; - - void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc) - { - cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) ); - cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) ); - size_t table_space_step = table_space.step / sizeof(float); - cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) ); - - cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) ); - - cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) ); - } - template struct DistRgbMax { @@ -95,7 +71,11 @@ namespace cv { namespace cuda { namespace device }; template - __global__ void disp_bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w) + __global__ void disp_bilateral_filter(int t, T* disp, size_t disp_step, + const uchar* img, size_t img_step, int h, int w, + const float* ctable_color, const float * ctable_space, size_t ctable_space_step, + int cradius, + short cedge_disc, short cmax_disc) { const int y = blockIdx.y * blockDim.y + threadIdx.y; const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1); @@ -178,7 +158,7 @@ namespace cv { namespace cuda { namespace device } template - void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream) + void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, const float *table_color, const float* table_space, size_t table_step, int radius, short edge_disc, short max_disc, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -190,20 +170,20 @@ namespace cv { namespace cuda { namespace device case 1: for (int i = 0; i < iters; ++i) { - disp_bilateral_filter<1><<>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols); + disp_bilateral_filter<1><<>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, table_space, table_step, radius, edge_disc, max_disc); cudaSafeCall( cudaGetLastError() ); - disp_bilateral_filter<1><<>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols); + disp_bilateral_filter<1><<>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, table_space, table_step, radius, edge_disc, max_disc); cudaSafeCall( cudaGetLastError() ); } break; case 3: for (int i = 0; i < iters; ++i) { - disp_bilateral_filter<3><<>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols); + disp_bilateral_filter<3><<>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, table_space, table_step, radius, edge_disc, max_disc); cudaSafeCall( cudaGetLastError() ); - disp_bilateral_filter<3><<>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols); + disp_bilateral_filter<3><<>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, table_space, table_step, radius, edge_disc, max_disc); cudaSafeCall( cudaGetLastError() ); } break; @@ -215,8 +195,8 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream); - template void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream); + template void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, short, short, cudaStream_t stream); + template void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, short, short, cudaStream_t stream); } // namespace bilateral_filter }}} // namespace cv { namespace cuda { namespace cudev diff --git a/modules/cudastereo/src/disparity_bilateral_filter.cpp b/modules/cudastereo/src/disparity_bilateral_filter.cpp index 75cbce48a9..746d5d49cd 100644 --- a/modules/cudastereo/src/disparity_bilateral_filter.cpp +++ b/modules/cudastereo/src/disparity_bilateral_filter.cpp @@ -55,10 +55,8 @@ namespace cv { namespace cuda { namespace device { namespace disp_bilateral_filter { - void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc); - template - void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream); + void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, const float *, const float *, size_t, int radius, short edge_disc, short max_disc, cudaStream_t stream); } }}} @@ -165,7 +163,7 @@ namespace const short edge_disc = std::max(short(1), short(ndisp * edge_threshold + 0.5)); const short max_disc = short(ndisp * max_disc_threshold + 0.5); - disp_load_constants(table_color.ptr(), table_space, ndisp, radius, edge_disc, max_disc); + size_t table_space_step = table_space.step / sizeof(float); _dst.create(disp.size(), disp.type()); GpuMat dst = _dst.getGpuMat(); @@ -173,7 +171,7 @@ namespace if (dst.data != disp.data) disp.copyTo(dst, stream); - disp_bilateral_filter(dst, img, img.channels(), iters, StreamAccessor::getStream(stream)); + disp_bilateral_filter(dst, img, img.channels(), iters, table_color.ptr(), (float *)table_space.data, table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream)); } void DispBilateralFilterImpl::apply(InputArray _disp, InputArray _image, OutputArray dst, Stream& stream) From 1533d0448f130cbec92c9bba0e200f9864824d2f Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Tue, 24 Jun 2014 11:09:46 -0700 Subject: [PATCH 08/71] cdata_weight always positive; fewer multiplications. --- modules/cudastereo/src/cuda/stereocsbp.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index b1426607dd..b10007e003 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -103,7 +103,7 @@ namespace cv { namespace cuda { namespace device { static __device__ __forceinline__ float compute(const uchar* left, const uchar* right) { - return fmin(cdata_weight * ::abs((int)*left - *right), cdata_weight * cmax_data_term); + return cdata_weight * fmin( ::abs((int)*left - *right), cmax_data_term); } }; template <> struct DataCostPerPixel<3> @@ -114,7 +114,7 @@ namespace cv { namespace cuda { namespace device float tg = 0.587f * ::abs((int)left[1] - right[1]); float tr = 0.299f * ::abs((int)left[2] - right[2]); - return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term); + return cdata_weight * fmin(tr + tg + tb, cmax_data_term); } }; template <> struct DataCostPerPixel<4> @@ -128,7 +128,7 @@ namespace cv { namespace cuda { namespace device float tg = 0.587f * ::abs((int)l.y - r.y); float tr = 0.299f * ::abs((int)l.z - r.z); - return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term); + return cdata_weight * fmin(tr + tg + tb, cmax_data_term); } }; From e532bd50d5f631cb27585176087096461012073d Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Thu, 26 Jun 2014 12:06:25 -0700 Subject: [PATCH 09/71] Move shared interface to header file rather than repeating. --- .../cudastereo/src/cuda/disparity_bilateral_filter.cu | 2 ++ .../cudastereo/src/cuda/disparity_bilateral_filter.hpp | 8 ++++++++ modules/cudastereo/src/disparity_bilateral_filter.cpp | 9 +-------- 3 files changed, 11 insertions(+), 8 deletions(-) create mode 100644 modules/cudastereo/src/cuda/disparity_bilateral_filter.hpp diff --git a/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu b/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu index 5b16f8c108..a9f2d2650c 100644 --- a/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu +++ b/modules/cudastereo/src/cuda/disparity_bilateral_filter.cu @@ -45,6 +45,8 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/limits.hpp" +#include "cuda/disparity_bilateral_filter.hpp" + namespace cv { namespace cuda { namespace device { namespace disp_bilateral_filter diff --git a/modules/cudastereo/src/cuda/disparity_bilateral_filter.hpp b/modules/cudastereo/src/cuda/disparity_bilateral_filter.hpp new file mode 100644 index 0000000000..95be834573 --- /dev/null +++ b/modules/cudastereo/src/cuda/disparity_bilateral_filter.hpp @@ -0,0 +1,8 @@ +namespace cv { namespace cuda { namespace device +{ + namespace disp_bilateral_filter + { + template + void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, const float *, const float *, size_t, int radius, short edge_disc, short max_disc, cudaStream_t stream); + } +}}} diff --git a/modules/cudastereo/src/disparity_bilateral_filter.cpp b/modules/cudastereo/src/disparity_bilateral_filter.cpp index 746d5d49cd..c59e3b2cb4 100644 --- a/modules/cudastereo/src/disparity_bilateral_filter.cpp +++ b/modules/cudastereo/src/disparity_bilateral_filter.cpp @@ -51,14 +51,7 @@ Ptr cv::cuda::createDisparityBilateralFilter(int #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace cuda { namespace device -{ - namespace disp_bilateral_filter - { - template - void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, const float *, const float *, size_t, int radius, short edge_disc, short max_disc, cudaStream_t stream); - } -}}} +#include "cuda/disparity_bilateral_filter.hpp" namespace { From fe29ed461cb61f9e0105ac18a2ef8aa727156ca4 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Thu, 26 Jun 2014 14:06:45 -0700 Subject: [PATCH 10/71] Move shared interface to header file rather than repeating. --- modules/cudastereo/src/cuda/stereocsbp.cu | 2 ++ modules/cudastereo/src/cuda/stereocsbp.hpp | 31 +++++++++++++++++++++ modules/cudastereo/src/stereocsbp.cpp | 32 +--------------------- 3 files changed, 34 insertions(+), 31 deletions(-) create mode 100644 modules/cudastereo/src/cuda/stereocsbp.hpp diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index b10007e003..dc3dbcce9e 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -48,6 +48,8 @@ #include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/functional.hpp" +#include "cuda/stereocsbp.hpp" + namespace cv { namespace cuda { namespace device { namespace stereocsbp diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp new file mode 100644 index 0000000000..9b90732ffb --- /dev/null +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -0,0 +1,31 @@ +namespace cv { namespace cuda { namespace device +{ + namespace stereocsbp + { + void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, + const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp); + + template + void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); + + template + void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step, + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + + template + void init_message(T* u_new, T* d_new, T* l_new, T* r_new, + const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, + T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, + T* data_cost_selected, const T* data_cost, size_t msg_step, + int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); + + template + void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected, + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); + + template + void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, + const PtrStepSz& disp, int nr_plane, cudaStream_t stream); + } +}}} diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 474562baf2..2d25bee37d 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -53,37 +53,7 @@ Ptr cv::cuda::createStereoConstantSpaceBP(int, int, #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace cuda { namespace device -{ - namespace stereocsbp - { - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, - const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp); - - template - void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); - - template - void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); - - template - void init_message(T* u_new, T* d_new, T* l_new, T* r_new, - const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, - T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, - T* data_cost_selected, const T* data_cost, size_t msg_step, - int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); - - template - void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); - - template - void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, - const PtrStepSz& disp, int nr_plane, cudaStream_t stream); - } -}}} +#include "cuda/stereocsbp.hpp" namespace { From 2982e77495356985e4dc8c4e453f2e2b399a1388 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Thu, 26 Jun 2014 15:31:21 -0700 Subject: [PATCH 11/71] Pass in images and scratch space so that multiple copies can run concurrently. --- modules/cudastereo/src/cuda/stereocsbp.cu | 100 ++++++++++----------- modules/cudastereo/src/cuda/stereocsbp.hpp | 10 +-- modules/cudastereo/src/stereocsbp.cpp | 20 +++-- 3 files changed, 62 insertions(+), 68 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index dc3dbcce9e..a8c70c92de 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -72,13 +72,9 @@ namespace cv { namespace cuda { namespace device __constant__ size_t cdisp_step1; __constant__ size_t cdisp_step2; - __constant__ uchar* cleft; - __constant__ uchar* cright; - __constant__ uchar* ctemp; - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, - const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp) + size_t leftstep) { cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) ); @@ -89,11 +85,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) ); - - cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) ); - cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) ); - cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) ); + cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &leftstep, sizeof(size_t)) ); } /////////////////////////////////////////////////////////////// @@ -135,7 +127,7 @@ namespace cv { namespace cuda { namespace device }; template - __global__ void get_first_k_initial_global(T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane) + __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -169,7 +161,7 @@ namespace cv { namespace cuda { namespace device template - __global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane) + __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -225,7 +217,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(int h, int w, int level) + __global__ void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int h, int w, int level) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -265,7 +257,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(int level, int rows, int cols, int h) + __global__ void init_data_cost_reduce(uchar *cleft, uchar *cright, uchar *ctemp, int level, int rows, int cols, int h) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -313,7 +305,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) + void init_data_cost_caller_(uchar *cleft, uchar *cright, uchar *ctemp, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -323,15 +315,15 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(h, w, level); break; - case 3: init_data_cost<<>>(h, w, level); break; - case 4: init_data_cost<<>>(h, w, level); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, h, w, level); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, h, w, level); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, h, w, level); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost_reduce_caller_(int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) + void init_data_cost_reduce_caller_(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -342,19 +334,19 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(level, rows, cols, h); break; - case 3: init_data_cost_reduce<<>>(level, rows, cols, h); break; - case 4: init_data_cost_reduce<<>>(level, rows, cols, h); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, level, rows, cols, h); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, level, rows, cols, h); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, level, rows, cols, h); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) { - typedef void (*InitDataCostCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); + typedef void (*InitDataCostCaller)(uchar *cleft, uchar *cright, uchar *ctemp, int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); static const InitDataCostCaller init_data_cost_callers[] = { @@ -367,7 +359,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream); + init_data_cost_callers[level](cleft, cright, ctemp, rows, cols, h, w, level, ndisp, channels, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -380,9 +372,9 @@ namespace cv { namespace cuda { namespace device grid.y = divUp(h, threads.y); if (use_local_init_data_cost == true) - get_first_k_initial_local<<>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane); + get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane); else - get_first_k_initial_global<<>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane); + get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane); cudaSafeCall( cudaGetLastError() ); @@ -390,10 +382,10 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, + template void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); - template void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, + template void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); /////////////////////////////////////////////////////////////// @@ -401,7 +393,7 @@ namespace cv { namespace cuda { namespace device /////////////////////////////////////////////////////////////// template - __global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) + __global__ void compute_data_cost(uchar *cleft, uchar *cright, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -444,7 +436,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_data_cost_reduce(const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) + __global__ void compute_data_cost_reduce(uchar *cleft, uchar *cright, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -494,7 +486,7 @@ namespace cv { namespace cuda { namespace device } template - void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, + void compute_data_cost_caller_(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); @@ -505,15 +497,15 @@ namespace cv { namespace cuda { namespace device switch(channels) { - case 1: compute_data_cost<<>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break; - case 3: compute_data_cost<<>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break; - case 4: compute_data_cost<<>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 1: compute_data_cost<<>>(cleft, cright, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 3: compute_data_cost<<>>(cleft, cright, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 4: compute_data_cost<<>>(cleft, cright, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void compute_data_cost_reduce_caller_(const T* disp_selected_pyr, T* data_cost, int rows, int cols, + void compute_data_cost_reduce_caller_(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) { const int threadsNum = 256; @@ -525,18 +517,18 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: compute_data_cost_reduce<<>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; - case 3: compute_data_cost_reduce<<>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; - case 4: compute_data_cost_reduce<<>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 1: compute_data_cost_reduce<<>>(cleft, cright, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 3: compute_data_cost_reduce<<>>(cleft, cright, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 4: compute_data_cost_reduce<<>>(cleft, cright, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step, + void compute_data_cost(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) { - typedef void (*ComputeDataCostCaller)(const T* disp_selected_pyr, T* data_cost, int rows, int cols, + typedef void (*ComputeDataCostCaller)(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream); static const ComputeDataCostCaller callers[] = @@ -552,17 +544,17 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream); + callers[level](cleft, cright, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step, + template void compute_data_cost(uchar *cleft, uchar *cright, const short* disp_selected_pyr, short* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); - template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step, + template void compute_data_cost(uchar *cleft, uchar *cright, const float* disp_selected_pyr, float* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); @@ -605,7 +597,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_message(T* u_new_, T* d_new_, T* l_new_, T* r_new_, + __global__ void init_message(uchar *ctemp, T* u_new_, T* d_new_, T* l_new_, T* r_new_, const T* u_cur_, const T* d_cur_, const T* l_cur_, const T* r_cur_, T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, T* data_cost_selected_, const T* data_cost_, @@ -655,7 +647,7 @@ namespace cv { namespace cuda { namespace device template - void init_message(T* u_new, T* d_new, T* l_new, T* r_new, + void init_message(uchar *ctemp, T* u_new, T* d_new, T* l_new, T* r_new, const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, T* data_cost_selected, const T* data_cost, size_t msg_step, @@ -674,7 +666,7 @@ namespace cv { namespace cuda { namespace device grid.x = divUp(w, threads.x); grid.y = divUp(h, threads.y); - init_message<<>>(u_new, d_new, l_new, r_new, + init_message<<>>(ctemp, u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur, selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost, @@ -686,13 +678,13 @@ namespace cv { namespace cuda { namespace device } - template void init_message(short* u_new, short* d_new, short* l_new, short* r_new, + template void init_message(uchar *ctemp, short* u_new, short* d_new, short* l_new, short* r_new, const short* u_cur, const short* d_cur, const short* l_cur, const short* r_cur, short* selected_disp_pyr_new, const short* selected_disp_pyr_cur, short* data_cost_selected, const short* data_cost, size_t msg_step, int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); - template void init_message(float* u_new, float* d_new, float* l_new, float* r_new, + template void init_message(uchar *ctemp, float* u_new, float* d_new, float* l_new, float* r_new, const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur, float* selected_disp_pyr_new, const float* selected_disp_pyr_cur, float* data_cost_selected, const float* data_cost, size_t msg_step, @@ -738,7 +730,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_message(T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i) + __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i) { int y = blockIdx.y * blockDim.y + threadIdx.y; int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1); @@ -765,7 +757,7 @@ namespace cv { namespace cuda { namespace device template - void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected, + void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream) { size_t disp_step = msg_step * h; @@ -780,17 +772,17 @@ namespace cv { namespace cuda { namespace device for(int t = 0; t < iters; ++t) { - compute_message<<>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1); + compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1); cudaSafeCall( cudaGetLastError() ); } if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); }; - template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, + template void calc_all_iterations(uchar *ctemp, short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); - template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, + template void calc_all_iterations(uchar *ctemp, float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 9b90732ffb..ff5e0e1f0c 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -3,25 +3,25 @@ namespace cv { namespace cuda { namespace device namespace stereocsbp { void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, - const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp); + size_t leftstep); template - void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(uchar *left, uchar *right, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); template - void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step, + void compute_data_cost(uchar *left, uchar *right, const T* disp_selected_pyr, T* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); template - void init_message(T* u_new, T* d_new, T* l_new, T* r_new, + void init_message(uchar *ctemp, T* u_new, T* d_new, T* l_new, T* r_new, const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, T* data_cost_selected, const T* data_cost, size_t msg_step, int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); template - void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected, + void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); template diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 2d25bee37d..55afd8c6b9 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_, left, right, temp_); + load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_, left.step); l[0].setTo(0, _stream); d[0].setTo(0, _stream); @@ -245,17 +245,18 @@ namespace { if (i == levels_ - 1) { - init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), + init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); } else { - compute_data_cost(disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, + compute_data_cost(left.ptr(), right.ptr(), disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); int new_idx = (cur_idx + 1) & 1; - init_message(u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), + init_message(temp_.ptr(), + u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), disp_selected_pyr[new_idx].ptr(), disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), data_cost.ptr(), elem_step, rows_pyr[i], @@ -264,7 +265,7 @@ namespace cur_idx = new_idx; } - calc_all_iterations(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, stream); } @@ -275,17 +276,18 @@ namespace { if (i == levels_ - 1) { - init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), + init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); } else { - compute_data_cost(disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, + compute_data_cost(left.ptr(), right.ptr(), disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); int new_idx = (cur_idx + 1) & 1; - init_message(u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), + init_message(temp_.ptr(), + u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), disp_selected_pyr[new_idx].ptr(), disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), data_cost.ptr(), elem_step, rows_pyr[i], @@ -294,7 +296,7 @@ namespace cur_idx = new_idx; } - calc_all_iterations(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, stream); } From d8d946a45838ab744f2f406d932f9f5ab1635d06 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Thu, 26 Jun 2014 15:58:05 -0700 Subject: [PATCH 12/71] Constify cuda csbp --- modules/cudastereo/src/cuda/stereocsbp.cu | 32 +++++++++++----------- modules/cudastereo/src/cuda/stereocsbp.hpp | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index a8c70c92de..fa17742349 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -217,7 +217,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int h, int w, int level) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int h, int w, int level) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -257,7 +257,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(uchar *cleft, uchar *cright, uchar *ctemp, int level, int rows, int cols, int h) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, int level, int rows, int cols, int h) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -305,7 +305,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(uchar *cleft, uchar *cright, uchar *ctemp, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -323,7 +323,7 @@ namespace cv { namespace cuda { namespace device } template - void init_data_cost_reduce_caller_(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) + void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -342,11 +342,11 @@ namespace cv { namespace cuda { namespace device } template - void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) { - typedef void (*InitDataCostCaller)(uchar *cleft, uchar *cright, uchar *ctemp, int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); + typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); static const InitDataCostCaller init_data_cost_callers[] = { @@ -382,10 +382,10 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, + template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); - template void init_data_cost(uchar *cleft, uchar *cright, uchar *ctemp, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, + template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); /////////////////////////////////////////////////////////////// @@ -393,7 +393,7 @@ namespace cv { namespace cuda { namespace device /////////////////////////////////////////////////////////////// template - __global__ void compute_data_cost(uchar *cleft, uchar *cright, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) + __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -436,7 +436,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_data_cost_reduce(uchar *cleft, uchar *cright, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) + __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -486,7 +486,7 @@ namespace cv { namespace cuda { namespace device } template - void compute_data_cost_caller_(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, + void compute_data_cost_caller_(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); @@ -505,7 +505,7 @@ namespace cv { namespace cuda { namespace device } template - void compute_data_cost_reduce_caller_(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, + void compute_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) { const int threadsNum = 256; @@ -525,10 +525,10 @@ namespace cv { namespace cuda { namespace device } template - void compute_data_cost(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, size_t msg_step, + void compute_data_cost(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) { - typedef void (*ComputeDataCostCaller)(uchar *cleft, uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, + typedef void (*ComputeDataCostCaller)(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream); static const ComputeDataCostCaller callers[] = @@ -551,10 +551,10 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void compute_data_cost(uchar *cleft, uchar *cright, const short* disp_selected_pyr, short* data_cost, size_t msg_step, + template void compute_data_cost(const uchar *cleft, const uchar *cright, const short* disp_selected_pyr, short* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); - template void compute_data_cost(uchar *cleft, uchar *cright, const float* disp_selected_pyr, float* data_cost, size_t msg_step, + template void compute_data_cost(const uchar *cleft, const uchar *cright, const float* disp_selected_pyr, float* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index ff5e0e1f0c..72b5ab04c7 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -6,11 +6,11 @@ namespace cv { namespace cuda { namespace device size_t leftstep); template - void init_data_cost(uchar *left, uchar *right, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); template - void compute_data_cost(uchar *left, uchar *right, const T* disp_selected_pyr, T* data_cost, size_t msg_step, + void compute_data_cost(const uchar *left, const uchar *right, const T* disp_selected_pyr, T* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); template From 2832cfdfe581c8a18c695d6737d8ab92f7f6f846 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Fri, 27 Jun 2014 13:38:21 -0700 Subject: [PATCH 13/71] No longer use constant memory for image step. --- modules/cudastereo/src/cuda/stereocsbp.cu | 66 ++++++++++------------ modules/cudastereo/src/cuda/stereocsbp.hpp | 7 +-- modules/cudastereo/src/stereocsbp.cpp | 10 ++-- 3 files changed, 39 insertions(+), 44 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index fa17742349..d0097f3bee 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -67,14 +67,12 @@ namespace cv { namespace cuda { namespace device __constant__ int cth; - __constant__ size_t cimg_step; __constant__ size_t cmsg_step; __constant__ size_t cdisp_step1; __constant__ size_t cdisp_step2; - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, - size_t leftstep) + void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th) { cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) ); @@ -84,8 +82,6 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) ); - - cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &leftstep, sizeof(size_t)) ); } /////////////////////////////////////////////////////////////// @@ -217,7 +213,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int h, int w, int level) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -257,7 +253,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, int level, int rows, int cols, int h) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -305,7 +301,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -315,15 +311,15 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(cleft, cright, ctemp, h, w, level); break; - case 3: init_data_cost<<>>(cleft, cright, ctemp, h, w, level); break; - case 4: init_data_cost<<>>(cleft, cright, ctemp, h, w, level); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) + void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -334,19 +330,19 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, level, rows, cols, h); break; - case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, level, rows, cols, h); break; - case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, level, rows, cols, h); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) { - typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); + typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); static const InitDataCostCaller init_data_cost_callers[] = { @@ -359,7 +355,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - init_data_cost_callers[level](cleft, cright, ctemp, rows, cols, h, w, level, ndisp, channels, stream); + init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -382,10 +378,10 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, + template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); - template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, + template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); /////////////////////////////////////////////////////////////// @@ -393,7 +389,7 @@ namespace cv { namespace cuda { namespace device /////////////////////////////////////////////////////////////// template - __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) + __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -436,7 +432,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) + __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -486,7 +482,7 @@ namespace cv { namespace cuda { namespace device } template - void compute_data_cost_caller_(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, + void compute_data_cost_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); @@ -497,15 +493,15 @@ namespace cv { namespace cuda { namespace device switch(channels) { - case 1: compute_data_cost<<>>(cleft, cright, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; - case 3: compute_data_cost<<>>(cleft, cright, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; - case 4: compute_data_cost<<>>(cleft, cright, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void compute_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, + void compute_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) { const int threadsNum = 256; @@ -517,18 +513,18 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: compute_data_cost_reduce<<>>(cleft, cright, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; - case 3: compute_data_cost_reduce<<>>(cleft, cright, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; - case 4: compute_data_cost_reduce<<>>(cleft, cright, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void compute_data_cost(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, size_t msg_step, + void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) { - typedef void (*ComputeDataCostCaller)(const uchar *cleft, const uchar *cright, const T* disp_selected_pyr, T* data_cost, int rows, int cols, + typedef void (*ComputeDataCostCaller)(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, int h, int w, int level, int nr_plane, int channels, cudaStream_t stream); static const ComputeDataCostCaller callers[] = @@ -544,17 +540,17 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - callers[level](cleft, cright, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream); + callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void compute_data_cost(const uchar *cleft, const uchar *cright, const short* disp_selected_pyr, short* data_cost, size_t msg_step, + template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const short* disp_selected_pyr, short* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); - template void compute_data_cost(const uchar *cleft, const uchar *cright, const float* disp_selected_pyr, float* data_cost, size_t msg_step, + template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const float* disp_selected_pyr, float* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 72b5ab04c7..0854a92a88 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,15 +2,14 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, - size_t leftstep); + void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th); template - void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); template - void compute_data_cost(const uchar *left, const uchar *right, const T* disp_selected_pyr, T* data_cost, size_t msg_step, + void compute_data_cost(const uchar *left, const uchar *right, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, size_t msg_step, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); template diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 55afd8c6b9..be4d8f1ee0 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_, left.step); + load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_); l[0].setTo(0, _stream); d[0].setTo(0, _stream); @@ -245,12 +245,12 @@ namespace { if (i == levels_ - 1) { - init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), + init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); } else { - compute_data_cost(left.ptr(), right.ptr(), disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, + compute_data_cost(left.ptr(), right.ptr(), left.step, disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); int new_idx = (cur_idx + 1) & 1; @@ -276,12 +276,12 @@ namespace { if (i == levels_ - 1) { - init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), + init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); } else { - compute_data_cost(left.ptr(), right.ptr(), disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, + compute_data_cost(left.ptr(), right.ptr(), left.step, disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); int new_idx = (cur_idx + 1) & 1; From 0e2ea45c93bde7e92a61736654aaff9cc7318e5b Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 30 Jun 2014 08:46:14 -0700 Subject: [PATCH 14/71] ndisp no longer constant --- modules/cudastereo/src/cuda/stereocsbp.cu | 42 ++++++++++------------ modules/cudastereo/src/cuda/stereocsbp.hpp | 2 +- modules/cudastereo/src/stereocsbp.cpp | 2 +- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index d0097f3bee..4c3bde337f 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -58,8 +58,6 @@ namespace cv { namespace cuda { namespace device /////////////////////// load constants //////////////////////// /////////////////////////////////////////////////////////////// - __constant__ int cndisp; - __constant__ float cmax_data_term; __constant__ float cdata_weight; __constant__ float cmax_disc_term; @@ -72,10 +70,8 @@ namespace cv { namespace cuda { namespace device __constant__ size_t cdisp_step2; - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th) + void load_constants(float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th) { - cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) ); @@ -123,7 +119,7 @@ namespace cv { namespace cuda { namespace device }; template - __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane) + __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane, int ndisp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -138,7 +134,7 @@ namespace cv { namespace cuda { namespace device { T minimum = device::numeric_limits::max(); int id = 0; - for(int d = 0; d < cndisp; d++) + for(int d = 0; d < ndisp; d++) { T cur = data_cost[d * cdisp_step1]; if(cur < minimum) @@ -157,7 +153,7 @@ namespace cv { namespace cuda { namespace device template - __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane) + __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane, int ndisp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -174,7 +170,7 @@ namespace cv { namespace cuda { namespace device T cur = data_cost[1 * cdisp_step1]; T next = data_cost[2 * cdisp_step1]; - for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++) + for (int d = 1; d < ndisp - 1 && nr_local_minimum < nr_plane; d++) { if (cur < prev && cur < next) { @@ -195,7 +191,7 @@ namespace cv { namespace cuda { namespace device T minimum = numeric_limits::max(); int id = 0; - for (int d = 0; d < cndisp; d++) + for (int d = 0; d < ndisp; d++) { cur = data_cost[d * cdisp_step1]; if (cur < minimum) @@ -213,7 +209,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -228,7 +224,7 @@ namespace cv { namespace cuda { namespace device T* data_cost = (T*)ctemp + y * cmsg_step + x; - for(int d = 0; d < cndisp; ++d) + for(int d = 0; d < ndisp; ++d) { float val = 0.0f; for(int yi = y0; yi < yt; yi++) @@ -253,7 +249,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -261,7 +257,7 @@ namespace cv { namespace cuda { namespace device int tid = threadIdx.x; - if (d < cndisp) + if (d < ndisp) { int x0 = x_out << level; int y0 = y_out << level; @@ -301,7 +297,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -311,9 +307,9 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; - case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; - case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } @@ -330,9 +326,9 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; - case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; - case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } @@ -368,9 +364,9 @@ namespace cv { namespace cuda { namespace device grid.y = divUp(h, threads.y); if (use_local_init_data_cost == true) - get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane); + get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp); else - get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane); + get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp); cudaSafeCall( cudaGetLastError() ); diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 0854a92a88..c9f3983256 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,7 +2,7 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th); + void load_constants(float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th); template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index be4d8f1ee0..946a14fb2b 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_); + load_constants(max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_); l[0].setTo(0, _stream); d[0].setTo(0, _stream); From 021b0cb4d53463b878b7b5a0bd1de3ccde5063db Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 30 Jun 2014 09:28:26 -0700 Subject: [PATCH 15/71] Pass max_disc_term as kernel parameter. --- modules/cudastereo/src/cuda/stereocsbp.cu | 26 ++++++++++------------ modules/cudastereo/src/cuda/stereocsbp.hpp | 4 ++-- modules/cudastereo/src/stereocsbp.cpp | 6 ++--- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 4c3bde337f..582aaa6440 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -60,7 +60,6 @@ namespace cv { namespace cuda { namespace device __constant__ float cmax_data_term; __constant__ float cdata_weight; - __constant__ float cmax_disc_term; __constant__ float cdisc_single_jump; __constant__ int cth; @@ -70,11 +69,10 @@ namespace cv { namespace cuda { namespace device __constant__ size_t cdisp_step2; - void load_constants(float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th) + void load_constants(float max_data_term, float data_weight, float disc_single_jump, int min_disp_th) { cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) ); @@ -688,7 +686,7 @@ namespace cv { namespace cuda { namespace device template __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3, - const T* dst_disp, const T* src_disp, int nr_plane, volatile T* temp) + const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, volatile T* temp) { T minimum = numeric_limits::max(); @@ -706,7 +704,7 @@ namespace cv { namespace cuda { namespace device float sum = 0; for(int d = 0; d < nr_plane; d++) { - float cost_min = minimum + cmax_disc_term; + float cost_min = minimum + max_disc_term; T src_disp_reg = src_disp[d * cdisp_step1]; for(int d2 = 0; d2 < nr_plane; d2++) @@ -722,7 +720,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i) + __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term) { int y = blockIdx.y * blockDim.y + threadIdx.y; int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1); @@ -740,17 +738,17 @@ namespace cv { namespace cuda { namespace device T* temp = (T*)ctemp + y * cmsg_step + x; - message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, temp); - message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, temp); - message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, temp); - message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, temp); + message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, max_disc_term, temp); + message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, max_disc_term, temp); + message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, temp); + message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, temp); } } template void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream) + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream) { size_t disp_step = msg_step * h; cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); @@ -764,7 +762,7 @@ namespace cv { namespace cuda { namespace device for(int t = 0; t < iters; ++t) { - compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1); + compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term); cudaSafeCall( cudaGetLastError() ); } if (stream == 0) @@ -772,10 +770,10 @@ namespace cv { namespace cuda { namespace device }; template void calc_all_iterations(uchar *ctemp, short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, - int h, int w, int nr_plane, int iters, cudaStream_t stream); + int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); template void calc_all_iterations(uchar *ctemp, float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, - int h, int w, int nr_plane, int iters, cudaStream_t stream); + int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); /////////////////////////////////////////////////////////////// diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index c9f3983256..80224754b9 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,7 +2,7 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th); + void load_constants(float max_data_term, float data_weight, float disc_single_jump, int min_disp_th); template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, @@ -21,7 +21,7 @@ namespace cv { namespace cuda { namespace device template void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); template void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 946a14fb2b..12d673153d 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_); + load_constants(max_data_term_, data_weight_, disc_single_jump_, min_disp_th_); l[0].setTo(0, _stream); d[0].setTo(0, _stream); @@ -267,7 +267,7 @@ namespace calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, stream); + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, stream); } } else @@ -298,7 +298,7 @@ namespace calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, stream); + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, stream); } } From 3ab117df04d361c77a56d4a5400404f264256ff4 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 30 Jun 2014 09:51:32 -0700 Subject: [PATCH 16/71] Change struct with single static function to function. --- modules/cudastereo/src/cuda/stereocsbp.cu | 53 ++++++++++------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 582aaa6440..3ef665b05c 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -82,39 +82,30 @@ namespace cv { namespace cuda { namespace device /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// - template struct DataCostPerPixel; - template <> struct DataCostPerPixel<1> + template float __device__ pixeldiff(const uchar* left, const uchar* right); + template<> float __device__ __forceinline__ pixeldiff<1>(const uchar* left, const uchar* right) { - static __device__ __forceinline__ float compute(const uchar* left, const uchar* right) - { - return cdata_weight * fmin( ::abs((int)*left - *right), cmax_data_term); - } - }; - template <> struct DataCostPerPixel<3> + return cdata_weight * fmin( ::abs((int)*left - *right), cmax_data_term); + } + template <> float __device__ __forceinline__ pixeldiff<3>(const uchar* left, const uchar* right) { - static __device__ __forceinline__ float compute(const uchar* left, const uchar* right) - { - float tb = 0.114f * ::abs((int)left[0] - right[0]); - float tg = 0.587f * ::abs((int)left[1] - right[1]); - float tr = 0.299f * ::abs((int)left[2] - right[2]); + float tb = 0.114f * ::abs((int)left[0] - right[0]); + float tg = 0.587f * ::abs((int)left[1] - right[1]); + float tr = 0.299f * ::abs((int)left[2] - right[2]); - return cdata_weight * fmin(tr + tg + tb, cmax_data_term); - } - }; - template <> struct DataCostPerPixel<4> + return cdata_weight * fmin(tr + tg + tb, cmax_data_term); + } + template <> float __device__ __forceinline__ pixeldiff<4>(const uchar* left, const uchar* right) { - static __device__ __forceinline__ float compute(const uchar* left, const uchar* right) - { - uchar4 l = *((const uchar4*)left); - uchar4 r = *((const uchar4*)right); + uchar4 l = *((const uchar4*)left); + uchar4 r = *((const uchar4*)right); - float tb = 0.114f * ::abs((int)l.x - r.x); - float tg = 0.587f * ::abs((int)l.y - r.y); - float tr = 0.299f * ::abs((int)l.z - r.z); + float tb = 0.114f * ::abs((int)l.x - r.x); + float tg = 0.587f * ::abs((int)l.y - r.y); + float tr = 0.299f * ::abs((int)l.z - r.z); - return cdata_weight * fmin(tr + tg + tb, cmax_data_term); - } - }; + return cdata_weight * fmin(tr + tg + tb, cmax_data_term); + } template __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane, int ndisp) @@ -237,7 +228,7 @@ namespace cv { namespace cuda { namespace device const uchar* lle = cleft + yi * cimg_step + xi * channels; const uchar* lri = cright + yi * cimg_step + xr * channels; - val += DataCostPerPixel::compute(lle, lri); + val += pixeldiff(lle, lri); } } } @@ -274,7 +265,7 @@ namespace cv { namespace cuda { namespace device for(int y = 0; y < len; ++y) { - val += DataCostPerPixel::compute(lle, lri); + val += pixeldiff(lle, lri); lle += cimg_step; lri += cimg_step; @@ -416,7 +407,7 @@ namespace cv { namespace cuda { namespace device const uchar* left_x = cleft + yi * cimg_step + xi * channels; const uchar* right_x = cright + yi * cimg_step + xr * channels; - val += DataCostPerPixel::compute(left_x, right_x); + val += pixeldiff(left_x, right_x); } } } @@ -458,7 +449,7 @@ namespace cv { namespace cuda { namespace device for(int y = 0; y < len; ++y) { - val += DataCostPerPixel::compute(lle, lri); + val += pixeldiff(lle, lri); lle += cimg_step; lri += cimg_step; From eed5cbc5db780e2c8ee5293f662cb663742c4e3e Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 30 Jun 2014 13:22:13 -0700 Subject: [PATCH 17/71] More constant removal. --- modules/cudastereo/src/cuda/stereocsbp.cu | 96 +++++++++++----------- modules/cudastereo/src/cuda/stereocsbp.hpp | 6 +- modules/cudastereo/src/stereocsbp.cpp | 10 +-- 3 files changed, 54 insertions(+), 58 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 3ef665b05c..792df0c99c 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -58,8 +58,6 @@ namespace cv { namespace cuda { namespace device /////////////////////// load constants //////////////////////// /////////////////////////////////////////////////////////////// - __constant__ float cmax_data_term; - __constant__ float cdata_weight; __constant__ float cdisc_single_jump; __constant__ int cth; @@ -69,10 +67,8 @@ namespace cv { namespace cuda { namespace device __constant__ size_t cdisp_step2; - void load_constants(float max_data_term, float data_weight, float disc_single_jump, int min_disp_th) + void load_constants(float disc_single_jump, int min_disp_th) { - cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) ); @@ -82,20 +78,20 @@ namespace cv { namespace cuda { namespace device /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// - template float __device__ pixeldiff(const uchar* left, const uchar* right); - template<> float __device__ __forceinline__ pixeldiff<1>(const uchar* left, const uchar* right) + template static float __device__ pixeldiff(const uchar* left, const uchar* right, float max_data_term); + template<> __device__ __forceinline__ static float pixeldiff<1>(const uchar* left, const uchar* right, float max_data_term) { - return cdata_weight * fmin( ::abs((int)*left - *right), cmax_data_term); + return fmin( ::abs((int)*left - *right), max_data_term); } - template <> float __device__ __forceinline__ pixeldiff<3>(const uchar* left, const uchar* right) + template<> __device__ __forceinline__ static float pixeldiff<3>(const uchar* left, const uchar* right, float max_data_term) { float tb = 0.114f * ::abs((int)left[0] - right[0]); float tg = 0.587f * ::abs((int)left[1] - right[1]); float tr = 0.299f * ::abs((int)left[2] - right[2]); - return cdata_weight * fmin(tr + tg + tb, cmax_data_term); + return fmin(tr + tg + tb, max_data_term); } - template <> float __device__ __forceinline__ pixeldiff<4>(const uchar* left, const uchar* right) + template<> __device__ __forceinline__ static float pixeldiff<4>(const uchar* left, const uchar* right, float max_data_term) { uchar4 l = *((const uchar4*)left); uchar4 r = *((const uchar4*)right); @@ -104,7 +100,7 @@ namespace cv { namespace cuda { namespace device float tg = 0.587f * ::abs((int)l.y - r.y); float tr = 0.299f * ::abs((int)l.z - r.z); - return cdata_weight * fmin(tr + tg + tb, cmax_data_term); + return fmin(tr + tg + tb, max_data_term); } template @@ -198,7 +194,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp, float data_weight, float max_data_term) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -222,13 +218,13 @@ namespace cv { namespace cuda { namespace device { int xr = xi - d; if(d < cth || xr < 0) - val += cdata_weight * cmax_data_term; + val += data_weight * max_data_term; else { const uchar* lle = cleft + yi * cimg_step + xi * channels; const uchar* lri = cright + yi * cimg_step + xr * channels; - val += pixeldiff(lle, lri); + val += data_weight * pixeldiff(lle, lri, max_data_term); } } } @@ -238,7 +234,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp, float data_weight, float max_data_term) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -257,7 +253,7 @@ namespace cv { namespace cuda { namespace device if (x0 + tid < cols) { if (x0 + tid - d < 0 || d < cth) - val = cdata_weight * cmax_data_term * len; + val = data_weight * max_data_term * len; else { const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid ); @@ -265,7 +261,7 @@ namespace cv { namespace cuda { namespace device for(int y = 0; y < len; ++y) { - val += pixeldiff(lle, lri); + val += data_weight * pixeldiff(lle, lri, max_data_term); lle += cimg_step; lri += cimg_step; @@ -286,7 +282,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -296,15 +292,15 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; - case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; - case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) + void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -315,19 +311,19 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; - case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; - case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream) { - typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); + typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, float data_weight, float max_data_term, cudaStream_t stream); static const InitDataCostCaller init_data_cost_callers[] = { @@ -340,7 +336,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, stream); + init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, data_weight, max_data_term, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -364,17 +360,17 @@ namespace cv { namespace cuda { namespace device } template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream); template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream); /////////////////////////////////////////////////////////////// ////////////////////// compute data cost ////////////////////// /////////////////////////////////////////////////////////////// template - __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane) + __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane, float data_weight, float max_data_term) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -401,13 +397,13 @@ namespace cv { namespace cuda { namespace device int xr = xi - sel_disp; if (xr < 0 || sel_disp < cth) - val += cdata_weight * cmax_data_term; + val += data_weight * max_data_term; else { const uchar* left_x = cleft + yi * cimg_step + xi * channels; const uchar* right_x = cright + yi * cimg_step + xr * channels; - val += pixeldiff(left_x, right_x); + val += data_weight * pixeldiff(left_x, right_x, max_data_term); } } } @@ -417,7 +413,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane) + __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane, float data_weight, float max_data_term) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -441,7 +437,7 @@ namespace cv { namespace cuda { namespace device if (x0 + tid < cols) { if (x0 + tid - sel_disp < 0 || sel_disp < cth) - val = cdata_weight * cmax_data_term * len; + val = data_weight * max_data_term * len; else { const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid ); @@ -449,7 +445,7 @@ namespace cv { namespace cuda { namespace device for(int y = 0; y < len; ++y) { - val += pixeldiff(lle, lri); + val += data_weight * pixeldiff(lle, lri, max_data_term); lle += cimg_step; lri += cimg_step; @@ -468,7 +464,7 @@ namespace cv { namespace cuda { namespace device template void compute_data_cost_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, - int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -478,16 +474,16 @@ namespace cv { namespace cuda { namespace device switch(channels) { - case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; - case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; - case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term); break; + case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term); break; + case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void compute_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, - int h, int w, int level, int nr_plane, int channels, cudaStream_t stream) + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -498,19 +494,19 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; - case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; - case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term); break; + case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term); break; + case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream) { typedef void (*ComputeDataCostCaller)(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, - int h, int w, int level, int nr_plane, int channels, cudaStream_t stream); + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); static const ComputeDataCostCaller callers[] = { @@ -525,7 +521,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream); + callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, data_weight, max_data_term, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -533,10 +529,10 @@ namespace cv { namespace cuda { namespace device } template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const short* disp_selected_pyr, short* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const float* disp_selected_pyr, float* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); /////////////////////////////////////////////////////////////// diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 80224754b9..04337bbd1d 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,15 +2,15 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(float max_data_term, float data_weight, float disc_single_jump, int min_disp_th); + void load_constants(float disc_single_jump, int min_disp_th); template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream); template void compute_data_cost(const uchar *left, const uchar *right, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); template void init_message(uchar *ctemp, T* u_new, T* d_new, T* l_new, T* r_new, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 12d673153d..14d6d2589f 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(max_data_term_, data_weight_, disc_single_jump_, min_disp_th_); + load_constants(disc_single_jump_, min_disp_th_); l[0].setTo(0, _stream); d[0].setTo(0, _stream); @@ -246,12 +246,12 @@ namespace if (i == levels_ - 1) { init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), - elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); + elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, use_local_init_data_cost_, stream); } else { compute_data_cost(left.ptr(), right.ptr(), left.step, disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, - left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); + left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, stream); int new_idx = (cur_idx + 1) & 1; @@ -277,12 +277,12 @@ namespace if (i == levels_ - 1) { init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), - elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); + elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, use_local_init_data_cost_, stream); } else { compute_data_cost(left.ptr(), right.ptr(), left.step, disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, - left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); + left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, stream); int new_idx = (cur_idx + 1) & 1; From 9bc71f4cb61ac82ce166e4638b6cc0b8d5d5c5ae Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 30 Jun 2014 14:20:46 -0700 Subject: [PATCH 18/71] Deconstify minimum disparity. --- modules/cudastereo/src/cuda/stereocsbp.cu | 79 +++++++++++----------- modules/cudastereo/src/cuda/stereocsbp.hpp | 7 +- modules/cudastereo/src/stereocsbp.cpp | 10 +-- 3 files changed, 47 insertions(+), 49 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 792df0c99c..fc6f4f3daf 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -60,18 +60,14 @@ namespace cv { namespace cuda { namespace device __constant__ float cdisc_single_jump; - __constant__ int cth; - __constant__ size_t cmsg_step; __constant__ size_t cdisp_step1; __constant__ size_t cdisp_step2; - void load_constants(float disc_single_jump, int min_disp_th) + void load_constants(float disc_single_jump) { cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); - - cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) ); } /////////////////////////////////////////////////////////////// @@ -194,7 +190,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp, float data_weight, float max_data_term) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp, float data_weight, float max_data_term, int min_disp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -217,7 +213,7 @@ namespace cv { namespace cuda { namespace device for(int xi = x0; xi < xt; xi++) { int xr = xi - d; - if(d < cth || xr < 0) + if(d < min_disp || xr < 0) val += data_weight * max_data_term; else { @@ -234,7 +230,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp, float data_weight, float max_data_term) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp, float data_weight, float max_data_term, int min_disp) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -252,7 +248,7 @@ namespace cv { namespace cuda { namespace device float val = 0.0f; if (x0 + tid < cols) { - if (x0 + tid - d < 0 || d < cth) + if (x0 + tid - d < 0 || d < min_disp) val = data_weight * max_data_term * len; else { @@ -282,7 +278,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -292,15 +288,15 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term); break; - case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term); break; - case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, cudaStream_t stream) + void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -311,19 +307,19 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term); break; - case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term); break; - case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream) + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream) { - typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, float data_weight, float max_data_term, cudaStream_t stream); + typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream); static const InitDataCostCaller init_data_cost_callers[] = { @@ -336,7 +332,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, data_weight, max_data_term, stream); + init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, data_weight, max_data_term, min_disp, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -359,18 +355,18 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream); + template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream); - template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream); + template void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream); /////////////////////////////////////////////////////////////// ////////////////////// compute data cost ////////////////////// /////////////////////////////////////////////////////////////// template - __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane, float data_weight, float max_data_term) + __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane, float data_weight, float max_data_term, int min_disp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -396,7 +392,7 @@ namespace cv { namespace cuda { namespace device int sel_disp = selected_disparity[d * cdisp_step2]; int xr = xi - sel_disp; - if (xr < 0 || sel_disp < cth) + if (xr < 0 || sel_disp < min_disp) val += data_weight * max_data_term; else { @@ -413,7 +409,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane, float data_weight, float max_data_term) + __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane, float data_weight, float max_data_term, int min_disp) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -436,7 +432,7 @@ namespace cv { namespace cuda { namespace device float val = 0.0f; if (x0 + tid < cols) { - if (x0 + tid - sel_disp < 0 || sel_disp < cth) + if (x0 + tid - sel_disp < 0 || sel_disp < min_disp) val = data_weight * max_data_term * len; else { @@ -464,7 +460,7 @@ namespace cv { namespace cuda { namespace device template void compute_data_cost_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, - int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream) + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -474,16 +470,16 @@ namespace cv { namespace cuda { namespace device switch(channels) { - case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term); break; - case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term); break; - case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term); break; + case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp); break; + case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp); break; + case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void compute_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, - int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream) + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -494,19 +490,20 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term); break; - case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term); break; - case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term); break; + case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp); break; + case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp); break; + case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream) + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, + int min_disp, cudaStream_t stream) { typedef void (*ComputeDataCostCaller)(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, - int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream); static const ComputeDataCostCaller callers[] = { @@ -521,7 +518,7 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, data_weight, max_data_term, stream); + callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, data_weight, max_data_term, min_disp, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -529,10 +526,10 @@ namespace cv { namespace cuda { namespace device } template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const short* disp_selected_pyr, short* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream); template void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const float* disp_selected_pyr, float* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream); /////////////////////////////////////////////////////////////// diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 04337bbd1d..95c5a47356 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,15 +2,16 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(float disc_single_jump, int min_disp_th); + void load_constants(float disc_single_jump); template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, - int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, bool use_local_init_data_cost, cudaStream_t stream); + int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream); template void compute_data_cost(const uchar *left, const uchar *right, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, size_t msg_step, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, cudaStream_t stream); + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, float data_weight, float max_data_term, + int min_disp, cudaStream_t stream); template void init_message(uchar *ctemp, T* u_new, T* d_new, T* l_new, T* r_new, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 14d6d2589f..2515061e13 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(disc_single_jump_, min_disp_th_); + load_constants(disc_single_jump_); l[0].setTo(0, _stream); d[0].setTo(0, _stream); @@ -246,12 +246,12 @@ namespace if (i == levels_ - 1) { init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), - elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, use_local_init_data_cost_, stream); + elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, min_disp_th_, use_local_init_data_cost_, stream); } else { compute_data_cost(left.ptr(), right.ptr(), left.step, disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, - left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, stream); + left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, min_disp_th_, stream); int new_idx = (cur_idx + 1) & 1; @@ -277,12 +277,12 @@ namespace if (i == levels_ - 1) { init_data_cost(left.ptr(), right.ptr(), temp_.ptr(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), - elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, use_local_init_data_cost_, stream); + elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, min_disp_th_, use_local_init_data_cost_, stream); } else { compute_data_cost(left.ptr(), right.ptr(), left.step, disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, - left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, stream); + left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, min_disp_th_, stream); int new_idx = (cur_idx + 1) & 1; From 6d86d63ac59a9e581c0680c8a9580f75d9d98d4f Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Tue, 1 Jul 2014 10:44:52 -0700 Subject: [PATCH 19/71] Last of csbp load_constants() gone. --- modules/cudastereo/src/cuda/stereocsbp.cu | 30 ++++++++-------------- modules/cudastereo/src/cuda/stereocsbp.hpp | 4 +-- modules/cudastereo/src/stereocsbp.cpp | 6 ++--- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index fc6f4f3daf..04f6caced6 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -58,18 +58,10 @@ namespace cv { namespace cuda { namespace device /////////////////////// load constants //////////////////////// /////////////////////////////////////////////////////////////// - __constant__ float cdisc_single_jump; - __constant__ size_t cmsg_step; __constant__ size_t cdisp_step1; __constant__ size_t cdisp_step2; - - void load_constants(float disc_single_jump) - { - cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); - } - /////////////////////////////////////////////////////////////// /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// @@ -670,7 +662,7 @@ namespace cv { namespace cuda { namespace device template __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3, - const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, volatile T* temp) + const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, float disc_single_jump, volatile T* temp) { T minimum = numeric_limits::max(); @@ -692,7 +684,7 @@ namespace cv { namespace cuda { namespace device T src_disp_reg = src_disp[d * cdisp_step1]; for(int d2 = 0; d2 < nr_plane; d2++) - cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)); + cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + disc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)); temp[d * cdisp_step1] = saturate_cast(cost_min); sum += cost_min; @@ -704,7 +696,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term) + __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term, float disc_single_jump) { int y = blockIdx.y * blockDim.y + threadIdx.y; int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1); @@ -722,17 +714,17 @@ namespace cv { namespace cuda { namespace device T* temp = (T*)ctemp + y * cmsg_step + x; - message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, max_disc_term, temp); - message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, max_disc_term, temp); - message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, temp); - message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, temp); + message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, disc_single_jump, temp); } } template void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream) + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream) { size_t disp_step = msg_step * h; cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); @@ -746,7 +738,7 @@ namespace cv { namespace cuda { namespace device for(int t = 0; t < iters; ++t) { - compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term); + compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term, disc_single_jump); cudaSafeCall( cudaGetLastError() ); } if (stream == 0) @@ -754,10 +746,10 @@ namespace cv { namespace cuda { namespace device }; template void calc_all_iterations(uchar *ctemp, short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, - int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); + int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream); template void calc_all_iterations(uchar *ctemp, float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, - int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); + int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream); /////////////////////////////////////////////////////////////// diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 95c5a47356..305497292d 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,8 +2,6 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(float disc_single_jump); - template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream); @@ -22,7 +20,7 @@ namespace cv { namespace cuda { namespace device template void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream); template void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 2515061e13..ded5fa20e1 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,8 +222,6 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(disc_single_jump_); - l[0].setTo(0, _stream); d[0].setTo(0, _stream); r[0].setTo(0, _stream); @@ -267,7 +265,7 @@ namespace calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, stream); + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, disc_single_jump_, stream); } } else @@ -298,7 +296,7 @@ namespace calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, stream); + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, disc_single_jump_, stream); } } From b792419cde59876969de2f803c1599920cf2a0f2 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 7 Jul 2014 08:12:28 -0700 Subject: [PATCH 20/71] Remove compute_disp()'s use of constant memory. --- modules/cudastereo/src/cuda/stereocsbp.cu | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 04f6caced6..79456a01fa 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -760,26 +760,26 @@ namespace cv { namespace cuda { namespace device template __global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_, const T* data_cost_selected, const T* disp_selected_pyr, - PtrStepSz disp, int nr_plane) + PtrStepSz disp, int nr_plane, size_t msg_step, size_t disp_step) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1) { - const T* data = data_cost_selected + y * cmsg_step + x; - const T* disp_selected = disp_selected_pyr + y * cmsg_step + x; + const T* data = data_cost_selected + y * msg_step + x; + const T* disp_selected = disp_selected_pyr + y * msg_step + x; - const T* u = u_ + (y+1) * cmsg_step + (x+0); - const T* d = d_ + (y-1) * cmsg_step + (x+0); - const T* l = l_ + (y+0) * cmsg_step + (x+1); - const T* r = r_ + (y+0) * cmsg_step + (x-1); + const T* u = u_ + (y+1) * msg_step + (x+0); + const T* d = d_ + (y-1) * msg_step + (x+0); + const T* l = l_ + (y+0) * msg_step + (x+1); + const T* r = r_ + (y+0) * msg_step + (x-1); int best = 0; T best_val = numeric_limits::max(); for (int i = 0; i < nr_plane; ++i) { - int idx = i * cdisp_step1; + int idx = i * disp_step; T val = data[idx]+ u[idx] + d[idx] + l[idx] + r[idx]; if (val < best_val) @@ -797,8 +797,6 @@ namespace cv { namespace cuda { namespace device const PtrStepSz& disp, int nr_plane, cudaStream_t stream) { size_t disp_step = disp.rows * msg_step; - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -806,7 +804,7 @@ namespace cv { namespace cuda { namespace device grid.x = divUp(disp.cols, threads.x); grid.y = divUp(disp.rows, threads.y); - compute_disp<<>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane); + compute_disp<<>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane, msg_step, disp_step); cudaSafeCall( cudaGetLastError() ); if (stream == 0) From 9b8002cd43c8735e6e2eb1ecab1930bad6325681 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 7 Jul 2014 08:23:40 -0700 Subject: [PATCH 21/71] remove use of constant memory in calc_all_iterations/compute_message/message_per_pixel --- modules/cudastereo/src/cuda/stereocsbp.cu | 41 +++++++++++------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 79456a01fa..8c38d91ae9 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -662,13 +662,14 @@ namespace cv { namespace cuda { namespace device template __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3, - const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, float disc_single_jump, volatile T* temp) + const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, float disc_single_jump, volatile T* temp, + size_t disp_step) { T minimum = numeric_limits::max(); for(int d = 0; d < nr_plane; d++) { - int idx = d * cdisp_step1; + int idx = d * disp_step; T val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx]; if(val < minimum) @@ -681,43 +682,43 @@ namespace cv { namespace cuda { namespace device for(int d = 0; d < nr_plane; d++) { float cost_min = minimum + max_disc_term; - T src_disp_reg = src_disp[d * cdisp_step1]; + T src_disp_reg = src_disp[d * disp_step]; for(int d2 = 0; d2 < nr_plane; d2++) - cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + disc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)); + cost_min = fmin(cost_min, msg_dst[d2 * disp_step] + disc_single_jump * ::abs(dst_disp[d2 * disp_step] - src_disp_reg)); - temp[d * cdisp_step1] = saturate_cast(cost_min); + temp[d * disp_step] = saturate_cast(cost_min); sum += cost_min; } sum /= nr_plane; for(int d = 0; d < nr_plane; d++) - msg_dst[d * cdisp_step1] = saturate_cast(temp[d * cdisp_step1] - sum); + msg_dst[d * disp_step] = saturate_cast(temp[d * disp_step] - sum); } template - __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term, float disc_single_jump) + __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term, float disc_single_jump, size_t msg_step, size_t disp_step) { int y = blockIdx.y * blockDim.y + threadIdx.y; int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1); if (y > 0 && y < h - 1 && x > 0 && x < w - 1) { - const T* data = data_cost_selected + y * cmsg_step + x; + const T* data = data_cost_selected + y * msg_step + x; - T* u = u_ + y * cmsg_step + x; - T* d = d_ + y * cmsg_step + x; - T* l = l_ + y * cmsg_step + x; - T* r = r_ + y * cmsg_step + x; + T* u = u_ + y * msg_step + x; + T* d = d_ + y * msg_step + x; + T* l = l_ + y * msg_step + x; + T* r = r_ + y * msg_step + x; - const T* disp = selected_disp_pyr_cur + y * cmsg_step + x; + const T* disp = selected_disp_pyr_cur + y * msg_step + x; - T* temp = (T*)ctemp + y * cmsg_step + x; + T* temp = (T*)ctemp + y * msg_step + x; - message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, max_disc_term, disc_single_jump, temp); - message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, max_disc_term, disc_single_jump, temp); - message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, disc_single_jump, temp); - message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, u, r - 1, u + msg_step, l + 1, disp, disp - msg_step, nr_plane, max_disc_term, disc_single_jump, temp, disp_step); + message_per_pixel(data, d, d - msg_step, r - 1, l + 1, disp, disp + msg_step, nr_plane, max_disc_term, disc_single_jump, temp, disp_step); + message_per_pixel(data, l, u + msg_step, d - msg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, disc_single_jump, temp, disp_step); + message_per_pixel(data, r, u + msg_step, d - msg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, disc_single_jump, temp, disp_step); } } @@ -727,8 +728,6 @@ namespace cv { namespace cuda { namespace device const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream) { size_t disp_step = msg_step * h; - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -738,7 +737,7 @@ namespace cv { namespace cuda { namespace device for(int t = 0; t < iters; ++t) { - compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term, disc_single_jump); + compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term, disc_single_jump, msg_step, disp_step); cudaSafeCall( cudaGetLastError() ); } if (stream == 0) From 1ff270e41c4c69952361baf87ddf304ea0151a0e Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 7 Jul 2014 09:45:30 -0700 Subject: [PATCH 22/71] init_message no longer uses constant memory. --- modules/cudastereo/src/cuda/stereocsbp.cu | 68 +++++++++++------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 8c38d91ae9..6ebdee8e9a 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -534,7 +534,7 @@ namespace cv { namespace cuda { namespace device const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, T* data_cost_selected, T* disparity_selected_new, T* data_cost_new, const T* data_cost_cur, const T* disparity_selected_cur, - int nr_plane, int nr_plane2) + int nr_plane, int nr_plane2, size_t disp_step1, size_t disp_step2) { for(int i = 0; i < nr_plane; i++) { @@ -550,15 +550,15 @@ namespace cv { namespace cuda { namespace device } } - data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1]; - disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2]; + data_cost_selected[i * disp_step1] = data_cost_cur[id * disp_step1]; + disparity_selected_new[i * disp_step1] = disparity_selected_cur[id * disp_step2]; - u_new[i * cdisp_step1] = u_cur[id * cdisp_step2]; - d_new[i * cdisp_step1] = d_cur[id * cdisp_step2]; - l_new[i * cdisp_step1] = l_cur[id * cdisp_step2]; - r_new[i * cdisp_step1] = r_cur[id * cdisp_step2]; + u_new[i * disp_step1] = u_cur[id * disp_step2]; + d_new[i * disp_step1] = d_cur[id * disp_step2]; + l_new[i * disp_step1] = l_cur[id * disp_step2]; + r_new[i * disp_step1] = r_cur[id * disp_step2]; - data_cost_new[id * cdisp_step1] = numeric_limits::max(); + data_cost_new[id * disp_step1] = numeric_limits::max(); } } @@ -567,47 +567,49 @@ namespace cv { namespace cuda { namespace device const T* u_cur_, const T* d_cur_, const T* l_cur_, const T* r_cur_, T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, T* data_cost_selected_, const T* data_cost_, - int h, int w, int nr_plane, int h2, int w2, int nr_plane2) + int h, int w, int nr_plane, int h2, int w2, int nr_plane2, + size_t msg_step, size_t disp_step1, size_t disp_step2) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (y < h && x < w) { - const T* u_cur = u_cur_ + ::min(h2-1, y/2 + 1) * cmsg_step + x/2; - const T* d_cur = d_cur_ + ::max(0, y/2 - 1) * cmsg_step + x/2; - const T* l_cur = l_cur_ + (y/2) * cmsg_step + ::min(w2-1, x/2 + 1); - const T* r_cur = r_cur_ + (y/2) * cmsg_step + ::max(0, x/2 - 1); + const T* u_cur = u_cur_ + ::min(h2-1, y/2 + 1) * msg_step + x/2; + const T* d_cur = d_cur_ + ::max(0, y/2 - 1) * msg_step + x/2; + const T* l_cur = l_cur_ + (y/2) * msg_step + ::min(w2-1, x/2 + 1); + const T* r_cur = r_cur_ + (y/2) * msg_step + ::max(0, x/2 - 1); - T* data_cost_new = (T*)ctemp + y * cmsg_step + x; + T* data_cost_new = (T*)ctemp + y * msg_step + x; - const T* disparity_selected_cur = selected_disp_pyr_cur + y/2 * cmsg_step + x/2; - const T* data_cost = data_cost_ + y * cmsg_step + x; + const T* disparity_selected_cur = selected_disp_pyr_cur + y/2 * msg_step + x/2; + const T* data_cost = data_cost_ + y * msg_step + x; for(int d = 0; d < nr_plane2; d++) { - int idx2 = d * cdisp_step2; + int idx2 = d * disp_step2; - T val = data_cost[d * cdisp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2]; - data_cost_new[d * cdisp_step1] = val; + T val = data_cost[d * disp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2]; + data_cost_new[d * disp_step1] = val; } - T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x; - T* disparity_selected_new = selected_disp_pyr_new + y * cmsg_step + x; + T* data_cost_selected = data_cost_selected_ + y * msg_step + x; + T* disparity_selected_new = selected_disp_pyr_new + y * msg_step + x; - T* u_new = u_new_ + y * cmsg_step + x; - T* d_new = d_new_ + y * cmsg_step + x; - T* l_new = l_new_ + y * cmsg_step + x; - T* r_new = r_new_ + y * cmsg_step + x; + T* u_new = u_new_ + y * msg_step + x; + T* d_new = d_new_ + y * msg_step + x; + T* l_new = l_new_ + y * msg_step + x; + T* r_new = r_new_ + y * msg_step + x; - u_cur = u_cur_ + y/2 * cmsg_step + x/2; - d_cur = d_cur_ + y/2 * cmsg_step + x/2; - l_cur = l_cur_ + y/2 * cmsg_step + x/2; - r_cur = r_cur_ + y/2 * cmsg_step + x/2; + u_cur = u_cur_ + y/2 * msg_step + x/2; + d_cur = d_cur_ + y/2 * msg_step + x/2; + l_cur = l_cur_ + y/2 * msg_step + x/2; + r_cur = r_cur_ + y/2 * msg_step + x/2; get_first_k_element_increase(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur, data_cost_selected, disparity_selected_new, data_cost_new, - data_cost, disparity_selected_cur, nr_plane, nr_plane2); + data_cost, disparity_selected_cur, nr_plane, nr_plane2, + disp_step1, disp_step2); } } @@ -622,9 +624,6 @@ namespace cv { namespace cuda { namespace device size_t disp_step1 = msg_step * h; size_t disp_step2 = msg_step * h2; - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -636,7 +635,8 @@ namespace cv { namespace cuda { namespace device u_cur, d_cur, l_cur, r_cur, selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost, - h, w, nr_plane, h2, w2, nr_plane2); + h, w, nr_plane, h2, w2, nr_plane2, + msg_step, disp_step1, disp_step2); cudaSafeCall( cudaGetLastError() ); if (stream == 0) From 52516085d98e3ca19d2785143acfe992b78c29a6 Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 7 Jul 2014 10:12:30 -0700 Subject: [PATCH 23/71] remove constant memory from init_data_cost --- modules/cudastereo/src/cuda/stereocsbp.cu | 92 ++++++++++++----------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 6ebdee8e9a..974b503733 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -92,16 +92,17 @@ namespace cv { namespace cuda { namespace device } template - __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane, int ndisp) + __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane, int ndisp, + size_t msg_step, size_t disp_step) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (y < h && x < w) { - T* selected_disparity = selected_disp_pyr + y * cmsg_step + x; - T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x; - T* data_cost = (T*)ctemp + y * cmsg_step + x; + T* selected_disparity = selected_disp_pyr + y * msg_step + x; + T* data_cost_selected = data_cost_selected_ + y * msg_step + x; + T* data_cost = (T*)ctemp + y * msg_step + x; for(int i = 0; i < nr_plane; i++) { @@ -109,7 +110,7 @@ namespace cv { namespace cuda { namespace device int id = 0; for(int d = 0; d < ndisp; d++) { - T cur = data_cost[d * cdisp_step1]; + T cur = data_cost[d * disp_step]; if(cur < minimum) { minimum = cur; @@ -117,46 +118,47 @@ namespace cv { namespace cuda { namespace device } } - data_cost_selected[i * cdisp_step1] = minimum; - selected_disparity[i * cdisp_step1] = id; - data_cost [id * cdisp_step1] = numeric_limits::max(); + data_cost_selected[i * disp_step] = minimum; + selected_disparity[i * disp_step] = id; + data_cost [id * disp_step] = numeric_limits::max(); } } } template - __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane, int ndisp) + __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane, int ndisp, + size_t msg_step, size_t disp_step) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (y < h && x < w) { - T* selected_disparity = selected_disp_pyr + y * cmsg_step + x; - T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x; - T* data_cost = (T*)ctemp + y * cmsg_step + x; + T* selected_disparity = selected_disp_pyr + y * msg_step + x; + T* data_cost_selected = data_cost_selected_ + y * msg_step + x; + T* data_cost = (T*)ctemp + y * msg_step + x; int nr_local_minimum = 0; - T prev = data_cost[0 * cdisp_step1]; - T cur = data_cost[1 * cdisp_step1]; - T next = data_cost[2 * cdisp_step1]; + T prev = data_cost[0 * disp_step]; + T cur = data_cost[1 * disp_step]; + T next = data_cost[2 * disp_step]; for (int d = 1; d < ndisp - 1 && nr_local_minimum < nr_plane; d++) { if (cur < prev && cur < next) { - data_cost_selected[nr_local_minimum * cdisp_step1] = cur; - selected_disparity[nr_local_minimum * cdisp_step1] = d; + data_cost_selected[nr_local_minimum * disp_step] = cur; + selected_disparity[nr_local_minimum * disp_step] = d; - data_cost[d * cdisp_step1] = numeric_limits::max(); + data_cost[d * disp_step] = numeric_limits::max(); nr_local_minimum++; } prev = cur; cur = next; - next = data_cost[(d + 1) * cdisp_step1]; + next = data_cost[(d + 1) * disp_step]; } for (int i = nr_local_minimum; i < nr_plane; i++) @@ -166,23 +168,25 @@ namespace cv { namespace cuda { namespace device for (int d = 0; d < ndisp; d++) { - cur = data_cost[d * cdisp_step1]; + cur = data_cost[d * disp_step]; if (cur < minimum) { minimum = cur; id = d; } } - data_cost_selected[i * cdisp_step1] = minimum; - selected_disparity[i * cdisp_step1] = id; + data_cost_selected[i * disp_step] = minimum; + selected_disparity[i * disp_step] = id; - data_cost[id * cdisp_step1] = numeric_limits::max(); + data_cost[id * disp_step] = numeric_limits::max(); } } } template - __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp, float data_weight, float max_data_term, int min_disp) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, + int h, int w, int level, int ndisp, float data_weight, float max_data_term, + int min_disp, size_t msg_step, size_t disp_step) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -195,7 +199,7 @@ namespace cv { namespace cuda { namespace device int x0 = x << level; int xt = (x + 1) << level; - T* data_cost = (T*)ctemp + y * cmsg_step + x; + T* data_cost = (T*)ctemp + y * msg_step + x; for(int d = 0; d < ndisp; ++d) { @@ -216,13 +220,15 @@ namespace cv { namespace cuda { namespace device } } } - data_cost[cdisp_step1 * d] = saturate_cast(val); + data_cost[disp_step * d] = saturate_cast(val); } } } template - __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp, float data_weight, float max_data_term, int min_disp) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, + int level, int rows, int cols, int h, int ndisp, float data_weight, float max_data_term, + int min_disp, size_t msg_step, size_t disp_step) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -261,16 +267,16 @@ namespace cv { namespace cuda { namespace device reduce(smem + winsz * threadIdx.z, val, tid, plus()); - T* data_cost = (T*)ctemp + y_out * cmsg_step + x_out; + T* data_cost = (T*)ctemp + y_out * msg_step + x_out; if (tid == 0) - data_cost[cdisp_step1 * d] = saturate_cast(val); + data_cost[disp_step * d] = saturate_cast(val); } } template - void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -280,15 +286,15 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp); break; - case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp); break; - case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp, msg_step, disp_step); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp, msg_step, disp_step); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp, data_weight, max_data_term, min_disp, msg_step, disp_step); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template - void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) + void init_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int rows, int cols, int h, int w, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -299,9 +305,9 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp); break; - case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp); break; - case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp, msg_step, disp_step); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp, msg_step, disp_step); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp, data_weight, max_data_term, min_disp, msg_step, disp_step); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } @@ -311,7 +317,7 @@ namespace cv { namespace cuda { namespace device int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream) { - typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream); + typedef void (*InitDataCostCaller)(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int cols, int rows, int w, int h, int level, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step, cudaStream_t stream); static const InitDataCostCaller init_data_cost_callers[] = { @@ -321,10 +327,8 @@ namespace cv { namespace cuda { namespace device }; size_t disp_step = msg_step * h; - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, data_weight, max_data_term, min_disp, stream); + init_data_cost_callers[level](cleft, cright, ctemp, cimg_step, rows, cols, h, w, level, ndisp, channels, data_weight, max_data_term, min_disp, msg_step, disp_step, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -337,9 +341,9 @@ namespace cv { namespace cuda { namespace device grid.y = divUp(h, threads.y); if (use_local_init_data_cost == true) - get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp); + get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp, msg_step, disp_step); else - get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp); + get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp, msg_step, disp_step); cudaSafeCall( cudaGetLastError() ); @@ -542,7 +546,7 @@ namespace cv { namespace cuda { namespace device int id = 0; for(int j = 0; j < nr_plane2; j++) { - T cur = data_cost_new[j * cdisp_step1]; + T cur = data_cost_new[j * disp_step1]; if(cur < minimum) { minimum = cur; From 85601e03dd345d88f59ebf5f6b81a42fd336ecaa Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 7 Jul 2014 10:28:47 -0700 Subject: [PATCH 24/71] remove constant memory use in compute_data_cost --- modules/cudastereo/src/cuda/stereocsbp.cu | 43 +++++++++++------------ 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 974b503733..eb371c1881 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -362,7 +362,7 @@ namespace cv { namespace cuda { namespace device /////////////////////////////////////////////////////////////// template - __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane, float data_weight, float max_data_term, int min_disp) + __global__ void compute_data_cost(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step1, size_t disp_step2) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -375,8 +375,8 @@ namespace cv { namespace cuda { namespace device int x0 = x << level; int xt = (x + 1) << level; - const T* selected_disparity = selected_disp_pyr + y/2 * cmsg_step + x/2; - T* data_cost = data_cost_ + y * cmsg_step + x; + const T* selected_disparity = selected_disp_pyr + y/2 * msg_step + x/2; + T* data_cost = data_cost_ + y * msg_step + x; for(int d = 0; d < nr_plane; d++) { @@ -385,7 +385,7 @@ namespace cv { namespace cuda { namespace device { for(int xi = x0; xi < xt; xi++) { - int sel_disp = selected_disparity[d * cdisp_step2]; + int sel_disp = selected_disparity[d * disp_step2]; int xr = xi - sel_disp; if (xr < 0 || sel_disp < min_disp) @@ -399,13 +399,13 @@ namespace cv { namespace cuda { namespace device } } } - data_cost[cdisp_step1 * d] = saturate_cast(val); + data_cost[disp_step1 * d] = saturate_cast(val); } } } template - __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane, float data_weight, float max_data_term, int min_disp) + __global__ void compute_data_cost_reduce(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step1, size_t disp_step2) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -413,12 +413,12 @@ namespace cv { namespace cuda { namespace device int tid = threadIdx.x; - const T* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step + x_out/2; - T* data_cost = data_cost_ + y_out * cmsg_step + x_out; + const T* selected_disparity = selected_disp_pyr + y_out/2 * msg_step + x_out/2; + T* data_cost = data_cost_ + y_out * msg_step + x_out; if (d < nr_plane) { - int sel_disp = selected_disparity[d * cdisp_step2]; + int sel_disp = selected_disparity[d * disp_step2]; int x0 = x_out << level; int y0 = y_out << level; @@ -450,13 +450,13 @@ namespace cv { namespace cuda { namespace device reduce(smem + winsz * threadIdx.z, val, tid, plus()); if (tid == 0) - data_cost[cdisp_step1 * d] = saturate_cast(val); + data_cost[disp_step1 * d] = saturate_cast(val); } } template void compute_data_cost_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/, - int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step1, size_t disp_step2, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -466,16 +466,16 @@ namespace cv { namespace cuda { namespace device switch(channels) { - case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp); break; - case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp); break; - case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp); break; + case 1: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2); break; + case 3: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2); break; + case 4: compute_data_cost<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, h, w, level, nr_plane, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } template void compute_data_cost_reduce_caller_(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, - int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream) + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step1, size_t disp_step2, cudaStream_t stream) { const int threadsNum = 256; const size_t smem_size = threadsNum * sizeof(float); @@ -486,9 +486,9 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp); break; - case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp); break; - case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp); break; + case 1: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2); break; + case 3: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2); break; + case 4: compute_data_cost_reduce<<>>(cleft, cright, cimg_step, disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } @@ -499,7 +499,7 @@ namespace cv { namespace cuda { namespace device int min_disp, cudaStream_t stream) { typedef void (*ComputeDataCostCaller)(const uchar *cleft, const uchar *cright, size_t cimg_step, const T* disp_selected_pyr, T* data_cost, int rows, int cols, - int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, cudaStream_t stream); + int h, int w, int level, int nr_plane, int channels, float data_weight, float max_data_term, int min_disp, size_t msg_step, size_t disp_step1, size_t disp_step2, cudaStream_t stream); static const ComputeDataCostCaller callers[] = { @@ -510,11 +510,8 @@ namespace cv { namespace cuda { namespace device size_t disp_step1 = msg_step * h; size_t disp_step2 = msg_step * h2; - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); - callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, data_weight, max_data_term, min_disp, stream); + callers[level](cleft, cright, cimg_step, disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, data_weight, max_data_term, min_disp, msg_step, disp_step1, disp_step2, stream); cudaSafeCall( cudaGetLastError() ); if (stream == 0) From 4644689d5a31f0d80527e511b8f86fa05bc8681f Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Thu, 10 Jul 2014 23:53:44 -0700 Subject: [PATCH 25/71] And remove final vestiges. --- modules/cudastereo/src/cuda/stereocsbp.cu | 8 -------- 1 file changed, 8 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index eb371c1881..dd535e8b20 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -54,14 +54,6 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - /////////////////////////////////////////////////////////////// - /////////////////////// load constants //////////////////////// - /////////////////////////////////////////////////////////////// - - __constant__ size_t cmsg_step; - __constant__ size_t cdisp_step1; - __constant__ size_t cdisp_step2; - /////////////////////////////////////////////////////////////// /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// From d2493393dbfe0402eb8bedbdae7da584ff700657 Mon Sep 17 00:00:00 2001 From: Anish Pednekar Date: Sun, 20 Jul 2014 20:59:17 +0530 Subject: [PATCH 26/71] added macro guard to enable overriding ENABLE_LOG --- modules/stitching/include/opencv2/stitching/detail/util.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/stitching/include/opencv2/stitching/detail/util.hpp b/modules/stitching/include/opencv2/stitching/detail/util.hpp index 6b1c5f34f3..2769dc60b2 100644 --- a/modules/stitching/include/opencv2/stitching/detail/util.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/util.hpp @@ -46,7 +46,9 @@ #include #include "opencv2/core.hpp" +#ifndef ENABLE_LOG #define ENABLE_LOG 0 +#endif // TODO remove LOG macros, add logging class #if ENABLE_LOG From bd144cfa64094966e9fdf95c18e6ea594c34fe82 Mon Sep 17 00:00:00 2001 From: Sergey Nikulov Date: Mon, 21 Jul 2014 18:56:35 +0400 Subject: [PATCH 27/71] upmerged pull req #2974 to master - added more jpeg options to imgcodecs --- .../imgcodecs/include/opencv2/imgcodecs.hpp | 19 ++++++---- .../include/opencv2/imgcodecs/imgcodecs_c.h | 3 ++ modules/imgcodecs/src/grfmt_jpeg.cpp | 37 +++++++++++++++++++ modules/imgcodecs/test/test_grfmt.cpp | 25 +++++++++++++ 4 files changed, 76 insertions(+), 8 deletions(-) diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index 81f8a45f65..94948525c9 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -56,14 +56,17 @@ enum { IMREAD_UNCHANGED = -1, // 8bit, color or not IMREAD_ANYCOLOR = 4 // ?, any color }; -enum { IMWRITE_JPEG_QUALITY = 1, - IMWRITE_JPEG_PROGRESSIVE = 2, - IMWRITE_JPEG_OPTIMIZE = 3, - IMWRITE_PNG_COMPRESSION = 16, - IMWRITE_PNG_STRATEGY = 17, - IMWRITE_PNG_BILEVEL = 18, - IMWRITE_PXM_BINARY = 32, - IMWRITE_WEBP_QUALITY = 64 +enum { IMWRITE_JPEG_QUALITY = 1, + IMWRITE_JPEG_PROGRESSIVE = 2, + IMWRITE_JPEG_OPTIMIZE = 3, + IMWRITE_JPEG_RST_INTERVAL = 4, + IMWRITE_JPEG_LUM_QUALITY = 5, + IMWRITE_JPEG_CHROM_QUALITY = 6, + IMWRITE_PNG_COMPRESSION = 16, + IMWRITE_PNG_STRATEGY = 17, + IMWRITE_PNG_BILEVEL = 18, + IMWRITE_PXM_BINARY = 32, + IMWRITE_WEBP_QUALITY = 64 }; enum { IMWRITE_PNG_STRATEGY_DEFAULT = 0, diff --git a/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h b/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h index f0c2ae13fe..a36b454058 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h +++ b/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h @@ -76,6 +76,9 @@ enum CV_IMWRITE_JPEG_QUALITY =1, CV_IMWRITE_JPEG_PROGRESSIVE =2, CV_IMWRITE_JPEG_OPTIMIZE =3, + CV_IMWRITE_JPEG_RST_INTERVAL =4, + CV_IMWRITE_JPEG_LUM_QUALITY =5, + CV_IMWRITE_JPEG_CHROM_QUALITY =6, CV_IMWRITE_PNG_COMPRESSION =16, CV_IMWRITE_PNG_STRATEGY =17, CV_IMWRITE_PNG_BILEVEL =18, diff --git a/modules/imgcodecs/src/grfmt_jpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg.cpp index 147f185e4c..3157ebfcb1 100644 --- a/modules/imgcodecs/src/grfmt_jpeg.cpp +++ b/modules/imgcodecs/src/grfmt_jpeg.cpp @@ -600,6 +600,9 @@ bool JpegEncoder::write( const Mat& img, const std::vector& params ) int quality = 95; int progressive = 0; int optimize = 0; + int rst_interval = 0; + int lum_quality = 100; + int chrom_quality = 100; for( size_t i = 0; i < params.size(); i += 2 ) { @@ -618,15 +621,49 @@ bool JpegEncoder::write( const Mat& img, const std::vector& params ) { optimize = params[i+1]; } + + if( params[i] == CV_IMWRITE_JPEG_LUM_QUALITY ) + { + lum_quality = params[i+1]; + lum_quality = MIN(MAX(lum_quality, 0), 100); + } + + if( params[i] == CV_IMWRITE_JPEG_CHROM_QUALITY ) + { + chrom_quality = params[i+1]; + chrom_quality = MIN(MAX(chrom_quality, 0), 100); + } + + if( params[i] == CV_IMWRITE_JPEG_RST_INTERVAL ) + { + rst_interval = params[i+1]; + rst_interval = MIN(MAX(rst_interval, 0), 65535L); + } } jpeg_set_defaults( &cinfo ); + cinfo.restart_interval = rst_interval; jpeg_set_quality( &cinfo, quality, TRUE /* limit to baseline-JPEG values */ ); if( progressive ) jpeg_simple_progression( &cinfo ); if( optimize ) cinfo.optimize_coding = TRUE; + +#if JPEG_LIB_VERSION >= 70 + cinfo.q_scale_factor[0] = jpeg_quality_scaling(lum_quality); + cinfo.q_scale_factor[1] = jpeg_quality_scaling(chrom_quality); + if ( lum_quality != chrom_quality ) + { + /* disable subsampling - ref. Libjpeg.txt */ + cinfo.comp_info[0].v_samp_factor = 1; + cinfo.comp_info[0].h_samp_factor = 1; + cinfo.comp_info[1].v_samp_factor = 1; + cinfo.comp_info[1].h_samp_factor = 1; + } + jpeg_default_qtables( &cinfo, TRUE ); +#endif // #if JPEG_LIB_VERSION >= 70 + jpeg_start_compress( &cinfo, TRUE ); if( channels > 1 ) diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp index 9b06c5744c..05bd1227ff 100644 --- a/modules/imgcodecs/test/test_grfmt.cpp +++ b/modules/imgcodecs/test/test_grfmt.cpp @@ -433,6 +433,31 @@ TEST(Imgcodecs_Jpeg, encode_decode_optimize_jpeg) remove(output_optimized.c_str()); } + +TEST(Imgcodecs_Jpeg, encode_decode_rst_jpeg) +{ + cvtest::TS& ts = *cvtest::TS::ptr(); + string input = string(ts.get_data_path()) + "../cv/shared/lena.png"; + cv::Mat img = cv::imread(input); + ASSERT_FALSE(img.empty()); + + std::vector params; + params.push_back(IMWRITE_JPEG_RST_INTERVAL); + params.push_back(1); + + string output_rst = cv::tempfile(".jpg"); + EXPECT_NO_THROW(cv::imwrite(output_rst, img, params)); + cv::Mat img_jpg_rst = cv::imread(output_rst); + + string output_normal = cv::tempfile(".jpg"); + EXPECT_NO_THROW(cv::imwrite(output_normal, img)); + cv::Mat img_jpg_normal = cv::imread(output_normal); + + EXPECT_EQ(0, cvtest::norm(img_jpg_rst, img_jpg_normal, NORM_INF)); + + remove(output_optimized.c_str()); +} + #endif From 9d107fb2d5c0fdfe27273f4de1c9669517a82346 Mon Sep 17 00:00:00 2001 From: Sergey Nikulov Date: Mon, 21 Jul 2014 19:26:26 +0400 Subject: [PATCH 28/71] fixed test compilation --- modules/imgcodecs/test/test_grfmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp index 05bd1227ff..09b0bdc89e 100644 --- a/modules/imgcodecs/test/test_grfmt.cpp +++ b/modules/imgcodecs/test/test_grfmt.cpp @@ -455,7 +455,7 @@ TEST(Imgcodecs_Jpeg, encode_decode_rst_jpeg) EXPECT_EQ(0, cvtest::norm(img_jpg_rst, img_jpg_normal, NORM_INF)); - remove(output_optimized.c_str()); + remove(output_rst.c_str()); } #endif From d1c7983fe5e94d064b274fbc5acf658ff8f7f07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A1s=20Kov=C3=A1cs?= Date: Mon, 21 Jul 2014 17:27:03 +0200 Subject: [PATCH 29/71] calibrateDebevec index fix (issue 3664) --- modules/photo/src/calibrate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/photo/src/calibrate.cpp b/modules/photo/src/calibrate.cpp index 23b612995a..e9fb461f5b 100644 --- a/modules/photo/src/calibrate.cpp +++ b/modules/photo/src/calibrate.cpp @@ -104,7 +104,7 @@ public: for(size_t i = 0; i < sample_points.size(); i++) { for(size_t j = 0; j < images.size(); j++) { - int val = images[j].ptr()[3*(sample_points[i].y * images[j].cols + sample_points[j].x) + channel]; + int val = images[j].ptr()[3*(sample_points[i].y * images[j].cols + sample_points[i].x) + channel]; A.at(eq, val) = w.at(val); A.at(eq, LDR_SIZE + (int)i) = -w.at(val); B.at(eq, 0) = w.at(val) * log(times.at((int)j)); From 716218cac4009a67eb95fc2570990c9e7a7fe266 Mon Sep 17 00:00:00 2001 From: vbystricky Date: Tue, 22 Jul 2014 13:22:40 +0400 Subject: [PATCH 30/71] Optimize ocl version of warp_affine --- modules/imgproc/src/opencl/warp_affine.cl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/imgproc/src/opencl/warp_affine.cl b/modules/imgproc/src/opencl/warp_affine.cl index 8ee34d0d65..649f10db7a 100644 --- a/modules/imgproc/src/opencl/warp_affine.cl +++ b/modules/imgproc/src/opencl/warp_affine.cl @@ -98,15 +98,15 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of { int round_delta = (AB_SCALE >> 1); - int X0_ = rint(M[0] * dx * AB_SCALE); - int Y0_ = rint(M[3] * dx * AB_SCALE); + int X0 = rint(fma(M[0], dx, fma(M[1], dy0, M[2])) * AB_SCALE) + round_delta; + int Y0 = rint(fma(M[3], dx, fma(M[4], dy0, M[5])) * AB_SCALE) + round_delta; + + int XSTEP = (int)(M[1] * AB_SCALE); + int YSTEP = (int)(M[4] * AB_SCALE); int dst_index = mad24(dy0, dst_step, mad24(dx, pixsize, dst_offset)); for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy, dst_index += dst_step) { - int X0 = X0_ + rint(fma(M[1], dy, M[2]) * AB_SCALE) + round_delta; - int Y0 = Y0_ + rint(fma(M[4], dy, M[5]) * AB_SCALE) + round_delta; - short sx = convert_short_sat(X0 >> AB_BITS); short sy = convert_short_sat(Y0 >> AB_BITS); @@ -117,6 +117,9 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of } else storepix(scalar, dstptr + dst_index); + + X0 += XSTEP; + Y0 += YSTEP; } } } From 5dd92638485085805fc31034a7a64dc4a5e4e178 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 26 Jun 2014 10:09:15 +0400 Subject: [PATCH 31/71] Multi-radix with kernel generation --- modules/core/perf/opencl/perf_dxt.cpp | 6 +- modules/core/src/dxt.cpp | 257 +++++++++++++++++++++- modules/core/src/opencl/fft.cl | 297 ++++++++++++++++++++++++++ modules/core/test/ocl/test_dft.cpp | 73 +++++-- samples/cpp/dft.cpp | 71 +++++- 5 files changed, 667 insertions(+), 37 deletions(-) create mode 100644 modules/core/src/opencl/fft.cl diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index d0219913b5..09d657d7a1 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -57,9 +57,9 @@ namespace ocl { typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), - Values((int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE, - (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE))) +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(/*OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, */Size(1024, 1024), Size(1024, 2048), Size(512, 512), Size(2048, 2048)), + Values((int)DFT_ROWS/*, (int) 0/*, (int)DFT_SCALE, (int)DFT_INVERSE, + (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) { const DftParams params = GetParam(); const Size srcSize = get<0>(params); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 2a08899167..68256eaa07 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1960,7 +1960,7 @@ static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) } -static bool ocl_dft(InputArray _src, OutputArray _dst, int flags) +static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); Size ssize = _src.size(); @@ -2029,12 +2029,257 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags) #endif // HAVE_CLAMDFFT +namespace cv +{ + +#ifdef HAVE_OPENCL + +static bool fft_radixN(InputArray _src, OutputArray _dst, int radix, int block_size, int nonzero_rows, int flags) +{ + int N = _src.size().width; + if (N % radix) + return false; + + UMat src = _src.getUMat(); + UMat dst = _dst.getUMat(); + + int thread_count = N / radix; + size_t globalsize[2] = { thread_count, nonzero_rows }; + String kernel_name = format("fft_radix%d", radix); + ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, (flags & DFT_INVERSE) != 0 ? "-D INVERSE" : ""); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst), block_size, thread_count, nonzero_rows); + return k.run(2, globalsize, NULL, false); +} + +static bool ocl_packToCCS(InputArray _buffer, OutputArray _dst, int flags) +{ + UMat buffer = _buffer.getUMat(); + UMat dst = _dst.getUMat(); + + buffer = buffer.reshape(1); + if ((flags & DFT_ROWS) == 0 && buffer.rows > 1) + { + // pack to CCS by rows + if (dst.cols > 2) + buffer.colRange(2, dst.cols + (dst.cols % 2)).copyTo(dst.colRange(1, dst.cols-1 + (dst.cols % 2))); + + Mat dst_mat = dst.getMat(ACCESS_WRITE); + Mat buffer_mat = buffer.getMat(ACCESS_READ); + + dst_mat.at(0,0) = buffer_mat.at(0,0); + dst_mat.at(dst_mat.rows-1,0) = buffer_mat.at(buffer.rows/2,0); + for (int i=1; i(i,0) = buffer_mat.at((i+1)/2,0); + dst_mat.at(i+1,0) = buffer_mat.at((i+1)/2,1); + } + + if (dst_mat.cols % 2 == 0) + { + dst_mat.at(0,dst_mat.cols-1) = buffer_mat.at(0,buffer.cols/2); + dst_mat.at(dst_mat.rows-1,dst_mat.cols-1) = buffer_mat.at(buffer.rows/2,buffer.cols/2); + + for (int i=1; i(i,dst_mat.cols-1) = buffer_mat.at((i+1)/2,buffer.cols/2); + dst_mat.at(i+1,dst_mat.cols-1) = buffer_mat.at((i+1)/2,buffer.cols/2+1); + } + } + } + else + { + // pack to CCS each row + buffer.colRange(0,1).copyTo(dst.colRange(0,1)); + buffer.colRange(2, (dst.cols+1)).copyTo(dst.colRange(1, dst.cols)); + } + return true; +} + +static bool ocl_dft_C2C_row(InputArray _src, OutputArray _dst, int nonzero_rows, int flags) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); + UMat src = _src.getUMat(); + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if (depth == CV_64F && !doubleSupport) + return false; + + int factors[34]; + int nf = DFTFactorize( src.cols, factors ); + + int n = 1; + int factor_index = 0; + + String radix_processing; + int min_radix = INT_MAX; + // 1. 2^n transforms + if ( (factors[factor_index] & 1) == 0 ) + { + for( ; n < factors[factor_index]; ) + { + int radix = 2; + if (8*n <= factors[0]) + radix = 8; + else if (4*n <= factors[0]) + radix = 4; + + radix_processing += format("fft_radix%d(smem,x,%d,%d);", radix, n, src.cols/radix); + min_radix = min(radix, min_radix); + n *= radix; + } + factor_index++; + } + + // 2. all the other transforms + for( ; factor_index < nf; factor_index++ ) + { + int radix = factors[factor_index]; + radix_processing += format("fft_radix%d(smem,x,%d,%d);", radix, n, src.cols/radix); + min_radix = min(radix, min_radix); + n *= radix; + } + + UMat dst = _dst.getUMat(); + + int thread_count = src.cols / min_radix; + size_t globalsize[2] = { thread_count, nonzero_rows }; + size_t localsize[2] = { thread_count, 1 }; + + String buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s", + src.cols, src.cols/thread_count, radix_processing.c_str()); + ocl::Kernel k("fft_multi_radix", ocl::core::fft_oclsrc, buildOptions); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst), thread_count, nonzero_rows); + return k.run(2, globalsize, localsize, false); +} + +static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + Size ssize = _src.size(); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if ( (!doubleSupport && depth == CV_64F) || + !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2)) + return false; + + // if is not a multiplication of prime numbers { 2, 3, 5 } + if (ssize.area() != getOptimalDFTSize(ssize.area())) + return false; + + UMat src = _src.getUMat(); + int complex_input = cn == 2 ? 1 : 0; + int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0; + int real_input = cn == 1 ? 1 : 0; + int real_output = (flags & DFT_REAL_OUTPUT) != 0; + bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0; + bool is1d = (flags & DFT_ROWS) != 0 || src.rows == 1; + + // if output format is not specified + if (complex_output + real_output == 0) + { + if (!inv) + { + if (real_input) + real_output = 1; + else + complex_output = 1; + } + } + + if (complex_output) + { + //if (is1d) + // _dst.create(Size(src.cols/2+1, src.rows), CV_MAKE_TYPE(depth, 2)); + //else + _dst.create(src.size(), CV_MAKE_TYPE(depth, 2)); + } + else + _dst.create(src.size(), CV_MAKE_TYPE(depth, 1)); + UMat dst = _dst.getUMat(); + + bool inplace = src.u == dst.u; + //UMat buffer; + + //if (complex_input) + //{ + // if (inplace) + // buffer = src; + // else + // src.copyTo(buffer); + //} + //else + //{ + // if (!inv) + // { + // // in case real input convert it to complex + // buffer.create(src.size(), CV_MAKE_TYPE(depth, 2)); + // std::vector planes; + // planes.push_back(src); + // planes.push_back(UMat::zeros(src.size(), CV_32F)); + // merge(planes, buffer); + // } + // else + // { + // // TODO: unpack from CCS format + // } + //} + + if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) + nonzero_rows = _src.rows(); + + + if (!ocl_dft_C2C_row(src, dst, nonzero_rows, flags)) + return false; + + if ((flags & DFT_ROWS) == 0 && nonzero_rows > 1) + { + transpose(dst, dst); + if (!ocl_dft_C2C_row(dst, dst, dst.rows, flags)) + return false; + transpose(dst, dst); + } + + //if (complex_output) + //{ + // if (real_input && is1d) + // _dst.assign(buffer.colRange(0, buffer.cols/2+1)); + // else + // _dst.assign(buffer); + //} + //else + //{ + // if (!inv) + // ocl_packToCCS(buffer, _dst, flags); + // else + // { + // // copy real part to dst + // } + //} + return true; +} + +#endif + +} // namespace cv; + + + void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) { #ifdef HAVE_CLAMDFFT CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU && _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0, - ocl_dft(_src0, _dst, flags)) + ocl_dft_amdfft(_src0, _dst, flags)) +#endif + +#ifdef HAVE_OPENCL + CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2, + ocl_dft(_src0, _dst, flags, nonzero_rows)) #endif static DFTFunc dft_tbl[6] = @@ -2046,10 +2291,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) (DFTFunc)RealDFT_64f, (DFTFunc)CCSIDFT_64f }; - AutoBuffer buf; void *spec = 0; - Mat src0 = _src0.getMat(), src = src0; int prev_len = 0, stage = 0; bool inv = (flags & DFT_INVERSE) != 0; @@ -2058,6 +2301,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; int factors[34]; bool inplace_transform = false; + bool is1d = (flags & DFT_ROWS) != 0 || src.rows == 1; #ifdef USE_IPP_DFT AutoBuffer ippbuf; int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1; @@ -2066,7 +2310,10 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) - _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); + if (!is1d) + _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); + else + _dst.create( Size(src.cols/2+1, src.rows), CV_MAKETYPE(depth, 2) ); else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) _dst.create( src.size(), depth ); else diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl new file mode 100644 index 0000000000..7006b92e68 --- /dev/null +++ b/modules/core/src/opencl/fft.cl @@ -0,0 +1,297 @@ +__constant float PI = 3.14159265f; +__constant float SQRT_2 = 0.707106781188f; + +__constant float sin_120 = 0.866025403784f; +__constant float fft5_2 = 0.559016994374f; +__constant float fft5_3 = -0.951056516295f; +__constant float fft5_4 = -1.538841768587f; +__constant float fft5_5 = 0.363271264002f; + +inline float2 mul_float2(float2 a, float2 b){ + float2 res; + res.x = a.x * b.x - a.y * b.y; + res.y = a.x * b.y + a.y * b.x; + return res; +} + +inline float2 sincos_float2(float alpha) { + float cs, sn; + sn = sincos(alpha, &cs); // sincos + return (float2)(cs, sn); +} + +inline float2 twiddle(float2 a) { + return (float2)(a.y, -a.x); +} + +inline float2 square(float2 a) { + return (float2)(a.x * a.x - a.y * a.y, 2.0f * a.x * a.y); +} + +inline float2 square3(float2 a) { + return (float2)(a.x * a.x - a.y * a.y, 3.0f * a.x * a.y); +} + +inline float2 mul_p1q4(float2 a) { + return (float2)(SQRT_2) * (float2)(a.x + a.y, -a.x + a.y); +} + +inline float2 mul_p3q4(float2 a) { + return (float2)(SQRT_2) * (float2)(-a.x + a.y, -a.x - a.y); +} + +__attribute__((always_inline)) +void fft_radix2(__local float2* smem, const int x, const int block_size, const int t) +{ + const int k = x & (block_size - 1); + float2 in1, temp; + + if (x < t) + { + in1 = smem[x]; + float2 in2 = smem[x+t]; + + float theta = -PI * k / block_size; + float cs; + float sn = sincos(theta, &cs); + temp = (float2) (in2.x * cs - in2.y * sn, + in2.y * cs + in2.x * sn); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t) + { + const int dst_ind = (x << 1) - k; + + smem[dst_ind] = in1 + temp; + smem[dst_ind+block_size] = in1 - temp; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((always_inline)) +void fft_radix4(__local float2* smem, const int x, const int block_size, const int t) +{ + const int k = x & (block_size - 1); + float2 b0, b1, b2, b3; + + if (x < t) + { + float theta = -PI * k / (2 * block_size); + + float2 tw = sincos_float2(theta); + float2 a0 = smem[x]; + float2 a1 = mul_float2(tw, smem[x+t]); + float2 a2 = smem[x + 2*t]; + float2 a3 = mul_float2(tw, smem[x + 3*t]); + tw = square(tw); + a2 = mul_float2(tw, a2); + a3 = mul_float2(tw, a3); + + b0 = a0 + a2; + b1 = a0 - a2; + b2 = a1 + a3; + b3 = twiddle(a1 - a3); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t) + { + const int dst_ind = ((x - k) << 2) + k; + smem[dst_ind] = b0 + b2; + smem[dst_ind + block_size] = b1 + b3; + smem[dst_ind + 2*block_size] = b0 - b2; + smem[dst_ind + 3*block_size] = b1 - b3; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((always_inline)) +void fft_radix8(__local float2* smem, const int x, const int block_size, const int t) +{ + const int k = x % block_size; + float2 a0, a1, a2, a3, a4, a5, a6, a7; + + if (x < t) + { + float theta = -PI * k / (4 * block_size); + + float2 tw = sincos_float2(theta); // W + a0 = smem[x]; + a1 = mul_float2(tw, smem[x + t]); + a2 = smem[x + 2 * t]; + a3 = mul_float2(tw, smem[x + 3 * t]); + a4 = smem[x + 4 * t]; + a5 = mul_float2(tw, smem[x + 5 * t]); + a6 = smem[x + 6 * t]; + a7 = mul_float2(tw, smem[x + 7 * t]); + + tw = square(tw); // W^2 + a2 = mul_float2(tw, a2); + a3 = mul_float2(tw, a3); + a6 = mul_float2(tw, a6); + a7 = mul_float2(tw, a7); + tw = square(tw); // W^4 + a4 = mul_float2(tw, a4); + a5 = mul_float2(tw, a5); + a6 = mul_float2(tw, a6); + a7 = mul_float2(tw, a7); + + float2 b0 = a0 + a4; + float2 b4 = a0 - a4; + float2 b1 = a1 + a5; + float2 b5 = mul_p1q4(a1 - a5); + float2 b2 = a2 + a6; + float2 b6 = twiddle(a2 - a6); + float2 b3 = a3 + a7; + float2 b7 = mul_p3q4(a3 - a7); + + a0 = b0 + b2; + a2 = b0 - b2; + a1 = b1 + b3; + a3 = twiddle(b1 - b3); + a4 = b4 + b6; + a6 = b4 - b6; + a5 = b5 + b7; + a7 = twiddle(b5 - b7); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t) + { + const int dst_ind = ((x - k) << 3) + k; + __local float2* dst = smem + dst_ind; + + dst[0] = a0 + a1; + dst[block_size] = a4 + a5; + dst[2 * block_size] = a2 + a3; + dst[3 * block_size] = a6 + a7; + dst[4 * block_size] = a0 - a1; + dst[5 * block_size] = a4 - a5; + dst[6 * block_size] = a2 - a3; + dst[7 * block_size] = a6 - a7; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((always_inline)) +void fft_radix3(__local float2* smem, const int x, const int block_size, const int t) +{ + const int k = x % block_size; + float2 a0, a1, a2, b0, b1; + + if (x < t) + { + const float theta = -PI * k * 2 / (3 * block_size); + + a0 = smem[x]; + a1 = mul_float2(sincos_float2(theta), smem[x+t]); + a2 = mul_float2(sincos_float2(2 * theta), smem[x+2*t]); + b1 = a1 + a2; + a2 = twiddle((float2)sin_120*(a1 - a2)); + b0 = a0 - (float2)(0.5f)*b1; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t) + { + const int dst_ind = ((x - k) * 3) + k; + + smem[dst_ind] = a0 + b1; + smem[dst_ind + block_size] = b0 + a2; + smem[dst_ind + 2*block_size] = b0 - a2; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((always_inline)) +void fft_radix5(__local float2* smem, const int x, const int block_size, const int t) +{ + const int k = x % block_size; + float2 a0, a1, a2, a3, a4, b0, b1, b2, b5; + + if (x < t) + { + const float theta = -PI * k * 2 / (5 * block_size); + + a0 = smem[x]; + a1 = mul_float2(sincos_float2(theta), smem[x + t]); + a2 = mul_float2(sincos_float2(theta*2),smem[x+2*t]); + a3 = mul_float2(sincos_float2(theta*3),smem[x+3*t]); + a4 = mul_float2(sincos_float2(theta*4),smem[x+4*t]); + + b1 = a1 + a4; + a1 -= a4; + + a4 = a3 + a2; + a3 -= a2; + + b2 = b1 + a4; + b0 = a0 - (float2)0.25f * b2; + + b1 = (float2)fft5_2 * (b1 - a4); + a4 = -(float2)fft5_3 * (a1 + a3); + a4 = twiddle(a4); + + b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); + + a4.x += fft5_4 * a3.y; + a4.y -= fft5_4 * a3.x; + + a1 = b0 + b1; + b0 -= b1; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t) + { + const int dst_ind = ((x - k) * 5) + k; + __local float2* dst = smem + dst_ind; + + dst[0] = a0 + b2; + dst[block_size] = a1 + a4; + dst[2 * block_size] = b0 + b5; + dst[3 * block_size] = b0 - b5; + dst[4 * block_size] = a1 - a4; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__kernel void fft_multi_radix(__global const uchar* srcptr, int src_step, int src_offset, + __global uchar* dstptr, int dst_step, int dst_offset, + const int t, const int nz) +{ + const int x = get_global_id(0); + const int y = get_group_id(1); + + if (y < nz) + { + __local float2 smem[LOCAL_SIZE]; + __global const float2* src = (__global const float2*)(srcptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); + __global float2* dst = (__global float2*)(dstptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); + + const int block_size = LOCAL_SIZE/kercn; + #pragma unroll + for (int i=0; i +#include +#include using namespace cv; using namespace std; @@ -24,6 +26,31 @@ const char* keys = int main(int argc, const char ** argv) { + //int cols = 4; + //int rows = 768; + //srand(0); + //Mat input(Size(cols, rows), CV_32FC2); + //for (int i=0; i(j,i) = Vec2f((float) rand() / RAND_MAX, (float) rand() / RAND_MAX); + //Mat dst; + // + //UMat gpu_input, gpu_dst; + //input.copyTo(gpu_input); + //auto start = std::chrono::system_clock::now(); + //dft(input, dst, DFT_ROWS); + //auto cpu_duration = chrono::duration_cast(chrono::system_clock::now() - start); + // + //start = std::chrono::system_clock::now(); + //dft(gpu_input, gpu_dst, DFT_ROWS); + //auto gpu_duration = chrono::duration_cast(chrono::system_clock::now() - start); + + //double n = norm(dst, gpu_dst); + //cout << "norm = " << n << endl; + //cout << "CPU time: " << cpu_duration.count() << "ms" << endl; + //cout << "GPU time: " << gpu_duration.count() << "ms" << endl; + + help(); CommandLineParser parser(argc, argv, keys); string filename = parser.get(0); @@ -35,16 +62,46 @@ int main(int argc, const char ** argv) printf("Cannot read image file: %s\n", filename.c_str()); return -1; } - int M = getOptimalDFTSize( img.rows ); - int N = getOptimalDFTSize( img.cols ); - Mat padded; - copyMakeBorder(img, padded, 0, M - img.rows, 0, N - img.cols, BORDER_CONSTANT, Scalar::all(0)); - Mat planes[] = {Mat_(padded), Mat::zeros(padded.size(), CV_32F)}; - Mat complexImg; + Mat small_img = img(Rect(0,0,6,6)); + + int M = getOptimalDFTSize( small_img.rows ); + int N = getOptimalDFTSize( small_img.cols ); + Mat padded; + copyMakeBorder(small_img, padded, 0, M - small_img.rows, 0, N - small_img.cols, BORDER_CONSTANT, Scalar::all(0)); + + Mat planes[] = {Mat_(padded), Mat::ones(padded.size(), CV_32F)}; + Mat complexImg, complexImg1, complexInput; merge(planes, 2, complexImg); - dft(complexImg, complexImg); + Mat realInput; + padded.convertTo(realInput, CV_32F); + complexInput = complexImg; + //cout << complexImg << endl; + //dft(complexImg, complexImg, DFT_REAL_OUTPUT); + //cout << "Complex to Complex" << endl; + //cout << complexImg << endl; + cout << "Complex input" << endl << complexInput << endl; + cout << "Real input" << endl << realInput << endl; + + dft(complexInput, complexImg1, DFT_COMPLEX_OUTPUT); + cout << "Complex to Complex image: " << endl; + cout << endl << complexImg1 << endl; + + Mat realImg1; + dft(complexInput, realImg1, DFT_REAL_OUTPUT); + cout << "Complex to Real image: " << endl; + cout << endl << realImg1 << endl; + + Mat realOut; + dft(complexImg1, realOut, DFT_INVERSE | DFT_COMPLEX_OUTPUT); + cout << "Complex to Complex (inverse):" << endl; + cout << realOut << endl; + + Mat complexOut; + dft(realImg1, complexOut, DFT_INVERSE | DFT_REAL_OUTPUT | DFT_SCALE); + cout << "Complex to Real (inverse):" << endl; + cout << complexOut << endl; // compute log(1 + sqrt(Re(DFT(img))**2 + Im(DFT(img))**2)) split(complexImg, planes); From 0318d2772086d5aaf6ca07bc994e2bed0943b64b Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 10 Jul 2014 18:10:46 +0400 Subject: [PATCH 32/71] Enabled precalculated wave --- modules/core/include/opencv2/core/cvdef.h | 1 + modules/core/perf/opencl/perf_dxt.cpp | 4 +- modules/core/src/dxt.cpp | 127 +++++++----- modules/core/src/opencl/fft.cl | 234 ++++++++++------------ modules/core/test/ocl/test_dft.cpp | 8 +- 5 files changed, 187 insertions(+), 187 deletions(-) diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 8108a61e60..765c54cbe1 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -244,6 +244,7 @@ typedef signed char schar; /* fundamental constants */ #define CV_PI 3.1415926535897932384626433832795 +#define CV_TWO_PI 6.283185307179586476925286766559 #define CV_LOG2 0.69314718055994530941723212145818 /****************************************************************************************\ diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index 09d657d7a1..c0da96b373 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -57,8 +57,8 @@ namespace ocl { typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(/*OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, */Size(1024, 1024), Size(1024, 2048), Size(512, 512), Size(2048, 2048)), - Values((int)DFT_ROWS/*, (int) 0/*, (int)DFT_SCALE, (int)DFT_INVERSE, +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(1024, 2048), Size(512, 512), Size(2048, 2048)), + Values((int)DFT_ROWS, (int) 0/*, (int)DFT_SCALE, (int)DFT_INVERSE, (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) { const DftParams params = GetParam(); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 68256eaa07..de17f07b23 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2034,26 +2034,6 @@ namespace cv #ifdef HAVE_OPENCL -static bool fft_radixN(InputArray _src, OutputArray _dst, int radix, int block_size, int nonzero_rows, int flags) -{ - int N = _src.size().width; - if (N % radix) - return false; - - UMat src = _src.getUMat(); - UMat dst = _dst.getUMat(); - - int thread_count = N / radix; - size_t globalsize[2] = { thread_count, nonzero_rows }; - String kernel_name = format("fft_radix%d", radix); - ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, (flags & DFT_INVERSE) != 0 ? "-D INVERSE" : ""); - if (k.empty()) - return false; - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst), block_size, thread_count, nonzero_rows); - return k.run(2, globalsize, NULL, false); -} - static bool ocl_packToCCS(InputArray _buffer, OutputArray _dst, int flags) { UMat buffer = _buffer.getUMat(); @@ -2098,24 +2078,18 @@ static bool ocl_packToCCS(InputArray _buffer, OutputArray _dst, int flags) return true; } -static bool ocl_dft_C2C_row(InputArray _src, OutputArray _dst, int nonzero_rows, int flags) +static std::vector ocl_getRadixes(int cols, int& min_radix) { - int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); - UMat src = _src.getUMat(); - - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - if (depth == CV_64F && !doubleSupport) - return false; - int factors[34]; - int nf = DFTFactorize( src.cols, factors ); + int nf = DFTFactorize( cols, factors ); int n = 1; int factor_index = 0; - - String radix_processing; - int min_radix = INT_MAX; - // 1. 2^n transforms + + // choose radix order + std::vector radixes; + + // 2^n transforms if ( (factors[factor_index] & 1) == 0 ) { for( ; n < factors[factor_index]; ) @@ -2126,24 +2100,76 @@ static bool ocl_dft_C2C_row(InputArray _src, OutputArray _dst, int nonzero_rows, else if (4*n <= factors[0]) radix = 4; - radix_processing += format("fft_radix%d(smem,x,%d,%d);", radix, n, src.cols/radix); - min_radix = min(radix, min_radix); + radixes.push_back(radix); + min_radix = min(min_radix, radix); n *= radix; } factor_index++; } - // 2. all the other transforms + // all the other transforms for( ; factor_index < nf; factor_index++ ) { - int radix = factors[factor_index]; - radix_processing += format("fft_radix%d(smem,x,%d,%d);", radix, n, src.cols/radix); - min_radix = min(radix, min_radix); + radixes.push_back(factors[factor_index]); + min_radix = min(min_radix, factors[factor_index]); + } + return radixes; +} + +static bool ocl_dft_C2C_row(InputArray _src, OutputArray _dst, InputOutputArray _twiddles, int nonzero_rows, int flags) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); + UMat src = _src.getUMat(); + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if (depth == CV_64F && !doubleSupport) + return false; + + int min_radix = INT_MAX; + std::vector radixes = ocl_getRadixes(src.cols, min_radix); + + // generate string with radix calls + String radix_processing; + int n = 1, twiddle_index = 0; + for (size_t i=0; i(); + int ptr_index = 0; + + int n = 1; + for (size_t i=0; i _src.rows() ) nonzero_rows = _src.rows(); + UMat buffer; - if (!ocl_dft_C2C_row(src, dst, nonzero_rows, flags)) + if (!ocl_dft_C2C_row(src, dst, buffer, nonzero_rows, flags)) return false; if ((flags & DFT_ROWS) == 0 && nonzero_rows > 1) { transpose(dst, dst); - if (!ocl_dft_C2C_row(dst, dst, dst.rows, flags)) + if (!ocl_dft_C2C_row(dst, dst, buffer, dst.rows, flags)) return false; transpose(dst, dst); } - //if (complex_output) - //{ - // if (real_input && is1d) - // _dst.assign(buffer.colRange(0, buffer.cols/2+1)); - // else - // _dst.assign(buffer); - //} + if (complex_output) + { + if (real_input && is1d) + _dst.assign(dst.colRange(0, dst.cols/2+1)); + else + _dst.assign(dst); + } //else //{ // if (!inv) diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index 7006b92e68..bd2b863c6c 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -7,55 +7,36 @@ __constant float fft5_3 = -0.951056516295f; __constant float fft5_4 = -1.538841768587f; __constant float fft5_5 = 0.363271264002f; -inline float2 mul_float2(float2 a, float2 b){ +__attribute__((always_inline)) +float2 mul_float2(float2 a, float2 b){ float2 res; res.x = a.x * b.x - a.y * b.y; res.y = a.x * b.y + a.y * b.x; return res; } -inline float2 sincos_float2(float alpha) { +__attribute__((always_inline)) +float2 sincos_float2(float alpha) { float cs, sn; sn = sincos(alpha, &cs); // sincos return (float2)(cs, sn); } -inline float2 twiddle(float2 a) { +__attribute__((always_inline)) +float2 twiddle(float2 a) { return (float2)(a.y, -a.x); } -inline float2 square(float2 a) { - return (float2)(a.x * a.x - a.y * a.y, 2.0f * a.x * a.y); -} - -inline float2 square3(float2 a) { - return (float2)(a.x * a.x - a.y * a.y, 3.0f * a.x * a.y); -} - -inline float2 mul_p1q4(float2 a) { - return (float2)(SQRT_2) * (float2)(a.x + a.y, -a.x + a.y); -} - -inline float2 mul_p3q4(float2 a) { - return (float2)(SQRT_2) * (float2)(-a.x + a.y, -a.x - a.y); -} - __attribute__((always_inline)) -void fft_radix2(__local float2* smem, const int x, const int block_size, const int t) +void fft_radix2(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x & (block_size - 1); - float2 in1, temp; + float2 a0, a1; if (x < t) { - in1 = smem[x]; - float2 in2 = smem[x+t]; - - float theta = -PI * k / block_size; - float cs; - float sn = sincos(theta, &cs); - temp = (float2) (in2.x * cs - in2.y * sn, - in2.y * cs + in2.x * sn); + a0 = smem[x]; + a1 = mul_float2(twiddles[k],smem[x+t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -64,36 +45,25 @@ void fft_radix2(__local float2* smem, const int x, const int block_size, const i { const int dst_ind = (x << 1) - k; - smem[dst_ind] = in1 + temp; - smem[dst_ind+block_size] = in1 - temp; + smem[dst_ind] = a0 + a1; + smem[dst_ind+block_size] = a0 - a1; } barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix4(__local float2* smem, const int x, const int block_size, const int t) +void fft_radix4(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x & (block_size - 1); - float2 b0, b1, b2, b3; + float2 a0, a1, a2, a3; if (x < t) { - float theta = -PI * k / (2 * block_size); - - float2 tw = sincos_float2(theta); - float2 a0 = smem[x]; - float2 a1 = mul_float2(tw, smem[x+t]); - float2 a2 = smem[x + 2*t]; - float2 a3 = mul_float2(tw, smem[x + 3*t]); - tw = square(tw); - a2 = mul_float2(tw, a2); - a3 = mul_float2(tw, a3); - - b0 = a0 + a2; - b1 = a0 - a2; - b2 = a1 + a3; - b3 = twiddle(a1 - a3); + a0 = smem[x]; + a1 = mul_float2(twiddles[3*k],smem[x+t]); + a2 = mul_float2(twiddles[3*k + 1],smem[x+2*t]); + a3 = mul_float2(twiddles[3*k + 2],smem[x+3*t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -101,63 +71,62 @@ void fft_radix4(__local float2* smem, const int x, const int block_size, const i if (x < t) { const int dst_ind = ((x - k) << 2) + k; - smem[dst_ind] = b0 + b2; - smem[dst_ind + block_size] = b1 + b3; - smem[dst_ind + 2*block_size] = b0 - b2; - smem[dst_ind + 3*block_size] = b1 - b3; + + float2 b0 = a0 + a2; + a2 = a0 - a2; + float2 b1 = a1 + a3; + a3 = twiddle(a1 - a3); + + smem[dst_ind] = b0 + b1; + smem[dst_ind + block_size] = a2 + a3; + smem[dst_ind + 2*block_size] = b0 - b1; + smem[dst_ind + 3*block_size] = a2 - a3; } barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix8(__local float2* smem, const int x, const int block_size, const int t) +void fft_radix8(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; float2 a0, a1, a2, a3, a4, a5, a6, a7; if (x < t) { - float theta = -PI * k / (4 * block_size); + int tw_ind = block_size / 8; - float2 tw = sincos_float2(theta); // W a0 = smem[x]; - a1 = mul_float2(tw, smem[x + t]); - a2 = smem[x + 2 * t]; - a3 = mul_float2(tw, smem[x + 3 * t]); - a4 = smem[x + 4 * t]; - a5 = mul_float2(tw, smem[x + 5 * t]); - a6 = smem[x + 6 * t]; - a7 = mul_float2(tw, smem[x + 7 * t]); + a1 = mul_float2(twiddles[7*k], smem[x + t]); + a2 = mul_float2(twiddles[7*k+1],smem[x+2*t]); + a3 = mul_float2(twiddles[7*k+2],smem[x+3*t]); + a4 = mul_float2(twiddles[7*k+3],smem[x+4*t]); + a5 = mul_float2(twiddles[7*k+4],smem[x+5*t]); + a6 = mul_float2(twiddles[7*k+5],smem[x+6*t]); + a7 = mul_float2(twiddles[7*k+6],smem[x+7*t]); - tw = square(tw); // W^2 - a2 = mul_float2(tw, a2); - a3 = mul_float2(tw, a3); - a6 = mul_float2(tw, a6); - a7 = mul_float2(tw, a7); - tw = square(tw); // W^4 - a4 = mul_float2(tw, a4); - a5 = mul_float2(tw, a5); - a6 = mul_float2(tw, a6); - a7 = mul_float2(tw, a7); + float2 b0, b1, b6, b7; + + b0 = a0 + a4; + a4 = a0 - a4; + b1 = a1 + a5; + a5 = a1 - a5; + a5 = (float2)(SQRT_2) * (float2)(a5.x + a5.y, -a5.x + a5.y); + b6 = twiddle(a2 - a6); + a2 = a2 + a6; + b7 = a3 - a7; + b7 = (float2)(SQRT_2) * (float2)(-b7.x + b7.y, -b7.x - b7.y); + a3 = a3 + a7; - float2 b0 = a0 + a4; - float2 b4 = a0 - a4; - float2 b1 = a1 + a5; - float2 b5 = mul_p1q4(a1 - a5); - float2 b2 = a2 + a6; - float2 b6 = twiddle(a2 - a6); - float2 b3 = a3 + a7; - float2 b7 = mul_p3q4(a3 - a7); + a0 = b0 + a2; + a2 = b0 - a2; + a1 = b1 + a3; + a3 = twiddle(b1 - a3); + a6 = a4 - b6; + a4 = a4 + b6; + a7 = twiddle(a5 - b7); + a5 = a5 + b7; - a0 = b0 + b2; - a2 = b0 - b2; - a1 = b1 + b3; - a3 = twiddle(b1 - b3); - a4 = b4 + b6; - a6 = b4 - b6; - a5 = b5 + b7; - a7 = twiddle(b5 - b7); } barrier(CLK_LOCAL_MEM_FENCE); @@ -181,21 +150,16 @@ void fft_radix8(__local float2* smem, const int x, const int block_size, const i } __attribute__((always_inline)) -void fft_radix3(__local float2* smem, const int x, const int block_size, const int t) +void fft_radix3(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; - float2 a0, a1, a2, b0, b1; + float2 a0, a1, a2; if (x < t) { - const float theta = -PI * k * 2 / (3 * block_size); - a0 = smem[x]; - a1 = mul_float2(sincos_float2(theta), smem[x+t]); - a2 = mul_float2(sincos_float2(2 * theta), smem[x+2*t]); - b1 = a1 + a2; - a2 = twiddle((float2)sin_120*(a1 - a2)); - b0 = a0 - (float2)(0.5f)*b1; + a1 = mul_float2(twiddles[2*k], smem[x+t]); + a2 = mul_float2(twiddles[2*k+1], smem[x+2*t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -204,6 +168,10 @@ void fft_radix3(__local float2* smem, const int x, const int block_size, const i { const int dst_ind = ((x - k) * 3) + k; + float2 b1 = a1 + a2; + a2 = twiddle((float2)sin_120*(a1 - a2)); + float2 b0 = a0 - (float2)(0.5f)*b1; + smem[dst_ind] = a0 + b1; smem[dst_ind + block_size] = b0 + a2; smem[dst_ind + 2*block_size] = b0 - a2; @@ -213,41 +181,20 @@ void fft_radix3(__local float2* smem, const int x, const int block_size, const i } __attribute__((always_inline)) -void fft_radix5(__local float2* smem, const int x, const int block_size, const int t) +void fft_radix5(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; - float2 a0, a1, a2, a3, a4, b0, b1, b2, b5; + float2 a0, a1, a2, a3, a4; if (x < t) { - const float theta = -PI * k * 2 / (5 * block_size); - + int tw_ind = block_size / 5; + a0 = smem[x]; - a1 = mul_float2(sincos_float2(theta), smem[x + t]); - a2 = mul_float2(sincos_float2(theta*2),smem[x+2*t]); - a3 = mul_float2(sincos_float2(theta*3),smem[x+3*t]); - a4 = mul_float2(sincos_float2(theta*4),smem[x+4*t]); - - b1 = a1 + a4; - a1 -= a4; - - a4 = a3 + a2; - a3 -= a2; - - b2 = b1 + a4; - b0 = a0 - (float2)0.25f * b2; - - b1 = (float2)fft5_2 * (b1 - a4); - a4 = -(float2)fft5_3 * (a1 + a3); - a4 = twiddle(a4); - - b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); - - a4.x += fft5_4 * a3.y; - a4.y -= fft5_4 * a3.x; - - a1 = b0 + b1; - b0 -= b1; + a1 = mul_float2(twiddles[4*k], smem[x + t]); + a2 = mul_float2(twiddles[4*k+1],smem[x+2*t]); + a3 = mul_float2(twiddles[4*k+2],smem[x+3*t]); + a4 = mul_float2(twiddles[4*k+3],smem[x+4*t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -257,7 +204,28 @@ void fft_radix5(__local float2* smem, const int x, const int block_size, const i const int dst_ind = ((x - k) * 5) + k; __local float2* dst = smem + dst_ind; - dst[0] = a0 + b2; + float2 b0, b1, b5; + + b1 = a1 + a4; + a1 -= a4; + + a4 = a3 + a2; + a3 -= a2; + + a2 = b1 + a4; + b0 = a0 - (float2)0.25f * a2; + + b1 = (float2)fft5_2 * (b1 - a4); + a4 = (float2)fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); + b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); + + a4.x += fft5_4 * a3.y; + a4.y -= fft5_4 * a3.x; + + a1 = b0 + b1; + b0 -= b1; + + dst[0] = a0 + a2; dst[block_size] = a1 + a4; dst[2 * block_size] = b0 + b5; dst[3 * block_size] = b0 - b5; @@ -267,8 +235,9 @@ void fft_radix5(__local float2* smem, const int x, const int block_size, const i barrier(CLK_LOCAL_MEM_FENCE); } -__kernel void fft_multi_radix(__global const uchar* srcptr, int src_step, int src_offset, - __global uchar* dstptr, int dst_step, int dst_offset, +__kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int src_offset, + __global uchar* dst_ptr, int dst_step, int dst_offset, + __global const uchar* twiddles_ptr, int twiddles_step, int twiddles_offset, const int t, const int nz) { const int x = get_global_id(0); @@ -277,8 +246,9 @@ __kernel void fft_multi_radix(__global const uchar* srcptr, int src_step, int sr if (y < nz) { __local float2 smem[LOCAL_SIZE]; - __global const float2* src = (__global const float2*)(srcptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); - __global float2* dst = (__global float2*)(dstptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); + __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); + __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); + __global const float2* twiddles = (__global float2*) twiddles_ptr; const int block_size = LOCAL_SIZE/kercn; #pragma unroll @@ -292,6 +262,8 @@ __kernel void fft_multi_radix(__global const uchar* srcptr, int src_step, int sr // copy data to dst #pragma unroll for (int i=0; i Date: Fri, 11 Jul 2014 15:01:46 +0400 Subject: [PATCH 33/71] Added fftplan cache --- modules/core/src/dxt.cpp | 330 +++++++++++++++++------------ modules/core/src/opencl/fft.cl | 55 ++--- modules/core/test/ocl/test_dft.cpp | 4 +- 3 files changed, 225 insertions(+), 164 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index de17f07b23..c11b699503 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2034,50 +2034,6 @@ namespace cv #ifdef HAVE_OPENCL -static bool ocl_packToCCS(InputArray _buffer, OutputArray _dst, int flags) -{ - UMat buffer = _buffer.getUMat(); - UMat dst = _dst.getUMat(); - - buffer = buffer.reshape(1); - if ((flags & DFT_ROWS) == 0 && buffer.rows > 1) - { - // pack to CCS by rows - if (dst.cols > 2) - buffer.colRange(2, dst.cols + (dst.cols % 2)).copyTo(dst.colRange(1, dst.cols-1 + (dst.cols % 2))); - - Mat dst_mat = dst.getMat(ACCESS_WRITE); - Mat buffer_mat = buffer.getMat(ACCESS_READ); - - dst_mat.at(0,0) = buffer_mat.at(0,0); - dst_mat.at(dst_mat.rows-1,0) = buffer_mat.at(buffer.rows/2,0); - for (int i=1; i(i,0) = buffer_mat.at((i+1)/2,0); - dst_mat.at(i+1,0) = buffer_mat.at((i+1)/2,1); - } - - if (dst_mat.cols % 2 == 0) - { - dst_mat.at(0,dst_mat.cols-1) = buffer_mat.at(0,buffer.cols/2); - dst_mat.at(dst_mat.rows-1,dst_mat.cols-1) = buffer_mat.at(buffer.rows/2,buffer.cols/2); - - for (int i=1; i(i,dst_mat.cols-1) = buffer_mat.at((i+1)/2,buffer.cols/2); - dst_mat.at(i+1,dst_mat.cols-1) = buffer_mat.at((i+1)/2,buffer.cols/2+1); - } - } - } - else - { - // pack to CCS each row - buffer.colRange(0,1).copyTo(dst.colRange(0,1)); - buffer.colRange(2, (dst.cols+1)).copyTo(dst.colRange(1, dst.cols)); - } - return true; -} - static std::vector ocl_getRadixes(int cols, int& min_radix) { int factors[34]; @@ -2116,72 +2072,175 @@ static std::vector ocl_getRadixes(int cols, int& min_radix) return radixes; } -static bool ocl_dft_C2C_row(InputArray _src, OutputArray _dst, InputOutputArray _twiddles, int nonzero_rows, int flags) +struct OCL_FftPlan { - int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); - UMat src = _src.getUMat(); + UMat twiddles; + String buildOptions; + int thread_count; - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - if (depth == CV_64F && !doubleSupport) - return false; - - int min_radix = INT_MAX; - std::vector radixes = ocl_getRadixes(src.cols, min_radix); - - // generate string with radix calls - String radix_processing; - int n = 1, twiddle_index = 0; - for (size_t i=0; i radixes = ocl_getRadixes(dft_size, min_radix); + thread_count = dft_size / min_radix; + + // generate string with radix calls + String radix_processing; + int n = 1, twiddle_size = 0; + for (size_t i=0; i(); int ptr_index = 0; - int n = 1; + n = 1; for (size_t i=0; idft_size == dft_size) + { + return plan; + } + } + + OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size, flags); + planStorage.push_back(newPlan); + return newPlan; + } + + ~OCL_FftPlanCache() + { + for (std::vector::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) + delete (*i); + planStorage.clear(); + } + +protected: + OCL_FftPlanCache() : + planStorage() + { + } + + std::vector planStorage; +}; + +static bool ocl_packToCCS(InputArray _src, OutputArray _dst, int flags) +{ + UMat src = _src.getUMat(); + _dst.create(src.size(), CV_32F); + UMat dst = _dst.getUMat(); + + src = src.reshape(1); + if ((flags & DFT_ROWS) == 0 && src.rows > 1) + { + // pack to CCS by rows + if (dst.cols > 2) + src.colRange(2, dst.cols + (dst.cols % 2)).copyTo(dst.colRange(1, dst.cols-1 + (dst.cols % 2))); + + Mat dst_mat = dst.getMat(ACCESS_WRITE); + Mat buffer_mat = src.getMat(ACCESS_READ); + + dst_mat.at(0,0) = buffer_mat.at(0,0); + dst_mat.at(dst_mat.rows-1,0) = buffer_mat.at(src.rows/2,0); + for (int i=1; i(i,0) = buffer_mat.at((i+1)/2,0); + dst_mat.at(i+1,0) = buffer_mat.at((i+1)/2,1); + } + + if (dst_mat.cols % 2 == 0) + { + dst_mat.at(0,dst_mat.cols-1) = buffer_mat.at(0,src.cols/2); + dst_mat.at(dst_mat.rows-1,dst_mat.cols-1) = buffer_mat.at(src.rows/2,src.cols/2); + + for (int i=1; i(i,dst_mat.cols-1) = buffer_mat.at((i+1)/2,src.cols/2); + dst_mat.at(i+1,dst_mat.cols-1) = buffer_mat.at((i+1)/2,src.cols/2+1); + } + } + } + else + { + // pack to CCS each row + src.colRange(0,1).copyTo(dst.colRange(0,1)); + src.colRange(2, (dst.cols+1)).copyTo(dst.colRange(1, dst.cols)); + } + return true; +} + +static bool ocl_dft_C2C_row(InputArray _src, OutputArray _dst, int nonzero_rows, int flags) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if (depth == CV_64F && !doubleSupport) return false; - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, nonzero_rows); - return k.run(2, globalsize, localsize, false); + + const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); + return plan->enqueueTransform(_src, _dst, nonzero_rows); } static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) @@ -2217,76 +2276,71 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } } - if (complex_output) + UMat input, output; + if (complex_input) { - //if (is1d) - // _dst.create(Size(src.cols/2+1, src.rows), CV_MAKE_TYPE(depth, 2)); - //else - _dst.create(src.size(), CV_MAKE_TYPE(depth, 2)); + input = src; } else - _dst.create(src.size(), CV_MAKE_TYPE(depth, 1)); + { + if (!inv) + { + // in case real input convert it to complex + input.create(src.size(), CV_MAKE_TYPE(depth, 2)); + std::vector planes; + planes.push_back(src); + planes.push_back(UMat::zeros(src.size(), CV_32F)); + merge(planes, input); + } + else + { + // TODO: unpack from CCS format + } + } + + UMat dst = _dst.getUMat(); - - bool inplace = src.u == dst.u; - //UMat buffer; - - //if (complex_input) - //{ - // if (inplace) - // buffer = src; - // else - // src.copyTo(buffer); - //} - //else - //{ - // if (!inv) - // { - // // in case real input convert it to complex - // buffer.create(src.size(), CV_MAKE_TYPE(depth, 2)); - // std::vector planes; - // planes.push_back(src); - // planes.push_back(UMat::zeros(src.size(), CV_32F)); - // merge(planes, buffer); - // } - // else - // { - // // TODO: unpack from CCS format - // } - //} + if (complex_output) + { + if (real_input && is1d && !inv) + output.create(src.size(), CV_32FC2); + else + output = dst; + } else + { + output.create(src.size(), CV_32FC2); + } if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) nonzero_rows = _src.rows(); - UMat buffer; - - if (!ocl_dft_C2C_row(src, dst, buffer, nonzero_rows, flags)) + if (!ocl_dft_C2C_row(input, output, nonzero_rows, flags)) return false; if ((flags & DFT_ROWS) == 0 && nonzero_rows > 1) { - transpose(dst, dst); - if (!ocl_dft_C2C_row(dst, dst, buffer, dst.rows, flags)) + transpose(output, output); + if (!ocl_dft_C2C_row(output, output, output.rows, flags)) return false; - transpose(dst, dst); + transpose(output, output); } if (complex_output) { - if (real_input && is1d) - _dst.assign(dst.colRange(0, dst.cols/2+1)); + if (real_input && is1d && !inv) + _dst.assign(output.colRange(0, output.cols/2+1)); else - _dst.assign(dst); + _dst.assign(output); + } + else + { + if (!inv) + ocl_packToCCS(output, _dst, flags); + else + { + // copy real part to dst + } } - //else - //{ - // if (!inv) - // ocl_packToCCS(buffer, _dst, flags); - // else - // { - // // copy real part to dst - // } - //} return true; } diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index bd2b863c6c..34da79fafb 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -28,7 +28,7 @@ float2 twiddle(float2 a) { } __attribute__((always_inline)) -void fft_radix2(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k = x & (block_size - 1); float2 a0, a1; @@ -53,17 +53,18 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int } __attribute__((always_inline)) -void fft_radix4(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k = x & (block_size - 1); float2 a0, a1, a2, a3; if (x < t) { + const int twiddle_block = block_size / 4; a0 = smem[x]; - a1 = mul_float2(twiddles[3*k],smem[x+t]); - a2 = mul_float2(twiddles[3*k + 1],smem[x+2*t]); - a3 = mul_float2(twiddles[3*k + 2],smem[x+3*t]); + a1 = mul_float2(twiddles[k],smem[x+t]); + a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); + a3 = mul_float2(twiddles[k + 2*block_size],smem[x+3*t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -87,7 +88,7 @@ void fft_radix4(__local float2* smem, __global const float2* twiddles, const int } __attribute__((always_inline)) -void fft_radix8(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix8(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; float2 a0, a1, a2, a3, a4, a5, a6, a7; @@ -97,13 +98,13 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int int tw_ind = block_size / 8; a0 = smem[x]; - a1 = mul_float2(twiddles[7*k], smem[x + t]); - a2 = mul_float2(twiddles[7*k+1],smem[x+2*t]); - a3 = mul_float2(twiddles[7*k+2],smem[x+3*t]); - a4 = mul_float2(twiddles[7*k+3],smem[x+4*t]); - a5 = mul_float2(twiddles[7*k+4],smem[x+5*t]); - a6 = mul_float2(twiddles[7*k+5],smem[x+6*t]); - a7 = mul_float2(twiddles[7*k+6],smem[x+7*t]); + a1 = mul_float2(twiddles[k], smem[x + t]); + a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); + a3 = mul_float2(twiddles[k+2*block_size],smem[x+3*t]); + a4 = mul_float2(twiddles[k+3*block_size],smem[x+4*t]); + a5 = mul_float2(twiddles[k+4*block_size],smem[x+5*t]); + a6 = mul_float2(twiddles[k+5*block_size],smem[x+6*t]); + a7 = mul_float2(twiddles[k+6*block_size],smem[x+7*t]); float2 b0, b1, b6, b7; @@ -150,16 +151,23 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int } __attribute__((always_inline)) -void fft_radix3(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix3(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; float2 a0, a1, a2; if (x < t) { + //const int twiddle_block = block_size / 3; + //const float theta = -PI * k * 2 / (3 * block_size); + //float2 tw = sincos_float2(theta); + //printf("radix3 %d (%f,%f)(%f,%f)\n", k, tw.x, tw.y, twiddles[k].x, twiddles[k].y); + //tw = sincos_float2(2*theta); + //printf("radix3- %d %d (%f,%f)(%f,%f)\n", k, twiddle_block, tw.x, tw.y, twiddles[k+block_size].x, twiddles[k+block_size].y); + a0 = smem[x]; - a1 = mul_float2(twiddles[2*k], smem[x+t]); - a2 = mul_float2(twiddles[2*k+1], smem[x+2*t]); + a1 = mul_float2(twiddles[k], smem[x+t]); + a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -181,7 +189,7 @@ void fft_radix3(__local float2* smem, __global const float2* twiddles, const int } __attribute__((always_inline)) -void fft_radix5(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix5(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; float2 a0, a1, a2, a3, a4; @@ -191,10 +199,10 @@ void fft_radix5(__local float2* smem, __global const float2* twiddles, const int int tw_ind = block_size / 5; a0 = smem[x]; - a1 = mul_float2(twiddles[4*k], smem[x + t]); - a2 = mul_float2(twiddles[4*k+1],smem[x+2*t]); - a3 = mul_float2(twiddles[4*k+2],smem[x+3*t]); - a4 = mul_float2(twiddles[4*k+3],smem[x+4*t]); + a1 = mul_float2(twiddles[k], smem[x + t]); + a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); + a3 = mul_float2(twiddles[k+2*block_size],smem[x+3*t]); + a4 = mul_float2(twiddles[k+3*block_size],smem[x+4*t]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -237,8 +245,7 @@ void fft_radix5(__local float2* smem, __global const float2* twiddles, const int __kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int src_offset, __global uchar* dst_ptr, int dst_step, int dst_offset, - __global const uchar* twiddles_ptr, int twiddles_step, int twiddles_offset, - const int t, const int nz) + __constant float2 * twiddles_ptr, const int t, const int nz) { const int x = get_global_id(0); const int y = get_group_id(1); @@ -248,7 +255,7 @@ __kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int s __local float2 smem[LOCAL_SIZE]; __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); - __global const float2* twiddles = (__global float2*) twiddles_ptr; + __constant const float2* twiddles = (__constant float2*) twiddles_ptr; const int block_size = LOCAL_SIZE/kercn; #pragma unroll diff --git a/modules/core/test/ocl/test_dft.cpp b/modules/core/test/ocl/test_dft.cpp index 7a7a98852a..2529e949e0 100644 --- a/modules/core/test/ocl/test_dft.cpp +++ b/modules/core/test/ocl/test_dft.cpp @@ -181,9 +181,9 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(30, 20), +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(1920, 1), cv::Size(5, 4), cv::Size(30, 20), cv::Size(512, 1), cv::Size(1024, 1024)), - Values((OCL_FFT_TYPE) C2C/*, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) R2C/*, (OCL_FFT_TYPE) C2R*/), + Values(/*(OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2C,*/ (OCL_FFT_TYPE) R2R/*, (OCL_FFT_TYPE) C2R*/), Bool() // DFT_ROWS ) ); From ed07241f89849c4d91a0c78494ed9a5823c09342 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 15 Jul 2014 18:25:46 +0400 Subject: [PATCH 34/71] Completed all forward transforms. --- modules/core/perf/opencl/perf_arithm.cpp | 2 +- modules/core/perf/opencl/perf_dxt.cpp | 31 +++- modules/core/src/dxt.cpp | 139 +++++++++++------ modules/core/src/opencl/fft.cl | 184 ++++++++++++++++++----- modules/core/test/ocl/test_dft.cpp | 21 +-- 5 files changed, 276 insertions(+), 101 deletions(-) diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index 17badca765..ba808b494f 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine( typedef Size_MatType TransposeFixture; OCL_PERF_TEST_P(TransposeFixture, Transpose, ::testing::Combine( - OCL_TEST_SIZES, OCL_TEST_TYPES_134)) + OCL_TEST_SIZES, Values(CV_8UC1, CV_32FC1, CV_8UC2, CV_32FC2, CV_8UC4, CV_32FC4))) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index c0da96b373..edeeda7f0c 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -54,21 +54,40 @@ namespace ocl { ///////////// dft //////////////////////// -typedef tuple DftParams; +enum OCL_FFT_TYPE +{ + R2R = 0, // real to real (CCS) + C2R = 1, // complex to real + R2C = 2, // real to complex + C2C = 3 // complex to complex +}; + +typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(1024, 2048), Size(512, 512), Size(2048, 2048)), +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), + Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), Values((int)DFT_ROWS, (int) 0/*, (int)DFT_SCALE, (int)DFT_INVERSE, (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) { const DftParams params = GetParam(); - const Size srcSize = get<0>(params); - const int flags = get<1>(params); + const int dft_type = get<0>(params); + const Size srcSize = get<1>(params); + int flags = get<2>(params); + + int in_cn, out_cn; + switch (dft_type) + { + case R2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 1; out_cn = 1; break; + case C2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 2; out_cn = 2; break; + case R2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 1; out_cn = 2; break; + case C2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 2; out_cn = 2; break; + } - UMat src(srcSize, CV_32FC2), dst(srcSize, CV_32FC2); + UMat src(srcSize, CV_MAKE_TYPE(CV_32F, in_cn)), dst(srcSize, CV_MAKE_TYPE(CV_32F, out_cn)); declare.in(src, WARMUP_RNG).out(dst); - OCL_TEST_CYCLE() cv::dft(src, dst, flags | DFT_COMPLEX_OUTPUT); + OCL_TEST_CYCLE() cv::dft(src, dst, flags); SANITY_CHECK(dst, 1e-3); } diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index c11b699503..a3df694364 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2034,7 +2034,7 @@ namespace cv #ifdef HAVE_OPENCL -static std::vector ocl_getRadixes(int cols, int& min_radix) +static std::vector ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) { int factors[34]; int nf = DFTFactorize( cols, factors ); @@ -2042,9 +2042,6 @@ static std::vector ocl_getRadixes(int cols, int& min_radix) int n = 1; int factor_index = 0; - // choose radix order - std::vector radixes; - // 2^n transforms if ( (factors[factor_index] & 1) == 0 ) { @@ -2057,7 +2054,10 @@ static std::vector ocl_getRadixes(int cols, int& min_radix) radix = 4; radixes.push_back(radix); - min_radix = min(min_radix, radix); + if (radix == 2 && cols % 4 == 0) + min_radix = min(min_radix, 2*radix); + else + min_radix = min(min_radix, radix); n *= radix; } factor_index++; @@ -2067,7 +2067,10 @@ static std::vector ocl_getRadixes(int cols, int& min_radix) for( ; factor_index < nf; factor_index++ ) { radixes.push_back(factors[factor_index]); - min_radix = min(min_radix, factors[factor_index]); + if (factors[factor_index] == 3 && cols % 6 == 0) + min_radix = min(min_radix, 2*factors[factor_index]); + else + min_radix = min(min_radix, factors[factor_index]); } return radixes; } @@ -2084,8 +2087,16 @@ struct OCL_FftPlan OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags) { int min_radix = INT_MAX; - std::vector radixes = ocl_getRadixes(dft_size, min_radix); - thread_count = dft_size / min_radix; + std::vector radixes, blocks; + ocl_getRadixes(dft_size, radixes, blocks, min_radix); + thread_count = (dft_size + min_radix-1) / min_radix; + + printf("cols: %d - ", dft_size); + for (int i=0; i 0; - if (depth == CV_64F && !doubleSupport) - return false; - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); - return plan->enqueueTransform(_src, _dst, nonzero_rows); + return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, true); +} + +static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int flags) +{ + const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags); + return plan->enqueueTransform(_src, _dst, _src.cols(), flags, false); } static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) @@ -2262,7 +2295,10 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro int real_input = cn == 1 ? 1 : 0; int real_output = (flags & DFT_REAL_OUTPUT) != 0; bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0; - bool is1d = (flags & DFT_ROWS) != 0 || src.rows == 1; + + if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) + nonzero_rows = _src.rows(); + bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1; // if output format is not specified if (complex_output + real_output == 0) @@ -2276,6 +2312,19 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } } + // Forward Complex to CCS not supported + if (complex_input && real_output && !inv) + { + real_output = 0; + complex_output = 1; + } + // Inverse CCS to Complex not supported + if (real_input && complex_output && inv) + { + complex_output = 0; + real_output = 1; + } + UMat input, output; if (complex_input) { @@ -2285,12 +2334,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro { if (!inv) { - // in case real input convert it to complex - input.create(src.size(), CV_MAKE_TYPE(depth, 2)); - std::vector planes; - planes.push_back(src); - planes.push_back(UMat::zeros(src.size(), CV_32F)); - merge(planes, input); + input = src; } else { @@ -2298,31 +2342,34 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } } - - UMat dst = _dst.getUMat(); if (complex_output) { if (real_input && is1d && !inv) output.create(src.size(), CV_32FC2); else - output = dst; + { + _dst.create(src.size(), CV_32FC2); + output = _dst.getUMat(); + } } else { - output.create(src.size(), CV_32FC2); + // CCS + if (is1d) + { + _dst.create(src.size(), CV_32FC1); + output = _dst.getUMat(); + } + else + output.create(src.size(), CV_32FC2); } - if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) - nonzero_rows = _src.rows(); - - if (!ocl_dft_C2C_row(input, output, nonzero_rows, flags)) + if (!ocl_dft_C2C_rows(input, output, nonzero_rows, flags)) return false; - if ((flags & DFT_ROWS) == 0 && nonzero_rows > 1) + if (!is1d) { - transpose(output, output); - if (!ocl_dft_C2C_row(output, output, output.rows, flags)) + if (!ocl_dft_C2C_cols(output, output, flags)) return false; - transpose(output, output); } if (complex_output) @@ -2335,12 +2382,18 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro else { if (!inv) - ocl_packToCCS(output, _dst, flags); + { + if (!is1d) + ocl_packToCCS(output, _dst, flags); + else + _dst.assign(output); + } else { // copy real part to dst } } + //printf("OCL!\n"); return true; } diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index 34da79fafb..7803cdbc21 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -1,25 +1,13 @@ -__constant float PI = 3.14159265f; -__constant float SQRT_2 = 0.707106781188f; - -__constant float sin_120 = 0.866025403784f; -__constant float fft5_2 = 0.559016994374f; -__constant float fft5_3 = -0.951056516295f; -__constant float fft5_4 = -1.538841768587f; -__constant float fft5_5 = 0.363271264002f; +#define SQRT_2 0.707106781188f +#define sin_120 0.866025403784f +#define fft5_2 0.559016994374f +#define fft5_3 -0.951056516295f +#define fft5_4 -1.538841768587f +#define fft5_5 0.363271264002f __attribute__((always_inline)) -float2 mul_float2(float2 a, float2 b){ - float2 res; - res.x = a.x * b.x - a.y * b.y; - res.y = a.x * b.y + a.y * b.x; - return res; -} - -__attribute__((always_inline)) -float2 sincos_float2(float alpha) { - float cs, sn; - sn = sincos(alpha, &cs); // sincos - return (float2)(cs, sn); +float2 mul_float2(float2 a, float2 b) { + return (float2)(fma(a.x, b.x, -a.y * b.y), fma(a.x, b.y, a.y * b.x)); } __attribute__((always_inline)) @@ -52,6 +40,38 @@ void fft_radix2(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } +__attribute__((always_inline)) +void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ + const int k1 = x & (block_size - 1); + const int x2 = x + (t+1)/2; + const int k2 = x2 & (block_size - 1); + float2 a0, a1, a2, a3; + + if (x < (t+1)/2) + { + a0 = smem[x]; + a1 = mul_float2(twiddles[k1],smem[x+t]); + a2 = smem[x2]; + a3 = mul_float2(twiddles[k2],smem[x2+t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < (t+1)/2) + { + int dst_ind = (x << 1) - k1; + smem[dst_ind] = a0 + a1; + smem[dst_ind+block_size] = a0 - a1; + + dst_ind = (x2 << 1) - k2; + smem[dst_ind] = a2 + a3; + smem[dst_ind+block_size] = a2 - a3; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + __attribute__((always_inline)) void fft_radix4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { @@ -158,13 +178,6 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i if (x < t) { - //const int twiddle_block = block_size / 3; - //const float theta = -PI * k * 2 / (3 * block_size); - //float2 tw = sincos_float2(theta); - //printf("radix3 %d (%f,%f)(%f,%f)\n", k, tw.x, tw.y, twiddles[k].x, twiddles[k].y); - //tw = sincos_float2(2*theta); - //printf("radix3- %d %d (%f,%f)(%f,%f)\n", k, twiddle_block, tw.x, tw.y, twiddles[k+block_size].x, twiddles[k+block_size].y); - a0 = smem[x]; a1 = mul_float2(twiddles[k], smem[x+t]); a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); @@ -177,7 +190,7 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i const int dst_ind = ((x - k) * 3) + k; float2 b1 = a1 + a2; - a2 = twiddle((float2)sin_120*(a1 - a2)); + a2 = twiddle(sin_120*(a1 - a2)); float2 b0 = a0 - (float2)(0.5f)*b1; smem[dst_ind] = a0 + b1; @@ -188,6 +201,53 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } +__attribute__((always_inline)) +void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ + const int k = x % block_size; + const int x2 = x + (t+1)/2; + const int k2 = x2 % block_size; + float2 a0, a1, a2, a3, a4, a5; + + if (x < (t+1)/2) + { + a0 = smem[x]; + a1 = mul_float2(twiddles[k], smem[x+t]); + a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); + + a3 = smem[x2]; + a4 = mul_float2(twiddles[k2], smem[x2+t]); + a5 = mul_float2(twiddles[k2+block_size], smem[x2+2*t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < (t+1)/2) + { + int dst_ind = ((x - k) * 3) + k; + + float2 b1 = a1 + a2; + a2 = twiddle(sin_120*(a1 - a2)); + float2 b0 = a0 - (float2)(0.5f)*b1; + + smem[dst_ind] = a0 + b1; + smem[dst_ind + block_size] = b0 + a2; + smem[dst_ind + 2*block_size] = b0 - a2; + + dst_ind = ((x2 - k2) * 3) + k2; + + b1 = a4 + a5; + a5 = twiddle(sin_120*(a4 - a5)); + b0 = a3 - (float2)(0.5f)*b1; + + smem[dst_ind] = a3 + b1; + smem[dst_ind + block_size] = b0 + a5; + smem[dst_ind + 2*block_size] = b0 - a5; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + __attribute__((always_inline)) void fft_radix5(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { @@ -196,8 +256,6 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i if (x < t) { - int tw_ind = block_size / 5; - a0 = smem[x]; a1 = mul_float2(twiddles[k], smem[x + t]); a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); @@ -223,8 +281,8 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i a2 = b1 + a4; b0 = a0 - (float2)0.25f * a2; - b1 = (float2)fft5_2 * (b1 - a4); - a4 = (float2)fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); + b1 = fft5_2 * (b1 - a4); + a4 = fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); a4.x += fft5_4 * a3.y; @@ -243,9 +301,9 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } -__kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int src_offset, - __global uchar* dst_ptr, int dst_step, int dst_offset, - __constant float2 * twiddles_ptr, const int t, const int nz) +__kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __constant float2 * twiddles_ptr, const int t, const int nz) { const int x = get_global_id(0); const int y = get_group_id(1); @@ -253,14 +311,60 @@ __kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int s if (y < nz) { __local float2 smem[LOCAL_SIZE]; - __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); - __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); __constant const float2* twiddles = (__constant float2*) twiddles_ptr; - + const int ind = x; const int block_size = LOCAL_SIZE/kercn; + +#ifndef REAL_INPUT + __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); #pragma unroll for (int i=0; i Date: Thu, 17 Jul 2014 12:31:41 +0400 Subject: [PATCH 35/71] Added packing to CCS format --- modules/core/src/dxt.cpp | 117 +++++++---------------------- modules/core/src/opencl/fft.cl | 48 ++++++++++-- modules/core/test/ocl/test_dft.cpp | 18 +++-- 3 files changed, 82 insertions(+), 101 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index a3df694364..69ec2c9efe 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2083,20 +2083,19 @@ struct OCL_FftPlan int dft_size; int flags; - - OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags) + bool status; + OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true) { int min_radix = INT_MAX; std::vector radixes, blocks; ocl_getRadixes(dft_size, radixes, blocks, min_radix); thread_count = (dft_size + min_radix-1) / min_radix; - printf("cols: %d - ", dft_size); - for (int i=0; i ocl::Device::getDefault().maxWorkGroupSize()) { - printf("%d ", radixes[i]); + status = false; + return; } - printf("min radix - %d\n", min_radix); // generate string with radix calls String radix_processing; @@ -2142,6 +2141,9 @@ struct OCL_FftPlan bool enqueueTransform(InputArray _src, OutputArray _dst, int dft_size, int flags, bool rows = true) const { + if (!status) + return false; + UMat src = _src.getUMat(); UMat dst = _dst.getUMat(); @@ -2162,11 +2164,14 @@ struct OCL_FftPlan kernel_name = "fft_multi_radix_cols"; } + bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; String options = buildOptions; if (src.channels() == 1) options += " -D REAL_INPUT"; if (dst.channels() == 1) options += " -D CCS_OUTPUT"; + if ((is1d && src.channels() == 1) || (rows && (flags & DFT_REAL_OUTPUT))) + options += " -D NO_CONJUGATE"; ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); if (k.empty()) @@ -2219,61 +2224,16 @@ protected: std::vector planStorage; }; -static bool ocl_packToCCS(InputArray _src, OutputArray _dst, int flags) -{ - UMat src = _src.getUMat(); - _dst.create(src.size(), CV_32F); - UMat dst = _dst.getUMat(); - - src = src.reshape(1); - if ((flags & DFT_ROWS) == 0 && src.rows > 1) - { - // pack to CCS by rows - if (dst.cols > 2) - src.colRange(2, dst.cols + (dst.cols % 2)).copyTo(dst.colRange(1, dst.cols-1 + (dst.cols % 2))); - - Mat dst_mat = dst.getMat(ACCESS_WRITE); - Mat buffer_mat = src.getMat(ACCESS_READ); - - dst_mat.at(0,0) = buffer_mat.at(0,0); - dst_mat.at(dst_mat.rows-1,0) = buffer_mat.at(src.rows/2,0); - for (int i=1; i(i,0) = buffer_mat.at((i+1)/2,0); - dst_mat.at(i+1,0) = buffer_mat.at((i+1)/2,1); - } - - if (dst_mat.cols % 2 == 0) - { - dst_mat.at(0,dst_mat.cols-1) = buffer_mat.at(0,src.cols/2); - dst_mat.at(dst_mat.rows-1,dst_mat.cols-1) = buffer_mat.at(src.rows/2,src.cols/2); - - for (int i=1; i(i,dst_mat.cols-1) = buffer_mat.at((i+1)/2,src.cols/2); - dst_mat.at(i+1,dst_mat.cols-1) = buffer_mat.at((i+1)/2,src.cols/2+1); - } - } - } - else - { - // pack to CCS each row - src.colRange(0,1).copyTo(dst.colRange(0,1)); - src.colRange(2, (dst.cols+1)).copyTo(dst.colRange(1, dst.cols)); - } - return true; -} - static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags) { const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, true); } -static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int flags) +static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags) { const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags); - return plan->enqueueTransform(_src, _dst, _src.cols(), flags, false); + return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, false); } static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) @@ -2315,6 +2275,8 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro // Forward Complex to CCS not supported if (complex_input && real_output && !inv) { + flags ^= DFT_REAL_OUTPUT; + flags |= DFT_COMPLEX_OUTPUT; real_output = 0; complex_output = 1; } @@ -2344,23 +2306,21 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro if (complex_output) { - if (real_input && is1d && !inv) - output.create(src.size(), CV_32FC2); - else - { - _dst.create(src.size(), CV_32FC2); - output = _dst.getUMat(); - } - } else + _dst.create(src.size(), CV_32FC2); + output = _dst.getUMat(); + } + else { - // CCS if (is1d) { _dst.create(src.size(), CV_32FC1); output = _dst.getUMat(); } else + { + _dst.create(src.size(), CV_32FC1); output.create(src.size(), CV_32FC2); + } } if (!ocl_dft_C2C_rows(input, output, nonzero_rows, flags)) @@ -2368,32 +2328,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro if (!is1d) { - if (!ocl_dft_C2C_cols(output, output, flags)) + int nonzero_cols = real_input && real_output ? output.cols/2 + 1 : output.cols; + if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags)) return false; - } - - if (complex_output) + } else { - if (real_input && is1d && !inv) - _dst.assign(output.colRange(0, output.cols/2+1)); - else - _dst.assign(output); + _dst.assign(output); } - else - { - if (!inv) - { - if (!is1d) - ocl_packToCCS(output, _dst, flags); - else - _dst.assign(output); - } - else - { - // copy real part to dst - } - } - //printf("OCL!\n"); return true; } @@ -2435,7 +2376,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; int factors[34]; bool inplace_transform = false; - bool is1d = (flags & DFT_ROWS) != 0 || src.rows == 1; #ifdef USE_IPP_DFT AutoBuffer ippbuf; int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1; @@ -2444,10 +2384,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) - if (!is1d) - _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); - else - _dst.create( Size(src.cols/2+1, src.rows), CV_MAKETYPE(depth, 2) ); + _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) _dst.create( src.size(), depth ); else diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index 7803cdbc21..a778d59f22 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -331,10 +331,17 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, RADIX_PROCESS; #ifndef CCS_OUTPUT - __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); +#ifdef NO_CONJUGATE + // copy result without complex conjugate + const int cols = dst_cols/2 + 1; +#else + const int cols = dst_cols; +#endif + + __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); #pragma unroll - for (int i=0; i(df) << std::endl; double eps = src.size().area() * 1e-4; EXPECT_MAT_NEAR(dst, udst, eps); @@ -181,9 +189,9 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(6, 1), cv::Size(5, 8), cv::Size(30, 20), +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(6, 4), cv::Size(5, 8), cv::Size(6, 6), cv::Size(512, 1), cv::Size(1280, 768)), - Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R/*, (OCL_FFT_TYPE) C2R*/), + Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), Bool(), // DFT_ROWS Bool() // inplace ) From b17bf031f696f1bc89b871466360d675f5a1b3fb Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 17 Jul 2014 16:20:04 +0400 Subject: [PATCH 36/71] Added DFT_SCALE for forward transforms --- modules/core/src/dxt.cpp | 13 ++++-- modules/core/src/opencl/fft.cl | 31 ++++++++++---- modules/core/test/ocl/test_dft.cpp | 20 ++++----- samples/cpp/dft.cpp | 69 +++--------------------------- 4 files changed, 48 insertions(+), 85 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 69ec2c9efe..449e19db4b 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2151,27 +2151,34 @@ struct OCL_FftPlan size_t localsize[2]; String kernel_name; + bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; + String options = buildOptions; + if (rows) { globalsize[0] = thread_count; globalsize[1] = dft_size; localsize[0] = thread_count; localsize[1] = 1; kernel_name = "fft_multi_radix_rows"; + if (is1d && (flags & DFT_SCALE)) + options += " -D DFT_SCALE"; } else { globalsize[0] = dft_size; globalsize[1] = thread_count; localsize[0] = 1; localsize[1] = thread_count; kernel_name = "fft_multi_radix_cols"; + if (flags & DFT_SCALE) + options += " -D DFT_SCALE"; } - - bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; - String options = buildOptions; + if (src.channels() == 1) options += " -D REAL_INPUT"; if (dst.channels() == 1) options += " -D CCS_OUTPUT"; if ((is1d && src.channels() == 1) || (rows && (flags & DFT_REAL_OUTPUT))) options += " -D NO_CONJUGATE"; + if (is1d) + options += " -D IS_1D"; ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); if (k.empty()) diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index a778d59f22..d59e0d9b48 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -301,6 +301,12 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } +#ifdef DFT_SCALE +#define VAL(x, scale) x*scale +#else +#define VAL(x, scale) x +#endif + __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, __constant float2 * twiddles_ptr, const int t, const int nz) @@ -314,6 +320,11 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, __constant const float2* twiddles = (__constant float2*) twiddles_ptr; const int ind = x; const int block_size = LOCAL_SIZE/kercn; +#ifdef IS_1D + float scale = 1.f/dst_cols; +#else + float scale = 1.f/(dst_cols*dst_rows); +#endif #ifndef REAL_INPUT __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); @@ -341,15 +352,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); #pragma unroll for (int i=x; i -#include -#include using namespace cv; using namespace std; @@ -26,31 +24,6 @@ const char* keys = int main(int argc, const char ** argv) { - //int cols = 4; - //int rows = 768; - //srand(0); - //Mat input(Size(cols, rows), CV_32FC2); - //for (int i=0; i(j,i) = Vec2f((float) rand() / RAND_MAX, (float) rand() / RAND_MAX); - //Mat dst; - // - //UMat gpu_input, gpu_dst; - //input.copyTo(gpu_input); - //auto start = std::chrono::system_clock::now(); - //dft(input, dst, DFT_ROWS); - //auto cpu_duration = chrono::duration_cast(chrono::system_clock::now() - start); - // - //start = std::chrono::system_clock::now(); - //dft(gpu_input, gpu_dst, DFT_ROWS); - //auto gpu_duration = chrono::duration_cast(chrono::system_clock::now() - start); - - //double n = norm(dst, gpu_dst); - //cout << "norm = " << n << endl; - //cout << "CPU time: " << cpu_duration.count() << "ms" << endl; - //cout << "GPU time: " << gpu_duration.count() << "ms" << endl; - - help(); CommandLineParser parser(argc, argv, keys); string filename = parser.get(0); @@ -62,46 +35,16 @@ int main(int argc, const char ** argv) printf("Cannot read image file: %s\n", filename.c_str()); return -1; } - - Mat small_img = img(Rect(0,0,6,6)); - - int M = getOptimalDFTSize( small_img.rows ); - int N = getOptimalDFTSize( small_img.cols ); + int M = getOptimalDFTSize( img.rows ); + int N = getOptimalDFTSize( img.cols ); Mat padded; - copyMakeBorder(small_img, padded, 0, M - small_img.rows, 0, N - small_img.cols, BORDER_CONSTANT, Scalar::all(0)); + copyMakeBorder(img, padded, 0, M - img.rows, 0, N - img.cols, BORDER_CONSTANT, Scalar::all(0)); - Mat planes[] = {Mat_(padded), Mat::ones(padded.size(), CV_32F)}; - Mat complexImg, complexImg1, complexInput; + Mat planes[] = {Mat_(padded), Mat::zeros(padded.size(), CV_32F)}; + Mat complexImg; merge(planes, 2, complexImg); - Mat realInput; - padded.convertTo(realInput, CV_32F); - complexInput = complexImg; - //cout << complexImg << endl; - //dft(complexImg, complexImg, DFT_REAL_OUTPUT); - //cout << "Complex to Complex" << endl; - //cout << complexImg << endl; - cout << "Complex input" << endl << complexInput << endl; - cout << "Real input" << endl << realInput << endl; - - dft(complexInput, complexImg1, DFT_COMPLEX_OUTPUT); - cout << "Complex to Complex image: " << endl; - cout << endl << complexImg1 << endl; - - Mat realImg1; - dft(complexInput, realImg1, DFT_REAL_OUTPUT); - cout << "Complex to Real image: " << endl; - cout << endl << realImg1 << endl; - - Mat realOut; - dft(complexImg1, realOut, DFT_INVERSE | DFT_COMPLEX_OUTPUT); - cout << "Complex to Complex (inverse):" << endl; - cout << realOut << endl; - - Mat complexOut; - dft(realImg1, complexOut, DFT_INVERSE | DFT_REAL_OUTPUT | DFT_SCALE); - cout << "Complex to Real (inverse):" << endl; - cout << complexOut << endl; + dft(complexImg, complexImg); // compute log(1 + sqrt(Re(DFT(img))**2 + Im(DFT(img))**2)) split(complexImg, planes); From 2b9e5560556541b5331e41413be17c0cca98af52 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Fri, 18 Jul 2014 13:41:57 +0400 Subject: [PATCH 37/71] Added Elena's changes with implemented DFT_INVERSE C2C mode. --- modules/core/perf/opencl/perf_dxt.cpp | 2 +- modules/core/src/dxt.cpp | 40 +++------ modules/core/src/opencl/fft.cl | 113 ++++++++++++++++++++++++++ modules/core/test/ocl/test_dft.cpp | 11 +-- 4 files changed, 131 insertions(+), 35 deletions(-) diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index edeeda7f0c..3980a191fa 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -67,7 +67,7 @@ typedef TestBaseWithParam DftFixture; OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), - Values((int)DFT_ROWS, (int) 0/*, (int)DFT_SCALE, (int)DFT_INVERSE, + Values((int)DFT_ROWS, (int) 0, (int)DFT_SCALE/*, (int)DFT_INVERSE, (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) { const DftParams params = GetParam(); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 449e19db4b..879a70613f 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2129,8 +2129,8 @@ struct OCL_FftPlan for (int k=0; k<(n/radix); k++) { - ptr[ptr_index++] = cos(k*theta); - ptr[ptr_index++] = sin(k*theta); + ptr[ptr_index++] = (float) cos(k*theta); + ptr[ptr_index++] = (float) sin(k*theta); } } } @@ -2152,13 +2152,14 @@ struct OCL_FftPlan String kernel_name; bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; + bool inv = (flags & DFT_INVERSE) != 0; String options = buildOptions; if (rows) { globalsize[0] = thread_count; globalsize[1] = dft_size; localsize[0] = thread_count; localsize[1] = 1; - kernel_name = "fft_multi_radix_rows"; + kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows"; if (is1d && (flags & DFT_SCALE)) options += " -D DFT_SCALE"; } @@ -2166,7 +2167,7 @@ struct OCL_FftPlan { globalsize[0] = dft_size; globalsize[1] = thread_count; localsize[0] = 1; localsize[1] = thread_count; - kernel_name = "fft_multi_radix_cols"; + kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols"; if (flags & DFT_SCALE) options += " -D DFT_SCALE"; } @@ -2270,13 +2271,10 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro // if output format is not specified if (complex_output + real_output == 0) { - if (!inv) - { - if (real_input) - real_output = 1; - else - complex_output = 1; - } + if (real_input) + real_output = 1; + else + complex_output = 1; } // Forward Complex to CCS not supported @@ -2294,23 +2292,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro real_output = 1; } - UMat input, output; - if (complex_input) - { - input = src; - } - else - { - if (!inv) - { - input = src; - } - else - { - // TODO: unpack from CCS format - } - } - + UMat output; if (complex_output) { _dst.create(src.size(), CV_32FC2); @@ -2330,7 +2312,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } } - if (!ocl_dft_C2C_rows(input, output, nonzero_rows, flags)) + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags)) return false; if (!is1d) diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index d59e0d9b48..8aecfc056a 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -424,4 +424,117 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, } #endif } +} + +__kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __constant float2 * twiddles_ptr, const int t, const int nz) +{ + const int x = get_global_id(0); + const int y = get_group_id(1); + + if (y < nz) + { + __local float2 smem[LOCAL_SIZE]; + __constant const float2* twiddles = (__constant float2*) twiddles_ptr; + const int ind = x; + const int block_size = LOCAL_SIZE/kercn; +#ifdef IS_1D + float scale = 1.f/dst_cols; +#else + float scale = 1.f/(dst_cols*dst_rows); +#endif + +#ifndef REAL_INPUT + __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); + #pragma unroll + for (int i=0; i Date: Mon, 21 Jul 2014 10:42:56 +0400 Subject: [PATCH 38/71] Added multi-block scheme --- modules/core/perf/opencl/perf_dxt.cpp | 6 +- modules/core/src/dxt.cpp | 49 +++-- modules/core/src/opencl/fft.cl | 282 +++++++++++++++++++++++++- modules/core/test/ocl/test_dft.cpp | 14 +- 4 files changed, 321 insertions(+), 30 deletions(-) diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index 3980a191fa..f4b6b49a9f 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -65,10 +65,10 @@ enum OCL_FFT_TYPE typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C/*, R2R, C2R, R2C*/), Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), - Values((int)DFT_ROWS, (int) 0, (int)DFT_SCALE/*, (int)DFT_INVERSE, - (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) + Values((int) 0, (int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE, + /*(int)DFT_INVERSE | DFT_SCALE,*/ (int)DFT_ROWS | DFT_INVERSE))) { const DftParams params = GetParam(); const int dft_type = get<0>(params); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 879a70613f..d5b1cb383d 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2041,23 +2041,33 @@ static std::vector ocl_getRadixes(int cols, std::vector& radixes, std: int n = 1; int factor_index = 0; + min_radix = INT_MAX; // 2^n transforms if ( (factors[factor_index] & 1) == 0 ) { for( ; n < factors[factor_index]; ) { - int radix = 2; + int radix = 2, block = 1; if (8*n <= factors[0]) radix = 8; else if (4*n <= factors[0]) + { radix = 4; + if (cols % 8 == 0) + block = 2; + } + else + { + if (cols % 8 == 0) + block = 4; + else if (cols % 4 == 0) + block = 2; + } radixes.push_back(radix); - if (radix == 2 && cols % 4 == 0) - min_radix = min(min_radix, 2*radix); - else - min_radix = min(min_radix, radix); + blocks.push_back(block); + min_radix = min(min_radix, block*radix); n *= radix; } factor_index++; @@ -2066,11 +2076,22 @@ static std::vector ocl_getRadixes(int cols, std::vector& radixes, std: // all the other transforms for( ; factor_index < nf; factor_index++ ) { - radixes.push_back(factors[factor_index]); - if (factors[factor_index] == 3 && cols % 6 == 0) - min_radix = min(min_radix, 2*factors[factor_index]); - else - min_radix = min(min_radix, factors[factor_index]); + int radix = factors[factor_index], block = 1; + if (radix == 3) + { + if (cols % 12 == 0) + block = 4; + else if (cols % 6 == 0) + block = 2; + } + else if (radix == 5) + { + if (cols % 10 == 0) + block = 2; + } + radixes.push_back(radix); + blocks.push_back(block); + min_radix = min(min_radix, block*radix); } return radixes; } @@ -2086,7 +2107,7 @@ struct OCL_FftPlan bool status; OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true) { - int min_radix = INT_MAX; + int min_radix; std::vector radixes, blocks; ocl_getRadixes(dft_size, radixes, blocks, min_radix); thread_count = (dft_size + min_radix-1) / min_radix; @@ -2102,9 +2123,9 @@ struct OCL_FftPlan int n = 1, twiddle_size = 0; for (size_t i=0; i 1) + radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix); else radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix); twiddle_size += (radix-1)*n; diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index 8aecfc056a..fdbad19ad0 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -44,11 +44,11 @@ __attribute__((always_inline)) void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k1 = x & (block_size - 1); - const int x2 = x + (t+1)/2; + const int x2 = x + t/2; const int k2 = x2 & (block_size - 1); float2 a0, a1, a2, a3; - if (x < (t+1)/2) + if (x < t/2) { a0 = smem[x]; a1 = mul_float2(twiddles[k1],smem[x+t]); @@ -58,7 +58,7 @@ void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, cons barrier(CLK_LOCAL_MEM_FENCE); - if (x < (t+1)/2) + if (x < t/2) { int dst_ind = (x << 1) - k1; smem[dst_ind] = a0 + a1; @@ -72,6 +72,55 @@ void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, cons barrier(CLK_LOCAL_MEM_FENCE); } +__attribute__((always_inline)) +void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ + const int thread_block = t/4; + const int k1 = x & (block_size - 1); + const int x2 = x + thread_block; + const int k2 = x2 & (block_size - 1); + const int x3 = x + 2*thread_block; + const int k3 = x3 & (block_size - 1); + const int x4 = x + 3*thread_block; + const int k4 = x4 & (block_size - 1); + float2 a0, a1, a2, a3, a4, a5, a6, a7; + + if (x < t/4) + { + a0 = smem[x]; + a1 = mul_float2(twiddles[k1],smem[x+t]); + a2 = smem[x2]; + a3 = mul_float2(twiddles[k2],smem[x2+t]); + a4 = smem[x3]; + a5 = mul_float2(twiddles[k3],smem[x3+t]); + a6 = smem[x4]; + a7 = mul_float2(twiddles[k4],smem[x4+t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t/4) + { + int dst_ind = (x << 1) - k1; + smem[dst_ind] = a0 + a1; + smem[dst_ind+block_size] = a0 - a1; + + dst_ind = (x2 << 1) - k2; + smem[dst_ind] = a2 + a3; + smem[dst_ind+block_size] = a2 - a3; + + dst_ind = (x3 << 1) - k3; + smem[dst_ind] = a4 + a5; + smem[dst_ind+block_size] = a4 - a5; + + dst_ind = (x4 << 1) - k4; + smem[dst_ind] = a6 + a7; + smem[dst_ind+block_size] = a6 - a7; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + __attribute__((always_inline)) void fft_radix4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { @@ -107,6 +156,58 @@ void fft_radix4(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } +__attribute__((always_inline)) +void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ + const int k = x & (block_size - 1); + const int x2 = x + t/2; + const int k2 = x2 & (block_size - 1); + float2 a0, a1, a2, a3, a4, a5, a6, a7; + + if (x < t/2) + { + a0 = smem[x]; + a1 = mul_float2(twiddles[k], smem[x+t]); + a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); + a3 = mul_float2(twiddles[k + 2*block_size],smem[x+3*t]); + + a4 = smem[x2]; + a5 = mul_float2(twiddles[k2], smem[x2+t]); + a6 = mul_float2(twiddles[k2 + block_size],smem[x2+2*t]); + a7 = mul_float2(twiddles[k2 + 2*block_size],smem[x2+3*t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t/2) + { + int dst_ind = ((x - k) << 2) + k; + + float2 b0 = a0 + a2; + a2 = a0 - a2; + float2 b1 = a1 + a3; + a3 = twiddle(a1 - a3); + + smem[dst_ind] = b0 + b1; + smem[dst_ind + block_size] = a2 + a3; + smem[dst_ind + 2*block_size] = b0 - b1; + smem[dst_ind + 3*block_size] = a2 - a3; + + dst_ind = ((x2 - k2) << 2) + k2; + b0 = a4 + a6; + a6 = a4 - a6; + b1 = a5 + a7; + a7 = twiddle(a5 - a7); + + smem[dst_ind] = b0 + b1; + smem[dst_ind + block_size] = a6 + a7; + smem[dst_ind + 2*block_size] = b0 - b1; + smem[dst_ind + 3*block_size] = a6 - a7; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + __attribute__((always_inline)) void fft_radix8(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { @@ -205,11 +306,11 @@ __attribute__((always_inline)) void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; - const int x2 = x + (t+1)/2; + const int x2 = x + t/2; const int k2 = x2 % block_size; float2 a0, a1, a2, a3, a4, a5; - if (x < (t+1)/2) + if (x < t/2) { a0 = smem[x]; a1 = mul_float2(twiddles[k], smem[x+t]); @@ -222,7 +323,7 @@ void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, cons barrier(CLK_LOCAL_MEM_FENCE); - if (x < (t+1)/2) + if (x < t/2) { int dst_ind = ((x - k) * 3) + k; @@ -248,6 +349,86 @@ void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, cons barrier(CLK_LOCAL_MEM_FENCE); } +__attribute__((always_inline)) +void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ + const int thread_block = t/4; + const int k = x % block_size; + const int x2 = x + thread_block; + const int k2 = x2 % block_size; + const int x3 = x + 2*thread_block; + const int k3 = x3 % block_size; + const int x4 = x + 3*thread_block; + const int k4 = x4 % block_size; + float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; + + if (x < t/4) + { + a0 = smem[x]; + a1 = mul_float2(twiddles[k], smem[x+t]); + a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); + + a3 = smem[x2]; + a4 = mul_float2(twiddles[k2], smem[x2+t]); + a5 = mul_float2(twiddles[k2+block_size], smem[x2+2*t]); + + a6 = smem[x3]; + a7 = mul_float2(twiddles[k3], smem[x3+t]); + a8 = mul_float2(twiddles[k3+block_size], smem[x3+2*t]); + + a9 = smem[x4]; + a10 = mul_float2(twiddles[k4], smem[x4+t]); + a11 = mul_float2(twiddles[k4+block_size], smem[x4+2*t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t/4) + { + int dst_ind = ((x - k) * 3) + k; + + float2 b1 = a1 + a2; + a2 = twiddle(sin_120*(a1 - a2)); + float2 b0 = a0 - (float2)(0.5f)*b1; + + smem[dst_ind] = a0 + b1; + smem[dst_ind + block_size] = b0 + a2; + smem[dst_ind + 2*block_size] = b0 - a2; + + dst_ind = ((x2 - k2) * 3) + k2; + + b1 = a4 + a5; + a5 = twiddle(sin_120*(a4 - a5)); + b0 = a3 - (float2)(0.5f)*b1; + + smem[dst_ind] = a3 + b1; + smem[dst_ind + block_size] = b0 + a5; + smem[dst_ind + 2*block_size] = b0 - a5; + + dst_ind = ((x3 - k3) * 3) + k3; + + b1 = a7 + a8; + a8 = twiddle(sin_120*(a7 - a8)); + b0 = a6 - (float2)(0.5f)*b1; + + smem[dst_ind] = a6 + b1; + smem[dst_ind + block_size] = b0 + a8; + smem[dst_ind + 2*block_size] = b0 - a8; + + dst_ind = ((x4 - k4) * 3) + k4; + + b1 = a10 + a11; + a11 = twiddle(sin_120*(a10 - a11)); + b0 = a9 - (float2)(0.5f)*b1; + + smem[dst_ind] = a9 + b1; + smem[dst_ind + block_size] = b0 + a11; + smem[dst_ind + 2*block_size] = b0 - a11; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + __attribute__((always_inline)) void fft_radix5(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { @@ -301,6 +482,95 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } +__attribute__((always_inline)) +void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ + const int k = x % block_size; + const int x2 = x+t/2; + const int k2 = x2 % block_size; + float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; + + if (x < t/2) + { + a0 = smem[x]; + a1 = mul_float2(twiddles[k], smem[x + t]); + a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); + a3 = mul_float2(twiddles[k+2*block_size],smem[x+3*t]); + a4 = mul_float2(twiddles[k+3*block_size],smem[x+4*t]); + + a5 = smem[x2]; + a6 = mul_float2(twiddles[k2], smem[x2 + t]); + a7 = mul_float2(twiddles[k2 + block_size],smem[x2+2*t]); + a8 = mul_float2(twiddles[k2+2*block_size],smem[x2+3*t]); + a9 = mul_float2(twiddles[k2+3*block_size],smem[x2+4*t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x < t/2) + { + int dst_ind = ((x - k) * 5) + k; + __local float2* dst = smem + dst_ind; + + float2 b0, b1, b5; + + b1 = a1 + a4; + a1 -= a4; + + a4 = a3 + a2; + a3 -= a2; + + a2 = b1 + a4; + b0 = a0 - (float2)0.25f * a2; + + b1 = fft5_2 * (b1 - a4); + a4 = fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); + b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); + + a4.x += fft5_4 * a3.y; + a4.y -= fft5_4 * a3.x; + + a1 = b0 + b1; + b0 -= b1; + + dst[0] = a0 + a2; + dst[block_size] = a1 + a4; + dst[2 * block_size] = b0 + b5; + dst[3 * block_size] = b0 - b5; + dst[4 * block_size] = a1 - a4; + + dst_ind = ((x2 - k2) * 5) + k2; + dst = smem + dst_ind; + + b1 = a6 + a9; + a6 -= a9; + + a9 = a8 + a7; + a8 -= a7; + + a7 = b1 + a9; + b0 = a5 - (float2)0.25f * a7; + + b1 = fft5_2 * (b1 - a9); + a9 = fft5_3 * (float2)(-a6.y - a8.y, a6.x + a8.x); + b5 = (float2)(a9.x - fft5_5 * a6.y, a9.y + fft5_5 * a6.x); + + a9.x += fft5_4 * a8.y; + a9.y -= fft5_4 * a8.x; + + a6 = b0 + b1; + b0 -= b1; + + dst[0] = a5 + a7; + dst[block_size] = a6 + a9; + dst[2 * block_size] = b0 + b5; + dst[3 * block_size] = b0 - b5; + dst[4 * block_size] = a6 - a9; + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + #ifdef DFT_SCALE #define VAL(x, scale) x*scale #else diff --git a/modules/core/test/ocl/test_dft.cpp b/modules/core/test/ocl/test_dft.cpp index 64f6c63843..1fa03ae472 100644 --- a/modules/core/test/ocl/test_dft.cpp +++ b/modules/core/test/ocl/test_dft.cpp @@ -62,7 +62,7 @@ namespace ocl { //////////////////////////////////////////////////////////////////////////// // Dft -PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) +PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool) { cv::Size dft_size; int dft_flags, depth, cn, dft_type; @@ -91,9 +91,9 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) dft_flags |= cv::DFT_ROWS; if (GET_PARAM(3)) dft_flags |= cv::DFT_SCALE; - if (GET_PARAM(4)) - dft_flags |= cv::DFT_INVERSE; - inplace = GET_PARAM(5); + /*if (GET_PARAM(4)) + dft_flags |= cv::DFT_INVERSE;*/ + inplace = GET_PARAM(4); is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1; @@ -188,12 +188,12 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(6, 4), cv::Size(5, 8), cv::Size(6, 6), +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(16, 4), cv::Size(5, 8), cv::Size(6, 6), cv::Size(512, 1), cv::Size(1280, 768)), - Values(/*(OCL_FFT_TYPE) R2C, */(OCL_FFT_TYPE) C2C/*, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R*/), + Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), Bool(), // DFT_ROWS Bool(), // DFT_SCALE - Bool(), // DFT_INVERSE + //Bool(), // DFT_INVERSE Bool() // inplace ) ); From 52f76a32838019b4eb8e3f889dd25623a5983c74 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 22 Jul 2014 11:24:19 +0400 Subject: [PATCH 39/71] Added rest Elena's changes --- modules/core/perf/opencl/perf_dxt.cpp | 6 +- modules/core/src/dxt.cpp | 131 +++-- modules/core/src/opencl/fft.cl | 702 +++++++++++++------------- modules/core/test/ocl/test_dft.cpp | 32 +- 4 files changed, 462 insertions(+), 409 deletions(-) diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index f4b6b49a9f..797b2c5334 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -65,10 +65,10 @@ enum OCL_FFT_TYPE typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C/*, R2R, C2R, R2C*/), +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), - Values((int) 0, (int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE, - /*(int)DFT_INVERSE | DFT_SCALE,*/ (int)DFT_ROWS | DFT_INVERSE))) + Values((int) 0, (int)DFT_ROWS, (int)DFT_SCALE/*, (int)DFT_INVERSE, + (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) { const DftParams params = GetParam(); const int dft_type = get<0>(params); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index d5b1cb383d..eaef53ad23 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1791,14 +1791,6 @@ namespace cv { CV_Assert(s == CLFFT_SUCCESS); \ } -enum FftType -{ - R2R = 0, // real to real - C2R = 1, // opencl HERMITIAN_INTERLEAVED to real - R2C = 2, // real to opencl HERMITIAN_INTERLEAVED - C2C = 3 // complex to complex -}; - class PlanCache { struct FftPlan @@ -2034,6 +2026,14 @@ namespace cv #ifdef HAVE_OPENCL +enum FftType +{ + R2R = 0, + C2R = 1, + R2C = 2, + C2C = 3 +}; + static std::vector ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) { int factors[34]; @@ -2054,13 +2054,19 @@ static std::vector ocl_getRadixes(int cols, std::vector& radixes, std: else if (4*n <= factors[0]) { radix = 4; - if (cols % 8 == 0) + if (cols % 12 == 0) + block = 3; + else if (cols % 8 == 0) block = 2; } else { - if (cols % 8 == 0) + if (cols % 10 == 0) + block = 5; + else if (cols % 8 == 0) block = 4; + else if (cols % 6 == 0) + block = 3; else if (cols % 4 == 0) block = 2; } @@ -2081,6 +2087,8 @@ static std::vector ocl_getRadixes(int cols, std::vector& radixes, std: { if (cols % 12 == 0) block = 4; + else if (cols % 9 == 0) + block = 3; else if (cols % 6 == 0) block = 2; } @@ -2142,7 +2150,6 @@ struct OCL_FftPlan { int radix = radixes[i]; n *= radix; - for (int j=1; j planStorage; }; -static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags) +static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) { const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); - return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, true); + return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); } -static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags) +static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) { const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags); - return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, false); + return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); } static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) @@ -2298,29 +2318,26 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro complex_output = 1; } + FftType fftType = (FftType)(complex_input << 0 | complex_output << 1); + // Forward Complex to CCS not supported - if (complex_input && real_output && !inv) - { - flags ^= DFT_REAL_OUTPUT; - flags |= DFT_COMPLEX_OUTPUT; - real_output = 0; - complex_output = 1; - } + if (fftType == C2R && !inv) + fftType = C2C; + // Inverse CCS to Complex not supported - if (real_input && complex_output && inv) - { - complex_output = 0; - real_output = 1; - } + if (fftType == R2C && inv) + fftType = R2R; UMat output; - if (complex_output) + if (fftType == C2C || fftType == R2C) { + // complex output _dst.create(src.size(), CV_32FC2); output = _dst.getUMat(); - } + } else { + // real output if (is1d) { _dst.create(src.size(), CV_32FC1); @@ -2333,17 +2350,49 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } } - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags)) - return false; - - if (!is1d) + if (!inv) { - int nonzero_cols = real_input && real_output ? output.cols/2 + 1 : output.cols; - if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags)) + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) return false; - } else + + if (!is1d) + { + int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols; + if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags, fftType)) + return false; + } + } + else { - _dst.assign(output); + if (fftType == C2C) + { + // complex output + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + return false; + + if (!is1d) + { + if (!ocl_dft_C2C_cols(output, output, output.cols, flags, fftType)) + return false; + } + } + else + { + if (is1d) + { + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + return false; + } + else + { + int nonzero_cols = src.cols/2 + 1;// : src.cols; + if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) + return false; + + if (!ocl_dft_C2C_rows(output, _dst, nonzero_rows, flags, fftType)) + return false; + } + } } return true; } diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index fdbad19ad0..dd8ff59850 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -16,106 +16,224 @@ float2 twiddle(float2 a) { } __attribute__((always_inline)) -void fft_radix2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void butterfly2(float2 a0, float2 a1, __local float2* smem, __constant const float2* twiddles, + const int x, const int block_size) +{ + const int k = x & (block_size - 1); + a1 = mul_float2(twiddles[k], a1); + const int dst_ind = (x << 1) - k; + + smem[dst_ind] = a0 + a1; + smem[dst_ind+block_size] = a0 - a1; +} + +__attribute__((always_inline)) +void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __constant const float2* twiddles, + const int x, const int block_size) { const int k = x & (block_size - 1); + a1 = mul_float2(twiddles[k], a1); + a2 = mul_float2(twiddles[k + block_size], a2); + a3 = mul_float2(twiddles[k + 2*block_size], a3); + + const int dst_ind = ((x - k) << 2) + k; + + float2 b0 = a0 + a2; + a2 = a0 - a2; + float2 b1 = a1 + a3; + a3 = twiddle(a1 - a3); + + smem[dst_ind] = b0 + b1; + smem[dst_ind + block_size] = a2 + a3; + smem[dst_ind + 2*block_size] = b0 - b1; + smem[dst_ind + 3*block_size] = a2 - a3; +} + +__attribute__((always_inline)) +void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __constant const float2* twiddles, + const int x, const int block_size) +{ + const int k = x & (block_size - 1); + a1 = mul_float2(twiddles[k], a1); + a2 = mul_float2(twiddles[k+block_size], a2); + const int dst_ind = ((x - k) * 3) + k; + + float2 b1 = a1 + a2; + a2 = twiddle(sin_120*(a1 - a2)); + float2 b0 = a0 - (float2)(0.5f)*b1; + + smem[dst_ind] = a0 + b1; + smem[dst_ind + block_size] = b0 + a2; + smem[dst_ind + 2*block_size] = b0 - a2; +} + +__attribute__((always_inline)) +void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local float2* smem, __constant const float2* twiddles, + const int x, const int block_size) +{ + const int k = x & (block_size - 1); + a1 = mul_float2(twiddles[k], a1); + a2 = mul_float2(twiddles[k + block_size], a2); + a3 = mul_float2(twiddles[k+2*block_size], a3); + a4 = mul_float2(twiddles[k+3*block_size], a4); + + const int dst_ind = ((x - k) * 5) + k; + __local float2* dst = smem + dst_ind; + + float2 b0, b1, b5; + + b1 = a1 + a4; + a1 -= a4; + + a4 = a3 + a2; + a3 -= a2; + + a2 = b1 + a4; + b0 = a0 - (float2)0.25f * a2; + + b1 = fft5_2 * (b1 - a4); + a4 = fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); + b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); + + a4.x += fft5_4 * a3.y; + a4.y -= fft5_4 * a3.x; + + a1 = b0 + b1; + b0 -= b1; + + dst[0] = a0 + a2; + dst[block_size] = a1 + a4; + dst[2 * block_size] = b0 + b5; + dst[3 * block_size] = b0 - b5; + dst[4 * block_size] = a1 - a4; +} + +__attribute__((always_inline)) +void fft_radix2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +{ float2 a0, a1; if (x < t) { a0 = smem[x]; - a1 = mul_float2(twiddles[k],smem[x+t]); + a1 = smem[x+t]; } barrier(CLK_LOCAL_MEM_FENCE); if (x < t) - { - const int dst_ind = (x << 1) - k; - - smem[dst_ind] = a0 + a1; - smem[dst_ind+block_size] = a0 - a1; - } + butterfly2(a0, a1, smem, twiddles, x, block_size); barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) { - const int k1 = x & (block_size - 1); - const int x2 = x + t/2; - const int k2 = x2 & (block_size - 1); + const int x2 = x1 + t/2; float2 a0, a1, a2, a3; - if (x < t/2) + if (x1 < t/2) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k1],smem[x+t]); - a2 = smem[x2]; - a3 = mul_float2(twiddles[k2],smem[x2+t]); + a0 = smem[x1]; a1 = smem[x1+t]; + a2 = smem[x2]; a3 = smem[x2+t]; } barrier(CLK_LOCAL_MEM_FENCE); - if (x < t/2) + if (x1 < t/2) { - int dst_ind = (x << 1) - k1; - smem[dst_ind] = a0 + a1; - smem[dst_ind+block_size] = a0 - a1; - - dst_ind = (x2 << 1) - k2; - smem[dst_ind] = a2 + a3; - smem[dst_ind+block_size] = a2 - a3; + butterfly2(a0, a1, smem, twiddles, x1, block_size); + butterfly2(a2, a3, smem, twiddles, x2, block_size); } barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix2_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) { - const int thread_block = t/4; - const int k1 = x & (block_size - 1); - const int x2 = x + thread_block; - const int k2 = x2 & (block_size - 1); - const int x3 = x + 2*thread_block; - const int k3 = x3 & (block_size - 1); - const int x4 = x + 3*thread_block; - const int k4 = x4 & (block_size - 1); - float2 a0, a1, a2, a3, a4, a5, a6, a7; + const int x2 = x1 + t/3; + const int x3 = x1 + 2*t/3; + float2 a0, a1, a2, a3, a4, a5; - if (x < t/4) + if (x1 < t/3) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k1],smem[x+t]); - a2 = smem[x2]; - a3 = mul_float2(twiddles[k2],smem[x2+t]); - a4 = smem[x3]; - a5 = mul_float2(twiddles[k3],smem[x3+t]); - a6 = smem[x4]; - a7 = mul_float2(twiddles[k4],smem[x4+t]); + a0 = smem[x1]; a1 = smem[x1+t]; + a2 = smem[x2]; a3 = smem[x2+t]; + a4 = smem[x3]; a5 = smem[x3+t]; } barrier(CLK_LOCAL_MEM_FENCE); - if (x < t/4) + if (x1 < t/3) { - int dst_ind = (x << 1) - k1; - smem[dst_ind] = a0 + a1; - smem[dst_ind+block_size] = a0 - a1; + butterfly2(a0, a1, smem, twiddles, x1, block_size); + butterfly2(a2, a3, smem, twiddles, x2, block_size); + butterfly2(a4, a5, smem, twiddles, x3, block_size); + } - dst_ind = (x2 << 1) - k2; - smem[dst_ind] = a2 + a3; - smem[dst_ind+block_size] = a2 - a3; + barrier(CLK_LOCAL_MEM_FENCE); +} - dst_ind = (x3 << 1) - k3; - smem[dst_ind] = a4 + a5; - smem[dst_ind+block_size] = a4 - a5; +__attribute__((always_inline)) +void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +{ + const int thread_block = t/4; + const int x2 = x1 + thread_block; + const int x3 = x1 + 2*thread_block; + const int x4 = x1 + 3*thread_block; + float2 a0, a1, a2, a3, a4, a5, a6, a7; - dst_ind = (x4 << 1) - k4; - smem[dst_ind] = a6 + a7; - smem[dst_ind+block_size] = a6 - a7; + if (x1 < t/4) + { + a0 = smem[x1]; a1 = smem[x1+t]; + a2 = smem[x2]; a3 = smem[x2+t]; + a4 = smem[x3]; a5 = smem[x3+t]; + a6 = smem[x4]; a7 = smem[x4+t]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x1 < t/4) + { + butterfly2(a0, a1, smem, twiddles, x1, block_size); + butterfly2(a2, a3, smem, twiddles, x2, block_size); + butterfly2(a4, a5, smem, twiddles, x3, block_size); + butterfly2(a6, a7, smem, twiddles, x4, block_size); + } + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((always_inline)) +void fft_radix2_B5(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +{ + const int thread_block = t/5; + const int x2 = x1 + thread_block; + const int x3 = x1 + 2*thread_block; + const int x4 = x1 + 3*thread_block; + const int x5 = x1 + 4*thread_block; + float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; + + if (x1 < t/5) + { + a0 = smem[x1]; a1 = smem[x1+t]; + a2 = smem[x2]; a3 = smem[x2+t]; + a4 = smem[x3]; a5 = smem[x3+t]; + a6 = smem[x4]; a7 = smem[x4+t]; + a8 = smem[x5]; a9 = smem[x5+t]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x1 < t/5) + { + butterfly2(a0, a1, smem, twiddles, x1, block_size); + butterfly2(a2, a3, smem, twiddles, x2, block_size); + butterfly2(a4, a5, smem, twiddles, x3, block_size); + butterfly2(a6, a7, smem, twiddles, x4, block_size); + butterfly2(a8, a9, smem, twiddles, x5, block_size); } barrier(CLK_LOCAL_MEM_FENCE); @@ -124,85 +242,65 @@ void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, cons __attribute__((always_inline)) void fft_radix4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { - const int k = x & (block_size - 1); float2 a0, a1, a2, a3; if (x < t) { - const int twiddle_block = block_size / 4; - a0 = smem[x]; - a1 = mul_float2(twiddles[k],smem[x+t]); - a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); - a3 = mul_float2(twiddles[k + 2*block_size],smem[x+3*t]); + a0 = smem[x]; a1 = smem[x+t]; a2 = smem[x+2*t]; a3 = smem[x+3*t]; } barrier(CLK_LOCAL_MEM_FENCE); if (x < t) + butterfly4(a0, a1, a2, a3, smem, twiddles, x, block_size); + + barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((always_inline)) +void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +{ + const int x2 = x1 + t/2; + float2 a0, a1, a2, a3, a4, a5, a6, a7; + + if (x1 < t/2) { - const int dst_ind = ((x - k) << 2) + k; + a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; a3 = smem[x1+3*t]; + a4 = smem[x2]; a5 = smem[x2+t]; a6 = smem[x2+2*t]; a7 = smem[x2+3*t]; + } - float2 b0 = a0 + a2; - a2 = a0 - a2; - float2 b1 = a1 + a3; - a3 = twiddle(a1 - a3); + barrier(CLK_LOCAL_MEM_FENCE); - smem[dst_ind] = b0 + b1; - smem[dst_ind + block_size] = a2 + a3; - smem[dst_ind + 2*block_size] = b0 - b1; - smem[dst_ind + 3*block_size] = a2 - a3; + if (x1 < t/2) + { + butterfly4(a0, a1, a2, a3, smem, twiddles, x1, block_size); + butterfly4(a4, a5, a6, a7, smem, twiddles, x2, block_size); } barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix4_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) { - const int k = x & (block_size - 1); - const int x2 = x + t/2; - const int k2 = x2 & (block_size - 1); - float2 a0, a1, a2, a3, a4, a5, a6, a7; + const int x2 = x1 + t/3; + const int x3 = x2 + t/3; + float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; - if (x < t/2) + if (x1 < t/3) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k], smem[x+t]); - a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); - a3 = mul_float2(twiddles[k + 2*block_size],smem[x+3*t]); - - a4 = smem[x2]; - a5 = mul_float2(twiddles[k2], smem[x2+t]); - a6 = mul_float2(twiddles[k2 + block_size],smem[x2+2*t]); - a7 = mul_float2(twiddles[k2 + 2*block_size],smem[x2+3*t]); + a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; a3 = smem[x1+3*t]; + a4 = smem[x2]; a5 = smem[x2+t]; a6 = smem[x2+2*t]; a7 = smem[x2+3*t]; + a8 = smem[x3]; a9 = smem[x3+t]; a10 = smem[x3+2*t]; a11 = smem[x3+3*t]; } barrier(CLK_LOCAL_MEM_FENCE); - if (x < t/2) + if (x1 < t/3) { - int dst_ind = ((x - k) << 2) + k; - - float2 b0 = a0 + a2; - a2 = a0 - a2; - float2 b1 = a1 + a3; - a3 = twiddle(a1 - a3); - - smem[dst_ind] = b0 + b1; - smem[dst_ind + block_size] = a2 + a3; - smem[dst_ind + 2*block_size] = b0 - b1; - smem[dst_ind + 3*block_size] = a2 - a3; - - dst_ind = ((x2 - k2) << 2) + k2; - b0 = a4 + a6; - a6 = a4 - a6; - b1 = a5 + a7; - a7 = twiddle(a5 - a7); - - smem[dst_ind] = b0 + b1; - smem[dst_ind + block_size] = a6 + a7; - smem[dst_ind + 2*block_size] = b0 - b1; - smem[dst_ind + 3*block_size] = a6 - a7; + butterfly4(a0, a1, a2, a3, smem, twiddles, x1, block_size); + butterfly4(a4, a5, a6, a7, smem, twiddles, x2, block_size); + butterfly4(a8, a9, a10, a11, smem, twiddles, x3, block_size); } barrier(CLK_LOCAL_MEM_FENCE); @@ -274,156 +372,95 @@ void fft_radix8(__local float2* smem, __constant const float2* twiddles, const i __attribute__((always_inline)) void fft_radix3(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) { - const int k = x % block_size; float2 a0, a1, a2; if (x < t) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k], smem[x+t]); - a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); + a0 = smem[x]; a1 = smem[x+t]; a2 = smem[x+2*t]; } barrier(CLK_LOCAL_MEM_FENCE); if (x < t) - { - const int dst_ind = ((x - k) * 3) + k; - - float2 b1 = a1 + a2; - a2 = twiddle(sin_120*(a1 - a2)); - float2 b0 = a0 - (float2)(0.5f)*b1; - - smem[dst_ind] = a0 + b1; - smem[dst_ind + block_size] = b0 + a2; - smem[dst_ind + 2*block_size] = b0 - a2; - } + butterfly3(a0, a1, a2, smem, twiddles, x, block_size); barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) { - const int k = x % block_size; - const int x2 = x + t/2; - const int k2 = x2 % block_size; + const int x2 = x1 + t/2; float2 a0, a1, a2, a3, a4, a5; - if (x < t/2) + if (x1 < t/2) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k], smem[x+t]); - a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); - - a3 = smem[x2]; - a4 = mul_float2(twiddles[k2], smem[x2+t]); - a5 = mul_float2(twiddles[k2+block_size], smem[x2+2*t]); + a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; + a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t]; } barrier(CLK_LOCAL_MEM_FENCE); - if (x < t/2) + if (x1 < t/2) { - int dst_ind = ((x - k) * 3) + k; - - float2 b1 = a1 + a2; - a2 = twiddle(sin_120*(a1 - a2)); - float2 b0 = a0 - (float2)(0.5f)*b1; - - smem[dst_ind] = a0 + b1; - smem[dst_ind + block_size] = b0 + a2; - smem[dst_ind + 2*block_size] = b0 - a2; - - dst_ind = ((x2 - k2) * 3) + k2; - - b1 = a4 + a5; - a5 = twiddle(sin_120*(a4 - a5)); - b0 = a3 - (float2)(0.5f)*b1; - - smem[dst_ind] = a3 + b1; - smem[dst_ind + block_size] = b0 + a5; - smem[dst_ind + 2*block_size] = b0 - a5; + butterfly3(a0, a1, a2, smem, twiddles, x1, block_size); + butterfly3(a3, a4, a5, smem, twiddles, x2, block_size); } barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix3_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) { - const int thread_block = t/4; - const int k = x % block_size; - const int x2 = x + thread_block; - const int k2 = x2 % block_size; - const int x3 = x + 2*thread_block; - const int k3 = x3 % block_size; - const int x4 = x + 3*thread_block; - const int k4 = x4 % block_size; - float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; + const int x2 = x1 + t/3; + const int x3 = x2 + t/3; + float2 a0, a1, a2, a3, a4, a5, a6, a7, a8; - if (x < t/4) + if (x1 < t/2) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k], smem[x+t]); - a2 = mul_float2(twiddles[k+block_size], smem[x+2*t]); - - a3 = smem[x2]; - a4 = mul_float2(twiddles[k2], smem[x2+t]); - a5 = mul_float2(twiddles[k2+block_size], smem[x2+2*t]); - - a6 = smem[x3]; - a7 = mul_float2(twiddles[k3], smem[x3+t]); - a8 = mul_float2(twiddles[k3+block_size], smem[x3+2*t]); - - a9 = smem[x4]; - a10 = mul_float2(twiddles[k4], smem[x4+t]); - a11 = mul_float2(twiddles[k4+block_size], smem[x4+2*t]); + a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; + a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t]; + a6 = smem[x3]; a7 = smem[x3+t]; a8 = smem[x3+2*t]; } barrier(CLK_LOCAL_MEM_FENCE); - if (x < t/4) + if (x1 < t/2) { - int dst_ind = ((x - k) * 3) + k; + butterfly3(a0, a1, a2, smem, twiddles, x1, block_size); + butterfly3(a3, a4, a5, smem, twiddles, x2, block_size); + butterfly3(a6, a7, a8, smem, twiddles, x3, block_size); + } - float2 b1 = a1 + a2; - a2 = twiddle(sin_120*(a1 - a2)); - float2 b0 = a0 - (float2)(0.5f)*b1; + barrier(CLK_LOCAL_MEM_FENCE); +} - smem[dst_ind] = a0 + b1; - smem[dst_ind + block_size] = b0 + a2; - smem[dst_ind + 2*block_size] = b0 - a2; +__attribute__((always_inline)) +void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +{ + const int thread_block = t/4; + const int x2 = x1 + thread_block; + const int x3 = x1 + 2*thread_block; + const int x4 = x1 + 3*thread_block; + float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; - dst_ind = ((x2 - k2) * 3) + k2; + if (x1 < t/4) + { + a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; + a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t]; + a6 = smem[x3]; a7 = smem[x3+t]; a8 = smem[x3+2*t]; + a9 = smem[x4]; a10 = smem[x4+t]; a11 = smem[x4+2*t]; + } - b1 = a4 + a5; - a5 = twiddle(sin_120*(a4 - a5)); - b0 = a3 - (float2)(0.5f)*b1; + barrier(CLK_LOCAL_MEM_FENCE); - smem[dst_ind] = a3 + b1; - smem[dst_ind + block_size] = b0 + a5; - smem[dst_ind + 2*block_size] = b0 - a5; - - dst_ind = ((x3 - k3) * 3) + k3; - - b1 = a7 + a8; - a8 = twiddle(sin_120*(a7 - a8)); - b0 = a6 - (float2)(0.5f)*b1; - - smem[dst_ind] = a6 + b1; - smem[dst_ind + block_size] = b0 + a8; - smem[dst_ind + 2*block_size] = b0 - a8; - - dst_ind = ((x4 - k4) * 3) + k4; - - b1 = a10 + a11; - a11 = twiddle(sin_120*(a10 - a11)); - b0 = a9 - (float2)(0.5f)*b1; - - smem[dst_ind] = a9 + b1; - smem[dst_ind + block_size] = b0 + a11; - smem[dst_ind + 2*block_size] = b0 - a11; + if (x1 < t/4) + { + butterfly3(a0, a1, a2, smem, twiddles, x1, block_size); + butterfly3(a3, a4, a5, smem, twiddles, x2, block_size); + butterfly3(a6, a7, a8, smem, twiddles, x3, block_size); + butterfly3(a9, a10, a11, smem, twiddles, x4, block_size); } barrier(CLK_LOCAL_MEM_FENCE); @@ -437,135 +474,35 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i if (x < t) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k], smem[x + t]); - a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); - a3 = mul_float2(twiddles[k+2*block_size],smem[x+3*t]); - a4 = mul_float2(twiddles[k+3*block_size],smem[x+4*t]); + a0 = smem[x]; a1 = smem[x + t]; a2 = smem[x+2*t]; a3 = smem[x+3*t]; a4 = smem[x+4*t]; } barrier(CLK_LOCAL_MEM_FENCE); if (x < t) - { - const int dst_ind = ((x - k) * 5) + k; - __local float2* dst = smem + dst_ind; - - float2 b0, b1, b5; - - b1 = a1 + a4; - a1 -= a4; - - a4 = a3 + a2; - a3 -= a2; - - a2 = b1 + a4; - b0 = a0 - (float2)0.25f * a2; - - b1 = fft5_2 * (b1 - a4); - a4 = fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); - b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); - - a4.x += fft5_4 * a3.y; - a4.y -= fft5_4 * a3.x; - - a1 = b0 + b1; - b0 -= b1; - - dst[0] = a0 + a2; - dst[block_size] = a1 + a4; - dst[2 * block_size] = b0 + b5; - dst[3 * block_size] = b0 - b5; - dst[4 * block_size] = a1 - a4; - } + butterfly5(a0, a1, a2, a3, a4, smem, twiddles, x, block_size); barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) { - const int k = x % block_size; - const int x2 = x+t/2; - const int k2 = x2 % block_size; + const int x2 = x1+t/2; float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; - if (x < t/2) + if (x1 < t/2) { - a0 = smem[x]; - a1 = mul_float2(twiddles[k], smem[x + t]); - a2 = mul_float2(twiddles[k + block_size],smem[x+2*t]); - a3 = mul_float2(twiddles[k+2*block_size],smem[x+3*t]); - a4 = mul_float2(twiddles[k+3*block_size],smem[x+4*t]); - - a5 = smem[x2]; - a6 = mul_float2(twiddles[k2], smem[x2 + t]); - a7 = mul_float2(twiddles[k2 + block_size],smem[x2+2*t]); - a8 = mul_float2(twiddles[k2+2*block_size],smem[x2+3*t]); - a9 = mul_float2(twiddles[k2+3*block_size],smem[x2+4*t]); + a0 = smem[x1]; a1 = smem[x1 + t]; a2 = smem[x1+2*t]; a3 = smem[x1+3*t]; a4 = smem[x1+4*t]; + a5 = smem[x2]; a6 = smem[x2 + t]; a7 = smem[x2+2*t]; a8 = smem[x2+3*t]; a9 = smem[x2+4*t]; } barrier(CLK_LOCAL_MEM_FENCE); - if (x < t/2) + if (x1 < t/2) { - int dst_ind = ((x - k) * 5) + k; - __local float2* dst = smem + dst_ind; - - float2 b0, b1, b5; - - b1 = a1 + a4; - a1 -= a4; - - a4 = a3 + a2; - a3 -= a2; - - a2 = b1 + a4; - b0 = a0 - (float2)0.25f * a2; - - b1 = fft5_2 * (b1 - a4); - a4 = fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); - b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); - - a4.x += fft5_4 * a3.y; - a4.y -= fft5_4 * a3.x; - - a1 = b0 + b1; - b0 -= b1; - - dst[0] = a0 + a2; - dst[block_size] = a1 + a4; - dst[2 * block_size] = b0 + b5; - dst[3 * block_size] = b0 - b5; - dst[4 * block_size] = a1 - a4; - - dst_ind = ((x2 - k2) * 5) + k2; - dst = smem + dst_ind; - - b1 = a6 + a9; - a6 -= a9; - - a9 = a8 + a7; - a8 -= a7; - - a7 = b1 + a9; - b0 = a5 - (float2)0.25f * a7; - - b1 = fft5_2 * (b1 - a9); - a9 = fft5_3 * (float2)(-a6.y - a8.y, a6.x + a8.x); - b5 = (float2)(a9.x - fft5_5 * a6.y, a9.y + fft5_5 * a6.x); - - a9.x += fft5_4 * a8.y; - a9.y -= fft5_4 * a8.x; - - a6 = b0 + b1; - b0 -= b1; - - dst[0] = a5 + a7; - dst[block_size] = a6 + a9; - dst[2 * block_size] = b0 + b5; - dst[3 * block_size] = b0 - b5; - dst[4 * block_size] = a6 - a9; + butterfly5(a0, a1, a2, a3, a4, smem, twiddles, x1, block_size); + butterfly5(a5, a6, a7, a8, a9, smem, twiddles, x2, block_size); } barrier(CLK_LOCAL_MEM_FENCE); @@ -611,7 +548,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, RADIX_PROCESS; -#ifndef CCS_OUTPUT +#ifndef REAL_OUTPUT #ifdef NO_CONJUGATE // copy result without complex conjugate const int cols = dst_cols/2 + 1; @@ -659,7 +596,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, RADIX_PROCESS; -#ifndef CCS_OUTPUT +#ifndef REAL_OUTPUT __global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset)); #pragma unroll for (int i=0; i Date: Tue, 22 Jul 2014 14:54:38 +0400 Subject: [PATCH 40/71] Added nonzero_rows support --- modules/core/src/dxt.cpp | 49 +++++------- modules/core/src/ocl.cpp | 8 +- modules/core/src/opencl/fft.cl | 120 +++++++++++++++++------------ modules/core/test/ocl/test_dft.cpp | 32 ++++---- 4 files changed, 109 insertions(+), 100 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index eaef53ad23..869409f50d 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2034,19 +2034,19 @@ enum FftType C2C = 3 }; -static std::vector ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) +static void ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) { int factors[34]; - int nf = DFTFactorize( cols, factors ); + int nf = DFTFactorize(cols, factors); int n = 1; int factor_index = 0; min_radix = INT_MAX; // 2^n transforms - if ( (factors[factor_index] & 1) == 0 ) + if ((factors[factor_index] & 1) == 0) { - for( ; n < factors[factor_index]; ) + for( ; n < factors[factor_index];) { int radix = 2, block = 1; if (8*n <= factors[0]) @@ -2080,7 +2080,7 @@ static std::vector ocl_getRadixes(int cols, std::vector& radixes, std: } // all the other transforms - for( ; factor_index < nf; factor_index++ ) + for( ; factor_index < nf; factor_index++) { int radix = factors[factor_index], block = 1; if (radix == 3) @@ -2101,7 +2101,6 @@ static std::vector ocl_getRadixes(int cols, std::vector& radixes, std: blocks.push_back(block); min_radix = min(min_radix, block*radix); } - return radixes; } struct OCL_FftPlan @@ -2111,14 +2110,13 @@ struct OCL_FftPlan int thread_count; int dft_size; - int flags; bool status; - OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true) + OCL_FftPlan(int _size): dft_size(_size), status(true) { int min_radix; std::vector radixes, blocks; ocl_getRadixes(dft_size, radixes, blocks, min_radix); - thread_count = (dft_size + min_radix-1) / min_radix; + thread_count = dft_size / min_radix; if (thread_count > ocl::Device::getDefault().maxWorkGroupSize()) { @@ -2140,8 +2138,7 @@ struct OCL_FftPlan n *= radix; } - twiddles.create(1, twiddle_size, CV_32FC2); - Mat tw = twiddles.getMat(ACCESS_WRITE); + Mat tw(1, twiddle_size, CV_32FC2); float* ptr = tw.ptr(); int ptr_index = 0; @@ -2162,6 +2159,7 @@ struct OCL_FftPlan } } } + twiddles = tw.getUMat(ACCESS_READ); buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s", dft_size, dft_size/thread_count, radix_processing.c_str()); @@ -2185,10 +2183,10 @@ struct OCL_FftPlan if (rows) { - globalsize[0] = thread_count; globalsize[1] = dft_size; + globalsize[0] = thread_count; globalsize[1] = src.rows; localsize[0] = thread_count; localsize[1] = 1; kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows"; - if (is1d && (flags & DFT_SCALE)) + if ((is1d || inv) && (flags & DFT_SCALE)) options += " -D DFT_SCALE"; } else @@ -2200,14 +2198,9 @@ struct OCL_FftPlan options += " -D DFT_SCALE"; } - if (src.channels() == 1) - options += " -D REAL_INPUT"; - else - options += " -D COMPLEX_INPUT"; - if (dst.channels() == 1) - options += " -D REAL_OUTPUT"; - if (is1d) - options += " -D IS_1D"; + options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT"; + options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT"; + options += is1d ? " -D IS_1D" : ""; if (!inv) { @@ -2216,10 +2209,10 @@ struct OCL_FftPlan } else { - if (is1d && fftType == C2R || (rows && fftType == R2R)) + if (rows && (fftType == C2R || fftType == R2R)) options += " -D NO_CONJUGATE"; if (dst.cols % 2 == 0) - options += " -D EVEN"; + options += " -D EVEN"; } ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); @@ -2240,7 +2233,7 @@ public: return planCache; } - OCL_FftPlan* getFftPlan(int dft_size, int flags) + OCL_FftPlan* getFftPlan(int dft_size) { for (size_t i = 0, size = planStorage.size(); i < size; ++i) { @@ -2252,7 +2245,7 @@ public: } } - OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size, flags); + OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size); planStorage.push_back(newPlan); return newPlan; } @@ -2275,13 +2268,13 @@ protected: static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) { - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); + const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols()); return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); } static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) { - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags); + const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows()); return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); } @@ -2385,7 +2378,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } else { - int nonzero_cols = src.cols/2 + 1;// : src.cols; + int nonzero_cols = src.cols/2 + 1; if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) return false; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 32db8c91b4..a2110f6cc2 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3002,7 +3002,8 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], sync ? 0 : &p->e); if( sync || retval != CL_SUCCESS ) { - CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS); + int a = clFinish(qq); + CV_OclDbgAssert(a == CL_SUCCESS); p->cleanupUMats(); } else @@ -3898,8 +3899,9 @@ public: if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) { AlignedDataPtr alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); - CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, - u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); + int a = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, + u->size, alignedPtr.getAlignedPtr(), 0, 0, 0); + CV_Assert( a == CL_SUCCESS ); u->markHostCopyObsolete(false); } } diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index dd8ff59850..b8d2c6716d 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -16,7 +16,7 @@ float2 twiddle(float2 a) { } __attribute__((always_inline)) -void butterfly2(float2 a0, float2 a1, __local float2* smem, __constant const float2* twiddles, +void butterfly2(float2 a0, float2 a1, __local float2* smem, __global const float2* twiddles, const int x, const int block_size) { const int k = x & (block_size - 1); @@ -28,7 +28,7 @@ void butterfly2(float2 a0, float2 a1, __local float2* smem, __constant const flo } __attribute__((always_inline)) -void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __constant const float2* twiddles, +void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __global const float2* twiddles, const int x, const int block_size) { const int k = x & (block_size - 1); @@ -50,10 +50,10 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem } __attribute__((always_inline)) -void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __constant const float2* twiddles, +void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global const float2* twiddles, const int x, const int block_size) { - const int k = x & (block_size - 1); + const int k = x % block_size; a1 = mul_float2(twiddles[k], a1); a2 = mul_float2(twiddles[k+block_size], a2); const int dst_ind = ((x - k) * 3) + k; @@ -68,10 +68,10 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __constan } __attribute__((always_inline)) -void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local float2* smem, __constant const float2* twiddles, +void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local float2* smem, __global const float2* twiddles, const int x, const int block_size) { - const int k = x & (block_size - 1); + const int k = x % block_size; a1 = mul_float2(twiddles[k], a1); a2 = mul_float2(twiddles[k + block_size], a2); a3 = mul_float2(twiddles[k+2*block_size], a3); @@ -109,7 +109,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f } __attribute__((always_inline)) -void fft_radix2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix2(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { float2 a0, a1; @@ -128,7 +128,7 @@ void fft_radix2(__local float2* smem, __constant const float2* twiddles, const i } __attribute__((always_inline)) -void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/2; float2 a0, a1, a2, a3; @@ -151,7 +151,7 @@ void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix2_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/3; const int x3 = x1 + 2*t/3; @@ -177,7 +177,7 @@ void fft_radix2_B3(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int thread_block = t/4; const int x2 = x1 + thread_block; @@ -207,7 +207,7 @@ void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix2_B5(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B5(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int thread_block = t/5; const int x2 = x1 + thread_block; @@ -240,7 +240,7 @@ void fft_radix2_B5(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix4(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { float2 a0, a1, a2, a3; @@ -258,7 +258,7 @@ void fft_radix4(__local float2* smem, __constant const float2* twiddles, const i } __attribute__((always_inline)) -void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix4_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/2; float2 a0, a1, a2, a3, a4, a5, a6, a7; @@ -281,7 +281,7 @@ void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix4_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix4_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/3; const int x3 = x2 + t/3; @@ -307,7 +307,7 @@ void fft_radix4_B3(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix8(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix8(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; float2 a0, a1, a2, a3, a4, a5, a6, a7; @@ -370,7 +370,7 @@ void fft_radix8(__local float2* smem, __constant const float2* twiddles, const i } __attribute__((always_inline)) -void fft_radix3(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix3(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { float2 a0, a1, a2; @@ -388,7 +388,7 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i } __attribute__((always_inline)) -void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix3_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/2; float2 a0, a1, a2, a3, a4, a5; @@ -411,7 +411,7 @@ void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix3_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/3; const int x3 = x2 + t/3; @@ -437,7 +437,7 @@ void fft_radix3_B3(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix3_B4(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int thread_block = t/4; const int x2 = x1 + thread_block; @@ -467,7 +467,7 @@ void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, cons } __attribute__((always_inline)) -void fft_radix5(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix5(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { const int k = x % block_size; float2 a0, a1, a2, a3, a4; @@ -486,7 +486,7 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i } __attribute__((always_inline)) -void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1+t/2; float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; @@ -516,24 +516,23 @@ void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, cons __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __constant float2 * twiddles_ptr, const int t, const int nz) + __global float2* twiddles_ptr, const int t, const int nz) { const int x = get_global_id(0); const int y = get_group_id(1); - + const int block_size = LOCAL_SIZE/kercn; if (y < nz) { __local float2 smem[LOCAL_SIZE]; - __constant const float2* twiddles = (__constant float2*) twiddles_ptr; + __global const float2* twiddles = (__global float2*) twiddles_ptr; const int ind = x; - const int block_size = LOCAL_SIZE/kercn; #ifdef IS_1D float scale = 1.f/dst_cols; #else float scale = 1.f/(dst_cols*dst_rows); #endif -#ifndef REAL_INPUT +#ifdef COMPLEX_INPUT __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); #pragma unroll for (int i=0; i(df) << std::endl; double eps = src.size().area() * 1e-4; @@ -188,13 +185,12 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(4, 1), cv::Size(5, 8), cv::Size(6, 6), - cv::Size(512, 1), cv::Size(1280, 768)), - Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)), + Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), Bool(), // DFT_INVERSE Bool(), // DFT_ROWS Bool(), // DFT_SCALE - Bool() // inplace + Bool() // hint ) ); From f8442da945361ef229e0ad503fb573c5968ef772 Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Mon, 21 Jul 2014 10:12:25 +0200 Subject: [PATCH 41/71] Initialize all pointers of CvCapture_GStreamer correctly --- modules/videoio/src/cap_gstreamer.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index 00562aa41c..c5a78a0273 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -160,13 +160,17 @@ protected: void CvCapture_GStreamer::init() { pipeline = NULL; - frame = NULL; - buffer = NULL; - buffer_caps = NULL; + uridecodebin = NULL; + color = NULL; + sink = NULL; #if GST_VERSION_MAJOR > 0 sample = NULL; info = new GstMapInfo; #endif + buffer = NULL; + caps = NULL; + buffer_caps = NULL; + frame = NULL; } /*! From facb2f9a810b61e7d1e9c242fb1588effb0af5e3 Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Wed, 23 Jul 2014 19:15:16 +0200 Subject: [PATCH 42/71] Reset class members to NULL after gst_*_unref() --- modules/videoio/src/cap_gstreamer.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index c5a78a0273..fea148f91f 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -185,31 +185,41 @@ void CvCapture_GStreamer::close() if(pipeline) { gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_NULL); gst_object_unref(GST_OBJECT(pipeline)); + pipeline = NULL; } if(uridecodebin){ gst_object_unref(GST_OBJECT(uridecodebin)); + uridecodebin = NULL; } if(color){ gst_object_unref(GST_OBJECT(color)); + color = NULL; } if(sink){ gst_object_unref(GST_OBJECT(sink)); + sink = NULL; } - if(buffer) + if(buffer) { gst_buffer_unref(buffer); + buffer = NULL; + } if(frame) { frame->imageData = 0; cvReleaseImage(&frame); + frame = NULL; } if(caps){ gst_caps_unref(caps); + caps = NULL; } if(buffer_caps){ gst_caps_unref(buffer_caps); + buffer_caps = NULL; } #if GST_VERSION_MAJOR > 0 if(sample){ gst_sample_unref(sample); + sample = NULL; } #endif From 16a82b17475fad26a45b76e8e18a886a6c50aa9e Mon Sep 17 00:00:00 2001 From: Joan Puigcerver Date: Wed, 23 Jul 2014 22:47:51 +0200 Subject: [PATCH 43/71] Added an option to traincascade program to select the maximum number of threads to use during training --- apps/traincascade/traincascade.cpp | 7 +++++++ doc/user_guide/ug_traincascade.rst | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/apps/traincascade/traincascade.cpp b/apps/traincascade/traincascade.cpp index e6e16ba896..52bacc8083 100644 --- a/apps/traincascade/traincascade.cpp +++ b/apps/traincascade/traincascade.cpp @@ -13,6 +13,7 @@ int main( int argc, char* argv[] ) int numPos = 2000; int numNeg = 1000; int numStages = 20; + int numThreads = getNumThreads(); int precalcValBufSize = 256, precalcIdxBufSize = 256; bool baseFormatSave = false; @@ -36,6 +37,7 @@ int main( int argc, char* argv[] ) cout << " [-precalcValBufSize ]" << endl; cout << " [-precalcIdxBufSize ]" << endl; cout << " [-baseFormatSave]" << endl; + cout << " [-numThreads ]" << endl; cascadeParams.printDefaults(); stageParams.printDefaults(); for( int fi = 0; fi < fc; fi++ ) @@ -82,6 +84,10 @@ int main( int argc, char* argv[] ) { baseFormatSave = true; } + else if( !strcmp( argv[i], "-numThreads" ) ) + { + numThreads = atoi(argv[++i]); + } else if ( cascadeParams.scanAttr( argv[i], argv[i+1] ) ) { i++; } else if ( stageParams.scanAttr( argv[i], argv[i+1] ) ) { i++; } else if ( !set ) @@ -98,6 +104,7 @@ int main( int argc, char* argv[] ) } } + setNumThreads( numThreads ); classifier.train( cascadeDirName, vecName, bgName, diff --git a/doc/user_guide/ug_traincascade.rst b/doc/user_guide/ug_traincascade.rst index 601f504382..20c1b1683d 100644 --- a/doc/user_guide/ug_traincascade.rst +++ b/doc/user_guide/ug_traincascade.rst @@ -200,6 +200,12 @@ Command line arguments of ``opencv_traincascade`` application grouped by purpose This argument is actual in case of Haar-like features. If it is specified, the cascade will be saved in the old format. + * ``-numThreads `` + + Maximum number of threads to use during training. Notice that + the actual number of used threads may be lower, depending on + your machine and compilation options. + #. Cascade parameters: From 66ac46214d4bb7685b78d0b3aed4c67b5cdb76a7 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Wed, 23 Jul 2014 12:13:09 +0400 Subject: [PATCH 44/71] Final refactoring, fixes --- modules/core/perf/opencl/perf_arithm.cpp | 2 +- modules/core/perf/opencl/perf_dxt.cpp | 35 +- modules/core/src/dxt.cpp | 747 +++++++++++------------ modules/core/src/ocl.cpp | 8 +- modules/core/src/opencl/fft.cl | 105 ++-- modules/core/test/ocl/test_dft.cpp | 75 +-- 6 files changed, 468 insertions(+), 504 deletions(-) diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index ba808b494f..17badca765 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine( typedef Size_MatType TransposeFixture; OCL_PERF_TEST_P(TransposeFixture, Transpose, ::testing::Combine( - OCL_TEST_SIZES, Values(CV_8UC1, CV_32FC1, CV_8UC2, CV_32FC2, CV_8UC4, CV_32FC4))) + OCL_TEST_SIZES, OCL_TEST_TYPES_134)) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index 797b2c5334..d0219913b5 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -54,40 +54,21 @@ namespace ocl { ///////////// dft //////////////////////// -enum OCL_FFT_TYPE -{ - R2R = 0, // real to real (CCS) - C2R = 1, // complex to real - R2C = 2, // real to complex - C2C = 3 // complex to complex -}; - -typedef tuple DftParams; +typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), - Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), - Values((int) 0, (int)DFT_ROWS, (int)DFT_SCALE/*, (int)DFT_INVERSE, - (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), + Values((int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE, + (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE))) { const DftParams params = GetParam(); - const int dft_type = get<0>(params); - const Size srcSize = get<1>(params); - int flags = get<2>(params); - - int in_cn, out_cn; - switch (dft_type) - { - case R2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 1; out_cn = 1; break; - case C2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 2; out_cn = 2; break; - case R2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 1; out_cn = 2; break; - case C2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 2; out_cn = 2; break; - } + const Size srcSize = get<0>(params); + const int flags = get<1>(params); - UMat src(srcSize, CV_MAKE_TYPE(CV_32F, in_cn)), dst(srcSize, CV_MAKE_TYPE(CV_32F, out_cn)); + UMat src(srcSize, CV_32FC2), dst(srcSize, CV_32FC2); declare.in(src, WARMUP_RNG).out(dst); - OCL_TEST_CYCLE() cv::dft(src, dst, flags); + OCL_TEST_CYCLE() cv::dft(src, dst, flags | DFT_COMPLEX_OUTPUT); SANITY_CHECK(dst, 1e-3); } diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 869409f50d..cb0b118bca 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1781,6 +1781,377 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) #endif } +#ifdef HAVE_OPENCL + +namespace cv +{ + +enum FftType +{ + R2R = 0, + C2R = 1, + R2C = 2, + C2C = 3 +}; + +static void ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) +{ + int factors[34]; + int nf = DFTFactorize(cols, factors); + + int n = 1; + int factor_index = 0; + min_radix = INT_MAX; + + // 2^n transforms + if ((factors[factor_index] & 1) == 0) + { + for( ; n < factors[factor_index];) + { + int radix = 2, block = 1; + if (8*n <= factors[0]) + radix = 8; + else if (4*n <= factors[0]) + { + radix = 4; + if (cols % 12 == 0) + block = 3; + else if (cols % 8 == 0) + block = 2; + } + else + { + if (cols % 10 == 0) + block = 5; + else if (cols % 8 == 0) + block = 4; + else if (cols % 6 == 0) + block = 3; + else if (cols % 4 == 0) + block = 2; + } + + radixes.push_back(radix); + blocks.push_back(block); + min_radix = min(min_radix, block*radix); + n *= radix; + } + factor_index++; + } + + // all the other transforms + for( ; factor_index < nf; factor_index++) + { + int radix = factors[factor_index], block = 1; + if (radix == 3) + { + if (cols % 12 == 0) + block = 4; + else if (cols % 9 == 0) + block = 3; + else if (cols % 6 == 0) + block = 2; + } + else if (radix == 5) + { + if (cols % 10 == 0) + block = 2; + } + radixes.push_back(radix); + blocks.push_back(block); + min_radix = min(min_radix, block*radix); + } +} + +struct OCL_FftPlan +{ + UMat twiddles; + String buildOptions; + int thread_count; + + int dft_size; + bool status; + OCL_FftPlan(int _size): dft_size(_size), status(true) + { + int min_radix; + std::vector radixes, blocks; + ocl_getRadixes(dft_size, radixes, blocks, min_radix); + thread_count = dft_size / min_radix; + + if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize()) + { + status = false; + return; + } + + // generate string with radix calls + String radix_processing; + int n = 1, twiddle_size = 0; + for (size_t i=0; i 1) + radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix); + else + radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix); + twiddle_size += (radix-1)*n; + n *= radix; + } + + Mat tw(1, twiddle_size, CV_32FC2); + float* ptr = tw.ptr(); + int ptr_index = 0; + + n = 1; + for (size_t i=0; idft_size == dft_size) + { + return plan; + } + } + + OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size); + planStorage.push_back(newPlan); + return newPlan; + } + + ~OCL_FftPlanCache() + { + for (std::vector::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) + delete (*i); + planStorage.clear(); + } + +protected: + OCL_FftPlanCache() : + planStorage() + { + } + + std::vector planStorage; +}; + +static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) +{ + const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols()); + return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); +} + +static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) +{ + const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows()); + return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); +} + +static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) +{ + int type = _src.type(), cn = CV_MAT_CN(type); + Size ssize = _src.size(); + if ( !(type == CV_32FC1 || type == CV_32FC2) ) + return false; + + // if is not a multiplication of prime numbers { 2, 3, 5 } + if (ssize.area() != getOptimalDFTSize(ssize.area())) + return false; + + UMat src = _src.getUMat(); + int complex_input = cn == 2 ? 1 : 0; + int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0; + int real_input = cn == 1 ? 1 : 0; + int real_output = (flags & DFT_REAL_OUTPUT) != 0; + bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0; + + if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) + nonzero_rows = _src.rows(); + bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1; + + // if output format is not specified + if (complex_output + real_output == 0) + { + if (real_input) + real_output = 1; + else + complex_output = 1; + } + + FftType fftType = (FftType)(complex_input << 0 | complex_output << 1); + + // Forward Complex to CCS not supported + if (fftType == C2R && !inv) + fftType = C2C; + + // Inverse CCS to Complex not supported + if (fftType == R2C && inv) + fftType = R2R; + + UMat output; + if (fftType == C2C || fftType == R2C) + { + // complex output + _dst.create(src.size(), CV_32FC2); + output = _dst.getUMat(); + } + else + { + // real output + if (is1d) + { + _dst.create(src.size(), CV_32FC1); + output = _dst.getUMat(); + } + else + { + _dst.create(src.size(), CV_32FC1); + output.create(src.size(), CV_32FC2); + } + } + + if (!inv) + { + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + return false; + + if (!is1d) + { + int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols; + if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags, fftType)) + return false; + } + } + else + { + if (fftType == C2C) + { + // complex output + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + return false; + + if (!is1d) + { + if (!ocl_dft_C2C_cols(output, output, output.cols, flags, fftType)) + return false; + } + } + else + { + if (is1d) + { + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + return false; + } + else + { + int nonzero_cols = src.cols/2 + 1; + if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) + return false; + + if (!ocl_dft_C2C_rows(output, _dst, nonzero_rows, flags, fftType)) + return false; + } + } + } + return true; +} + +} // namespace cv; + +#endif + #ifdef HAVE_CLAMDFFT namespace cv { @@ -2011,7 +2382,6 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags) tmpBuffer.addref(); clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u); - return true; } @@ -2021,381 +2391,6 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags) #endif // HAVE_CLAMDFFT -namespace cv -{ - -#ifdef HAVE_OPENCL - -enum FftType -{ - R2R = 0, - C2R = 1, - R2C = 2, - C2C = 3 -}; - -static void ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) -{ - int factors[34]; - int nf = DFTFactorize(cols, factors); - - int n = 1; - int factor_index = 0; - min_radix = INT_MAX; - - // 2^n transforms - if ((factors[factor_index] & 1) == 0) - { - for( ; n < factors[factor_index];) - { - int radix = 2, block = 1; - if (8*n <= factors[0]) - radix = 8; - else if (4*n <= factors[0]) - { - radix = 4; - if (cols % 12 == 0) - block = 3; - else if (cols % 8 == 0) - block = 2; - } - else - { - if (cols % 10 == 0) - block = 5; - else if (cols % 8 == 0) - block = 4; - else if (cols % 6 == 0) - block = 3; - else if (cols % 4 == 0) - block = 2; - } - - radixes.push_back(radix); - blocks.push_back(block); - min_radix = min(min_radix, block*radix); - n *= radix; - } - factor_index++; - } - - // all the other transforms - for( ; factor_index < nf; factor_index++) - { - int radix = factors[factor_index], block = 1; - if (radix == 3) - { - if (cols % 12 == 0) - block = 4; - else if (cols % 9 == 0) - block = 3; - else if (cols % 6 == 0) - block = 2; - } - else if (radix == 5) - { - if (cols % 10 == 0) - block = 2; - } - radixes.push_back(radix); - blocks.push_back(block); - min_radix = min(min_radix, block*radix); - } -} - -struct OCL_FftPlan -{ - UMat twiddles; - String buildOptions; - int thread_count; - - int dft_size; - bool status; - OCL_FftPlan(int _size): dft_size(_size), status(true) - { - int min_radix; - std::vector radixes, blocks; - ocl_getRadixes(dft_size, radixes, blocks, min_radix); - thread_count = dft_size / min_radix; - - if (thread_count > ocl::Device::getDefault().maxWorkGroupSize()) - { - status = false; - return; - } - - // generate string with radix calls - String radix_processing; - int n = 1, twiddle_size = 0; - for (size_t i=0; i 1) - radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix); - else - radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix); - twiddle_size += (radix-1)*n; - n *= radix; - } - - Mat tw(1, twiddle_size, CV_32FC2); - float* ptr = tw.ptr(); - int ptr_index = 0; - - n = 1; - for (size_t i=0; idft_size == dft_size) - { - return plan; - } - } - - OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size); - planStorage.push_back(newPlan); - return newPlan; - } - - ~OCL_FftPlanCache() - { - for (std::vector::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) - delete (*i); - planStorage.clear(); - } - -protected: - OCL_FftPlanCache() : - planStorage() - { - } - - std::vector planStorage; -}; - -static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) -{ - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols()); - return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); -} - -static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) -{ - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows()); - return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); -} - -static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) -{ - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - Size ssize = _src.size(); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - if ( (!doubleSupport && depth == CV_64F) || - !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2)) - return false; - - // if is not a multiplication of prime numbers { 2, 3, 5 } - if (ssize.area() != getOptimalDFTSize(ssize.area())) - return false; - - UMat src = _src.getUMat(); - int complex_input = cn == 2 ? 1 : 0; - int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0; - int real_input = cn == 1 ? 1 : 0; - int real_output = (flags & DFT_REAL_OUTPUT) != 0; - bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0; - - if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) - nonzero_rows = _src.rows(); - bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1; - - // if output format is not specified - if (complex_output + real_output == 0) - { - if (real_input) - real_output = 1; - else - complex_output = 1; - } - - FftType fftType = (FftType)(complex_input << 0 | complex_output << 1); - - // Forward Complex to CCS not supported - if (fftType == C2R && !inv) - fftType = C2C; - - // Inverse CCS to Complex not supported - if (fftType == R2C && inv) - fftType = R2R; - - UMat output; - if (fftType == C2C || fftType == R2C) - { - // complex output - _dst.create(src.size(), CV_32FC2); - output = _dst.getUMat(); - } - else - { - // real output - if (is1d) - { - _dst.create(src.size(), CV_32FC1); - output = _dst.getUMat(); - } - else - { - _dst.create(src.size(), CV_32FC1); - output.create(src.size(), CV_32FC2); - } - } - - if (!inv) - { - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) - return false; - - if (!is1d) - { - int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols; - if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags, fftType)) - return false; - } - } - else - { - if (fftType == C2C) - { - // complex output - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) - return false; - - if (!is1d) - { - if (!ocl_dft_C2C_cols(output, output, output.cols, flags, fftType)) - return false; - } - } - else - { - if (is1d) - { - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) - return false; - } - else - { - int nonzero_cols = src.cols/2 + 1; - if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) - return false; - - if (!ocl_dft_C2C_rows(output, _dst, nonzero_rows, flags, fftType)) - return false; - } - } - } - return true; -} - -#endif - -} // namespace cv; - - - void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) { #ifdef HAVE_CLAMDFFT diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index a2110f6cc2..32db8c91b4 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], sync ? 0 : &p->e); if( sync || retval != CL_SUCCESS ) { - int a = clFinish(qq); - CV_OclDbgAssert(a == CL_SUCCESS); + CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS); p->cleanupUMats(); } else @@ -3899,9 +3898,8 @@ public: if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) { AlignedDataPtr alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); - int a = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, - u->size, alignedPtr.getAlignedPtr(), 0, 0, 0); - CV_Assert( a == CL_SUCCESS ); + CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, + u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); u->markHostCopyObsolete(false); } } diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index b8d2c6716d..1cb2278c0d 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -6,36 +6,36 @@ #define fft5_5 0.363271264002f __attribute__((always_inline)) -float2 mul_float2(float2 a, float2 b) { - return (float2)(fma(a.x, b.x, -a.y * b.y), fma(a.x, b.y, a.y * b.x)); +float2 mul_float2(float2 a, float2 b) { + return (float2)(fma(a.x, b.x, -a.y * b.y), fma(a.x, b.y, a.y * b.x)); } __attribute__((always_inline)) -float2 twiddle(float2 a) { - return (float2)(a.y, -a.x); +float2 twiddle(float2 a) { + return (float2)(a.y, -a.x); } __attribute__((always_inline)) -void butterfly2(float2 a0, float2 a1, __local float2* smem, __global const float2* twiddles, - const int x, const int block_size) -{ +void butterfly2(float2 a0, float2 a1, __local float2* smem, __global const float2* twiddles, + const int x, const int block_size) +{ const int k = x & (block_size - 1); a1 = mul_float2(twiddles[k], a1); const int dst_ind = (x << 1) - k; - + smem[dst_ind] = a0 + a1; smem[dst_ind+block_size] = a0 - a1; } __attribute__((always_inline)) -void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __global const float2* twiddles, - const int x, const int block_size) +void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __global const float2* twiddles, + const int x, const int block_size) { const int k = x & (block_size - 1); a1 = mul_float2(twiddles[k], a1); a2 = mul_float2(twiddles[k + block_size], a2); a3 = mul_float2(twiddles[k + 2*block_size], a3); - + const int dst_ind = ((x - k) << 2) + k; float2 b0 = a0 + a2; @@ -50,9 +50,9 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem } __attribute__((always_inline)) -void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global const float2* twiddles, - const int x, const int block_size) -{ +void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global const float2* twiddles, + const int x, const int block_size) +{ const int k = x % block_size; a1 = mul_float2(twiddles[k], a1); a2 = mul_float2(twiddles[k+block_size], a2); @@ -69,8 +69,8 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global __attribute__((always_inline)) void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local float2* smem, __global const float2* twiddles, - const int x, const int block_size) -{ + const int x, const int block_size) +{ const int k = x % block_size; a1 = mul_float2(twiddles[k], a1); a2 = mul_float2(twiddles[k + block_size], a2); @@ -95,7 +95,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f a4 = fft5_3 * (float2)(-a1.y - a3.y, a1.x + a3.x); b5 = (float2)(a4.x - fft5_5 * a1.y, a4.y + fft5_5 * a1.x); - a4.x += fft5_4 * a3.y; + a4.x += fft5_4 * a3.y; a4.y -= fft5_4 * a3.x; a1 = b0 + b1; @@ -109,7 +109,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f } __attribute__((always_inline)) -void fft_radix2(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) +void fft_radix2(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t) { float2 a0, a1; @@ -122,13 +122,13 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int barrier(CLK_LOCAL_MEM_FENCE); if (x < t) - butterfly2(a0, a1, smem, twiddles, x, block_size); + butterfly2(a0, a1, smem, twiddles, x, block_size); barrier(CLK_LOCAL_MEM_FENCE); } __attribute__((always_inline)) -void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/2; float2 a0, a1, a2, a3; @@ -151,7 +151,7 @@ void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const } __attribute__((always_inline)) -void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int x2 = x1 + t/3; const int x3 = x1 + 2*t/3; @@ -177,7 +177,7 @@ void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const } __attribute__((always_inline)) -void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int thread_block = t/4; const int x2 = x1 + thread_block; @@ -207,7 +207,7 @@ void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const } __attribute__((always_inline)) -void fft_radix2_B5(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) +void fft_radix2_B5(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t) { const int thread_block = t/5; const int x2 = x1 + thread_block; @@ -326,7 +326,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int a7 = mul_float2(twiddles[k+6*block_size],smem[x+7*t]); float2 b0, b1, b6, b7; - + b0 = a0 + a4; a4 = a0 - a4; b1 = a1 + a5; @@ -335,7 +335,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int b6 = twiddle(a2 - a6); a2 = a2 + a6; b7 = a3 - a7; - b7 = (float2)(SQRT_2) * (float2)(-b7.x + b7.y, -b7.x - b7.y); + b7 = (float2)(SQRT_2) * (float2)(-b7.x + b7.y, -b7.x - b7.y); a3 = a3 + a7; a0 = b0 + a2; @@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, } else { + // fill with zero other rows +#ifdef COMPLEX_OUTPUT __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); +#else + __global float* dst = (__global float*)(dst_ptr + mad24(y, dst_step, dst_offset)); +#endif #pragma unroll for (int i=x; i(df) << std::endl; + OCL_OFF(cv::dft(src, dst, dft_flags | cv::DFT_COMPLEX_OUTPUT)); + OCL_ON(cv::dft(usrc, udst, dft_flags | cv::DFT_COMPLEX_OUTPUT)); double eps = src.size().area() * 1e-4; EXPECT_MAT_NEAR(dst, udst, eps); @@ -185,15 +150,15 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)), - Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), - Bool(), // DFT_INVERSE +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), + cv::Size(512, 1), cv::Size(1024, 768)), + Values(CV_32F, CV_64F), + Bool(), // inplace Bool(), // DFT_ROWS Bool(), // DFT_SCALE - Bool() // hint - ) + Bool()) // DFT_INVERSE ); } } // namespace cvtest::ocl -#endif // HAVE_OPENCL \ No newline at end of file +#endif // HAVE_OPENCL From e51c0810b6bd6e5cdd934d172feb8b983bf7a407 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 24 Jul 2014 13:11:58 +0400 Subject: [PATCH 45/71] Added accuracy and performance tests for DFT all modes. --- modules/core/perf/opencl/perf_dxt.cpp | 37 +++++++++++---- modules/core/test/ocl/test_dft.cpp | 65 ++++++++++++++++++--------- 2 files changed, 73 insertions(+), 29 deletions(-) diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index d0219913b5..c0e41485e4 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -54,23 +54,42 @@ namespace ocl { ///////////// dft //////////////////////// -typedef tuple DftParams; +enum OCL_FFT_TYPE +{ + R2R = 0, + C2R = 1, + R2C = 2, + C2C = 3 +}; + +typedef tuple DftParams; typedef TestBaseWithParam DftFixture; -OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), - Values((int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE, - (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE))) +OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), + Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(512, 512), Size(1024, 1024), Size(2048, 2048)), + Values((int) 0, (int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE, + (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE))) { const DftParams params = GetParam(); - const Size srcSize = get<0>(params); - const int flags = get<1>(params); + const int dft_type = get<0>(params); + const Size srcSize = get<1>(params); + int flags = get<2>(params); - UMat src(srcSize, CV_32FC2), dst(srcSize, CV_32FC2); + int in_cn, out_cn; + switch (dft_type) + { + case R2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 1; out_cn = 1; break; + case C2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 2; out_cn = 2; break; + case R2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 1; out_cn = 2; break; + case C2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 2; out_cn = 2; break; + } + + UMat src(srcSize, CV_MAKE_TYPE(CV_32F, in_cn)), dst(srcSize, CV_MAKE_TYPE(CV_32F, out_cn)); declare.in(src, WARMUP_RNG).out(dst); - OCL_TEST_CYCLE() cv::dft(src, dst, flags | DFT_COMPLEX_OUTPUT); + OCL_TEST_CYCLE() cv::dft(src, dst, flags); - SANITY_CHECK(dst, 1e-3); + SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE); } ///////////// MulSpectrums //////////////////////// diff --git a/modules/core/test/ocl/test_dft.cpp b/modules/core/test/ocl/test_dft.cpp index 1f0e43b20e..cd0c1f07d0 100644 --- a/modules/core/test/ocl/test_dft.cpp +++ b/modules/core/test/ocl/test_dft.cpp @@ -48,17 +48,26 @@ #ifdef HAVE_OPENCL +enum OCL_FFT_TYPE +{ + R2R = 0, + C2R = 1, + R2C = 2, + C2C = 3 +}; + namespace cvtest { namespace ocl { //////////////////////////////////////////////////////////////////////////// // Dft -PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool) +PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) { cv::Size dft_size; - int dft_flags, depth; - bool inplace; + int dft_flags, depth, cn, dft_type; + bool hint; + bool is1d; TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_OUTPUT_PARAMETER(dst); @@ -66,34 +75,50 @@ PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool) virtual void SetUp() { dft_size = GET_PARAM(0); - depth = GET_PARAM(1); - inplace = GET_PARAM(2); + dft_type = GET_PARAM(1); + depth = CV_32F; dft_flags = 0; + switch (dft_type) + { + case R2R: dft_flags |= cv::DFT_REAL_OUTPUT; cn = 1; break; + case C2R: dft_flags |= cv::DFT_REAL_OUTPUT; cn = 2; break; + case R2C: dft_flags |= cv::DFT_COMPLEX_OUTPUT; cn = 1; break; + case C2C: dft_flags |= cv::DFT_COMPLEX_OUTPUT; cn = 2; break; + } + + if (GET_PARAM(2)) + dft_flags |= cv::DFT_INVERSE; if (GET_PARAM(3)) dft_flags |= cv::DFT_ROWS; if (GET_PARAM(4)) dft_flags |= cv::DFT_SCALE; - if (GET_PARAM(5)) - dft_flags |= cv::DFT_INVERSE; + hint = GET_PARAM(5); + is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1; } - void generateTestData(int cn = 2) + void generateTestData() { src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0); usrc = src.getUMat(ACCESS_READ); - - if (inplace) - dst = src, udst = usrc; } }; -OCL_TEST_P(Dft, C2C) +OCL_TEST_P(Dft, Mat) { generateTestData(); - OCL_OFF(cv::dft(src, dst, dft_flags | cv::DFT_COMPLEX_OUTPUT)); - OCL_ON(cv::dft(usrc, udst, dft_flags | cv::DFT_COMPLEX_OUTPUT)); + int nonzero_rows = hint ? src.cols - randomInt(1, src.rows-1) : 0; + OCL_OFF(cv::dft(src, dst, dft_flags, nonzero_rows)); + OCL_ON(cv::dft(usrc, udst, dft_flags, nonzero_rows)); + + // In case forward R2C 1d tranform dst contains only half of output + // without complex conjugate + if (dft_type == R2C && is1d && (dft_flags & cv::DFT_INVERSE) == 0) + { + dst = dst(cv::Range(0, dst.rows), cv::Range(0, dst.cols/2 + 1)); + udst = udst(cv::Range(0, udst.rows), cv::Range(0, udst.cols/2 + 1)); + } double eps = src.size().area() * 1e-4; EXPECT_MAT_NEAR(dst, udst, eps); @@ -150,15 +175,15 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), - cv::Size(512, 1), cv::Size(1024, 768)), - Values(CV_32F, CV_64F), - Bool(), // inplace +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)), + Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), + Bool(), // DFT_INVERSE Bool(), // DFT_ROWS Bool(), // DFT_SCALE - Bool()) // DFT_INVERSE + Bool() // hint + ) ); } } // namespace cvtest::ocl -#endif // HAVE_OPENCL +#endif // HAVE_OPENCL \ No newline at end of file From a98ce86ca32eef38037c9efa17c25ee9b0afa9fa Mon Sep 17 00:00:00 2001 From: Adrian Stratulat Date: Wed, 2 Oct 2013 19:23:42 +0000 Subject: [PATCH 46/71] Add NEON intrinsics to core/src/arithm.cpp --- modules/core/src/arithm.cpp | 109 ++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 4 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index b98bf830e4..29501a0715 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -54,21 +54,23 @@ namespace cv struct NOP {}; -#if CV_SSE2 +#if CV_SSE2 || CV_NEON #define FUNCTOR_TEMPLATE(name) \ template struct name {} FUNCTOR_TEMPLATE(VLoadStore128); +#if CV_SSE2 FUNCTOR_TEMPLATE(VLoadStore64); FUNCTOR_TEMPLATE(VLoadStore128Aligned); +#endif #endif template void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, Size sz) { -#if CV_SSE2 +#if CV_SSE2 || CV_NEON VOp vop; #endif Op op; @@ -79,9 +81,11 @@ void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, si { int x = 0; +#if CV_NEON || CV_SSE2 #if CV_SSE2 if( USE_SSE2 ) { +#endif for( ; x <= sz.width - 32/(int)sizeof(T); x += 32/sizeof(T) ) { typename VLoadStore128::reg_type r0 = VLoadStore128::load(src1 + x ); @@ -91,8 +95,10 @@ void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, si VLoadStore128::store(dst + x , r0); VLoadStore128::store(dst + x + 16/sizeof(T), r1); } +#if CV_SSE2 } #endif +#endif #if CV_SSE2 if( USE_SSE2 ) { @@ -125,7 +131,7 @@ template void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, Size sz) { -#if CV_SSE2 +#if CV_SSE2 || CV_NEON Op32 op32; #endif Op op; @@ -153,9 +159,11 @@ void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2, } } #endif +#if CV_NEON || CV_SSE2 #if CV_SSE2 if( USE_SSE2 ) { +#endif for( ; x <= sz.width - 8; x += 8 ) { typename VLoadStore128::reg_type r0 = VLoadStore128::load(src1 + x ); @@ -165,8 +173,10 @@ void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2, VLoadStore128::store(dst + x , r0); VLoadStore128::store(dst + x + 4, r1); } +#if CV_SSE2 } #endif +#endif #if CV_ENABLE_UNROLLED for( ; x <= sz.width - 4; x += 4 ) { @@ -383,7 +393,98 @@ FUNCTOR_TEMPLATE(VNot); FUNCTOR_CLOSURE_1arg(VNot, uchar, return _mm_xor_si128(_mm_set1_epi32(-1), a)); #endif -#if CV_SSE2 +#if CV_NEON + +#define FUNCTOR_LOADSTORE(name, template_arg, register_type, load_body, store_body)\ + template <> \ + struct name{ \ + typedef register_type reg_type; \ + static reg_type load(const template_arg * p) { return load_body (p);}; \ + static void store(template_arg * p, reg_type v) { store_body (p, v);}; \ + } + +#define FUNCTOR_CLOSURE_2arg(name, template_arg, body)\ + template<> \ + struct name \ + { \ + VLoadStore128::reg_type operator()( \ + VLoadStore128::reg_type a, \ + VLoadStore128::reg_type b) const \ + { \ + return body; \ + }; \ + } + +#define FUNCTOR_CLOSURE_1arg(name, template_arg, body)\ + template<> \ + struct name \ + { \ + VLoadStore128::reg_type operator()( \ + VLoadStore128::reg_type a, \ + VLoadStore128::reg_type ) const \ + { \ + return body; \ + }; \ + } + +FUNCTOR_LOADSTORE(VLoadStore128, uchar, uint8x16_t, vld1q_u8 , vst1q_u8 ); +FUNCTOR_LOADSTORE(VLoadStore128, schar, int8x16_t, vld1q_s8 , vst1q_s8 ); +FUNCTOR_LOADSTORE(VLoadStore128, ushort, uint16x8_t, vld1q_u16, vst1q_u16); +FUNCTOR_LOADSTORE(VLoadStore128, short, int16x8_t, vld1q_s16, vst1q_s16); +FUNCTOR_LOADSTORE(VLoadStore128, int, int32x4_t, vld1q_s32, vst1q_s32); +FUNCTOR_LOADSTORE(VLoadStore128, float, float32x4_t, vld1q_f32, vst1q_f32); + +FUNCTOR_TEMPLATE(VAdd); +FUNCTOR_CLOSURE_2arg(VAdd, uchar, vqaddq_u8 (a, b)); +FUNCTOR_CLOSURE_2arg(VAdd, schar, vqaddq_s8 (a, b)); +FUNCTOR_CLOSURE_2arg(VAdd, ushort, vqaddq_u16(a, b)); +FUNCTOR_CLOSURE_2arg(VAdd, short, vqaddq_s16(a, b)); +FUNCTOR_CLOSURE_2arg(VAdd, int, vaddq_s32 (a, b)); +FUNCTOR_CLOSURE_2arg(VAdd, float, vaddq_f32 (a, b)); + +FUNCTOR_TEMPLATE(VSub); +FUNCTOR_CLOSURE_2arg(VSub, uchar, vqsubq_u8 (a, b)); +FUNCTOR_CLOSURE_2arg(VSub, schar, vqsubq_s8 (a, b)); +FUNCTOR_CLOSURE_2arg(VSub, ushort, vqsubq_u16(a, b)); +FUNCTOR_CLOSURE_2arg(VSub, short, vqsubq_s16(a, b)); +FUNCTOR_CLOSURE_2arg(VSub, int, vsubq_s32 (a, b)); +FUNCTOR_CLOSURE_2arg(VSub, float, vsubq_f32 (a, b)); + +FUNCTOR_TEMPLATE(VMin); +FUNCTOR_CLOSURE_2arg(VMin, uchar, vminq_u8 (a, b)); +FUNCTOR_CLOSURE_2arg(VMin, schar, vminq_s8 (a, b)); +FUNCTOR_CLOSURE_2arg(VMin, ushort, vminq_u16(a, b)); +FUNCTOR_CLOSURE_2arg(VMin, short, vminq_s16(a, b)); +FUNCTOR_CLOSURE_2arg(VMin, int, vminq_s32(a, b)); +FUNCTOR_CLOSURE_2arg(VMin, float, vminq_f32(a, b)); + +FUNCTOR_TEMPLATE(VMax); +FUNCTOR_CLOSURE_2arg(VMax, uchar, vmaxq_u8 (a, b)); +FUNCTOR_CLOSURE_2arg(VMax, schar, vmaxq_s8 (a, b)); +FUNCTOR_CLOSURE_2arg(VMax, ushort, vmaxq_u16(a, b)); +FUNCTOR_CLOSURE_2arg(VMax, short, vmaxq_s16(a, b)); +FUNCTOR_CLOSURE_2arg(VMax, int, vmaxq_s32(a, b)); +FUNCTOR_CLOSURE_2arg(VMax, float, vmaxq_f32(a, b)); + +FUNCTOR_TEMPLATE(VAbsDiff); +FUNCTOR_CLOSURE_2arg(VAbsDiff, uchar, vabdq_u8 (a, b)); +FUNCTOR_CLOSURE_2arg(VAbsDiff, schar, vqabsq_s8 (vqsubq_s8(a, b))); +FUNCTOR_CLOSURE_2arg(VAbsDiff, ushort, vabdq_u16 (a, b)); +FUNCTOR_CLOSURE_2arg(VAbsDiff, short, vqabsq_s16(vqsubq_s16(a, b))); +FUNCTOR_CLOSURE_2arg(VAbsDiff, int, vabdq_s32 (a, b)); +FUNCTOR_CLOSURE_2arg(VAbsDiff, float, vabdq_f32 (a, b)); + +FUNCTOR_TEMPLATE(VAnd); +FUNCTOR_CLOSURE_2arg(VAnd, uchar, vandq_u8(a, b)); +FUNCTOR_TEMPLATE(VOr); +FUNCTOR_CLOSURE_2arg(VOr , uchar, vorrq_u8(a, b)); +FUNCTOR_TEMPLATE(VXor); +FUNCTOR_CLOSURE_2arg(VXor, uchar, veorq_u8(a, b)); +FUNCTOR_TEMPLATE(VNot); +FUNCTOR_CLOSURE_1arg(VNot, uchar, vmvnq_u8(a )); +#endif + +#if CV_SSE2 || CV_NEON #define IF_SIMD(op) op #else #define IF_SIMD(op) NOP From 37d01e2d27b6ed6eac7f9f179796a7574d43ae71 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Fri, 25 Jul 2014 13:11:35 +0400 Subject: [PATCH 47/71] Added license header, using cv::Ptr, small fixes. --- modules/core/include/opencv2/core/cvdef.h | 2 +- modules/core/src/dxt.cpp | 197 +++++++++++----------- modules/core/src/opencl/fft.cl | 47 +++--- 3 files changed, 126 insertions(+), 120 deletions(-) diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 765c54cbe1..1f64cd2ace 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -244,7 +244,7 @@ typedef signed char schar; /* fundamental constants */ #define CV_PI 3.1415926535897932384626433832795 -#define CV_TWO_PI 6.283185307179586476925286766559 +#define CV_2PI 6.283185307179586476925286766559 #define CV_LOG2 0.69314718055994530941723212145818 /****************************************************************************************\ diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index cb0b118bca..b57e4e8cc0 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1788,89 +1788,23 @@ namespace cv enum FftType { - R2R = 0, - C2R = 1, - R2C = 2, - C2C = 3 + R2R = 0, // real to CCS in case forward transform, CCS to real otherwise + C2R = 1, // complex to real in case inverse transform + R2C = 2, // real to complex in case forward transform + C2C = 3 // complex to complex }; -static void ocl_getRadixes(int cols, std::vector& radixes, std::vector& blocks, int& min_radix) -{ - int factors[34]; - int nf = DFTFactorize(cols, factors); - - int n = 1; - int factor_index = 0; - min_radix = INT_MAX; - - // 2^n transforms - if ((factors[factor_index] & 1) == 0) - { - for( ; n < factors[factor_index];) - { - int radix = 2, block = 1; - if (8*n <= factors[0]) - radix = 8; - else if (4*n <= factors[0]) - { - radix = 4; - if (cols % 12 == 0) - block = 3; - else if (cols % 8 == 0) - block = 2; - } - else - { - if (cols % 10 == 0) - block = 5; - else if (cols % 8 == 0) - block = 4; - else if (cols % 6 == 0) - block = 3; - else if (cols % 4 == 0) - block = 2; - } - - radixes.push_back(radix); - blocks.push_back(block); - min_radix = min(min_radix, block*radix); - n *= radix; - } - factor_index++; - } - - // all the other transforms - for( ; factor_index < nf; factor_index++) - { - int radix = factors[factor_index], block = 1; - if (radix == 3) - { - if (cols % 12 == 0) - block = 4; - else if (cols % 9 == 0) - block = 3; - else if (cols % 6 == 0) - block = 2; - } - else if (radix == 5) - { - if (cols % 10 == 0) - block = 2; - } - radixes.push_back(radix); - blocks.push_back(block); - min_radix = min(min_radix, block*radix); - } -} - struct OCL_FftPlan { +private: UMat twiddles; String buildOptions; int thread_count; - - int dft_size; bool status; + +public: + int dft_size; + OCL_FftPlan(int _size): dft_size(_size), status(true) { int min_radix; @@ -1910,7 +1844,7 @@ struct OCL_FftPlan for (int j=1; j& radixes, std::vector& blocks, int& min_radix) + { + int factors[34]; + int nf = DFTFactorize(cols, factors); + + int n = 1; + int factor_index = 0; + min_radix = INT_MAX; + + // 2^n transforms + if ((factors[factor_index] & 1) == 0) + { + for( ; n < factors[factor_index];) + { + int radix = 2, block = 1; + if (8*n <= factors[0]) + radix = 8; + else if (4*n <= factors[0]) + { + radix = 4; + if (cols % 12 == 0) + block = 3; + else if (cols % 8 == 0) + block = 2; + } + else + { + if (cols % 10 == 0) + block = 5; + else if (cols % 8 == 0) + block = 4; + else if (cols % 6 == 0) + block = 3; + else if (cols % 4 == 0) + block = 2; + } + + radixes.push_back(radix); + blocks.push_back(block); + min_radix = min(min_radix, block*radix); + n *= radix; + } + factor_index++; + } + + // all the other transforms + for( ; factor_index < nf; factor_index++) + { + int radix = factors[factor_index], block = 1; + if (radix == 3) + { + if (cols % 12 == 0) + block = 4; + else if (cols % 9 == 0) + block = 3; + else if (cols % 6 == 0) + block = 2; + } + else if (radix == 5) + { + if (cols % 10 == 0) + block = 2; + } + radixes.push_back(radix); + blocks.push_back(block); + min_radix = min(min_radix, block*radix); + } + } }; class OCL_FftPlanCache @@ -1993,27 +1997,24 @@ public: return planCache; } - OCL_FftPlan* getFftPlan(int dft_size) + Ptr getFftPlan(int dft_size) { for (size_t i = 0, size = planStorage.size(); i < size; ++i) { - OCL_FftPlan * const plan = planStorage[i]; - + Ptr plan = planStorage[i]; if (plan->dft_size == dft_size) { return plan; } } - OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size); + Ptr newPlan = Ptr(new OCL_FftPlan(dft_size)); planStorage.push_back(newPlan); return newPlan; } ~OCL_FftPlanCache() { - for (std::vector::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) - delete (*i); planStorage.clear(); } @@ -2023,18 +2024,18 @@ protected: { } - std::vector planStorage; + std::vector > planStorage; }; -static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) +static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) { - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols()); + Ptr plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols()); return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); } -static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) +static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) { - const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows()); + Ptr plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows()); return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); } @@ -2103,13 +2104,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro if (!inv) { - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType)) return false; if (!is1d) { int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols; - if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags, fftType)) + if (!ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType)) return false; } } @@ -2118,12 +2119,12 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro if (fftType == C2C) { // complex output - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType)) return false; if (!is1d) { - if (!ocl_dft_C2C_cols(output, output, output.cols, flags, fftType)) + if (!ocl_dft_cols(output, output, output.cols, flags, fftType)) return false; } } @@ -2131,16 +2132,16 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro { if (is1d) { - if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags, fftType)) + if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType)) return false; } else { int nonzero_cols = src.cols/2 + 1; - if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) + if (!ocl_dft_cols(src, output, nonzero_cols, flags, fftType)) return false; - if (!ocl_dft_C2C_rows(output, _dst, nonzero_rows, flags, fftType)) + if (!ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType)) return false; } } @@ -2286,7 +2287,7 @@ public: } // no baked plan is found, so let's create a new one - FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType); + Ptr newPlan = Ptr(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType)); planStorage.push_back(newPlan); return newPlan->plHandle; @@ -2294,8 +2295,6 @@ public: ~PlanCache() { - for (std::vector::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) - delete (*i); planStorage.clear(); } @@ -2305,7 +2304,7 @@ protected: { } - std::vector planStorage; + std::vector > planStorage; }; extern "C" { diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index 1cb2278c0d..1268c4d6e4 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -1,3 +1,10 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + #define SQRT_2 0.707106781188f #define sin_120 0.866025403784f #define fft5_2 0.559016994374f @@ -509,9 +516,9 @@ void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const } #ifdef DFT_SCALE -#define VAL(x, scale) x*scale +#define SCALE_VAL(x, scale) x*scale #else -#define VAL(x, scale) x +#define SCALE_VAL(x, scale) x #endif __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, @@ -558,15 +565,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); #pragma unroll for (int i=x; i Date: Fri, 25 Jul 2014 14:51:30 +0400 Subject: [PATCH 48/71] Fixed bug in reduce.cl --- modules/core/src/opencl/reduce.cl | 2 +- modules/core/src/stat.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index f16a742e54..c89f1cf005 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -379,7 +379,7 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = SUM_ABS2(temp, temp2)); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 48d85900d9..60c8dc89bb 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -482,6 +482,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src) : 1, mcn = std::max(cn, kercn); CV_Assert(!haveSrc2 || _src2.type() == type); + int convert_cn = haveSrc2 ? mcn : cn; if ( (!doubleSupport && depth == CV_64F) || cn > 4 ) return false; @@ -513,7 +514,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "", haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", - depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, mcn, cvt[1]) : "noconvert"); + depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert"); ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts); if (k.empty()) From d25770ee76ddfb82b15998d39604fa34324b3482 Mon Sep 17 00:00:00 2001 From: Sander Mathijs van Veen Date: Sat, 26 Jul 2014 16:40:09 +0200 Subject: [PATCH 49/71] Fix Python 2 bindings for findContours The sanity check prevents Python 2 from calling findContours, because the Python 2 bindings pass an empty matrix for `_contours`. If `_contours` is not empty, the channel and depth check should determine whether the matrix is of the correct type and size. Note: the Python 2 bindings passes a matrix of type `STD_VECTOR_MAT` to parameters of the type `OutputArrayOfArrays`. --- modules/imgproc/src/contours.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/contours.cpp b/modules/imgproc/src/contours.cpp index 5ea4a07c61..ee7f21ac88 100644 --- a/modules/imgproc/src/contours.cpp +++ b/modules/imgproc/src/contours.cpp @@ -1704,8 +1704,10 @@ void cv::findContours( InputOutputArray _image, OutputArrayOfArrays _contours, OutputArray _hierarchy, int mode, int method, Point offset ) { // Sanity check: output must be of type vector> - CV_Assert( _contours.kind() == _InputArray::STD_VECTOR_VECTOR && - _contours.channels() == 2 && _contours.depth() == CV_32S ); + CV_Assert((_contours.kind() == _InputArray::STD_VECTOR_VECTOR || _contours.kind() == _InputArray::STD_VECTOR_MAT || + _contours.kind() == _InputArray::STD_VECTOR_UMAT)); + + CV_Assert(_contours.empty() || (_contours.channels() == 2 && _contours.depth() == CV_32S)); Mat image = _image.getMat(); MemStorage storage(cvCreateMemStorage()); From 1d1bfd3d7d305339342bf2b0c752c18aae9694d8 Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Tue, 22 Jul 2014 12:36:36 +0400 Subject: [PATCH 50/71] automatic search for headers for python wrapper generation --- modules/python/CMakeLists.txt | 41 +++++++++++++++-------------------- modules/python/src2/gen2.py | 4 +++- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index af062cebc7..4eaa885254 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -19,31 +19,24 @@ ocv_module_include_directories( "${CMAKE_CURRENT_SOURCE_DIR}/src2" ) -set(opencv_hdrs - "${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core.hpp" - "${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/base.hpp" - "${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/types.hpp" - "${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/persistence.hpp" - "${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/utility.hpp" - "${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/ocl.hpp" - "${OPENCV_MODULE_opencv_flann_LOCATION}/include/opencv2/flann/miniflann.hpp" - "${OPENCV_MODULE_opencv_imgproc_LOCATION}/include/opencv2/imgproc.hpp" - "${OPENCV_MODULE_opencv_video_LOCATION}/include/opencv2/video/background_segm.hpp" - "${OPENCV_MODULE_opencv_video_LOCATION}/include/opencv2/video/tracking.hpp" - "${OPENCV_MODULE_opencv_photo_LOCATION}/include/opencv2/photo.hpp" - "${OPENCV_MODULE_opencv_imgcodecs_LOCATION}/include/opencv2/imgcodecs.hpp" - "${OPENCV_MODULE_opencv_videoio_LOCATION}/include/opencv2/videoio.hpp" - "${OPENCV_MODULE_opencv_highgui_LOCATION}/include/opencv2/highgui.hpp" - "${OPENCV_MODULE_opencv_ml_LOCATION}/include/opencv2/ml.hpp" - "${OPENCV_MODULE_opencv_features2d_LOCATION}/include/opencv2/features2d.hpp" - "${OPENCV_MODULE_opencv_calib3d_LOCATION}/include/opencv2/calib3d.hpp" - "${OPENCV_MODULE_opencv_objdetect_LOCATION}/include/opencv2/objdetect.hpp" - ) -if(HAVE_opencv_nonfree) - list(APPEND opencv_hdrs "${OPENCV_MODULE_opencv_nonfree_LOCATION}/include/opencv2/nonfree/features2d.hpp" - "${OPENCV_MODULE_opencv_nonfree_LOCATION}/include/opencv2/nonfree.hpp") -endif() +set(opencv_hdrs_raw) +foreach(mod_name IN LISTS OPENCV_MODULE_opencv_python_DEPS) + list(APPEND opencv_hdrs_raw "${OPENCV_MODULE_${mod_name}_HEADERS}") +endforeach(mod_name) + +# remove problematic headers +set(opencv_hdrs) +foreach(hdr IN LISTS opencv_hdrs_raw) + if(NOT ${hdr} MATCHES ".h$") + if(NOT ${hdr} MATCHES "opencv2/core/cuda") + if(NOT ${hdr} MATCHES "opencv2/objdetect/detection_based_tracker.hpp") + list(APPEND opencv_hdrs ${hdr}) + endif() + endif() + endif() +endforeach(hdr) + set(cv2_generated_hdrs "${CMAKE_CURRENT_BINARY_DIR}/pyopencv_generated_include.h" diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index b613ccd4ac..5666355bb6 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -831,8 +831,10 @@ class PythonWrapperGenerator(object): # step 1: scan the headers and build more descriptive maps of classes, consts, functions for hdr in srcfiles: - self.code_include.write( '#include "{}"\n'.format(hdr[hdr.rindex('opencv2/'):]) ) decls = parser.parse(hdr) + if len(decls) == 0: + continue + self.code_include.write( '#include "{}"\n'.format(hdr[hdr.rindex('opencv2/'):]) ) for decl in decls: name = decl[0] if name.startswith("struct") or name.startswith("class"): From 964657a155899f40ad5fedd7c3afd248566d860d Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Tue, 22 Jul 2014 14:22:46 +0400 Subject: [PATCH 51/71] glob for python module dependences --- modules/python/CMakeLists.txt | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index 4eaa885254..3da937fccc 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -11,7 +11,26 @@ if(ANDROID OR IOS OR NOT PYTHONLIBS_FOUND OR NOT PYTHON_NUMPY_INCLUDE_DIRS) endif() set(the_description "The python bindings") -ocv_add_module(python BINDINGS opencv_core opencv_flann opencv_imgproc opencv_video opencv_ml opencv_features2d opencv_imgcodecs opencv_videoio opencv_highgui opencv_calib3d opencv_photo opencv_objdetect OPTIONAL opencv_nonfree) + +set(candidate_deps_raw) +foreach(mp ${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) + file(GLOB names "${mp}/*") + list(APPEND candidate_deps_raw ${names}) +endforeach(mp) + +set(candidate_deps) +foreach(m IN LISTS candidate_deps_raw) + if(IS_DIRECTORY ${m}) + get_filename_component(m ${m} NAME) + if (NOT ${m} MATCHES "^cud(a|ev)") + if (NOT ${m} MATCHES "optim") # unless we handle namespaces + list(APPEND candidate_deps "opencv_${m}") + endif() + endif() + endif() +endforeach(m) + +ocv_add_module(python BINDINGS OPTIONAL ${candidate_deps}) ocv_module_include_directories( "${PYTHON_INCLUDE_PATH}" @@ -21,9 +40,9 @@ ocv_module_include_directories( set(opencv_hdrs_raw) -foreach(mod_name IN LISTS OPENCV_MODULE_opencv_python_DEPS) - list(APPEND opencv_hdrs_raw "${OPENCV_MODULE_${mod_name}_HEADERS}") -endforeach(mod_name) +foreach(m IN LISTS OPENCV_MODULE_opencv_python_DEPS) + list(APPEND opencv_hdrs_raw "${OPENCV_MODULE_${m}_HEADERS}") +endforeach(m) # remove problematic headers set(opencv_hdrs) From 42ecfc5538b3da27f3ec4c0d5cd3c214547f8a6b Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Wed, 23 Jul 2014 14:54:04 +0400 Subject: [PATCH 52/71] fixed parsing enums with tailing comma gen2.py reports headers it processes cmake file restructured --- modules/python/CMakeLists.txt | 56 +++++++++++++++---------------- modules/python/src2/gen2.py | 1 + modules/python/src2/hdr_parser.py | 2 ++ 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index 3da937fccc..cccf0c3915 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -12,24 +12,19 @@ endif() set(the_description "The python bindings") -set(candidate_deps_raw) +set(candidate_deps) foreach(mp ${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) file(GLOB names "${mp}/*") - list(APPEND candidate_deps_raw ${names}) + foreach(m IN LISTS names) + if(IS_DIRECTORY ${m}) + get_filename_component(m ${m} NAME) + if (NOT ${m} MATCHES "^cud(a|ev)") + list(APPEND candidate_deps "opencv_${m}") + endif() + endif() + endforeach(m) endforeach(mp) -set(candidate_deps) -foreach(m IN LISTS candidate_deps_raw) - if(IS_DIRECTORY ${m}) - get_filename_component(m ${m} NAME) - if (NOT ${m} MATCHES "^cud(a|ev)") - if (NOT ${m} MATCHES "optim") # unless we handle namespaces - list(APPEND candidate_deps "opencv_${m}") - endif() - endif() - endif() -endforeach(m) - ocv_add_module(python BINDINGS OPTIONAL ${candidate_deps}) ocv_module_include_directories( @@ -39,23 +34,26 @@ ocv_module_include_directories( ) -set(opencv_hdrs_raw) -foreach(m IN LISTS OPENCV_MODULE_opencv_python_DEPS) - list(APPEND opencv_hdrs_raw "${OPENCV_MODULE_${m}_HEADERS}") -endforeach(m) +set(opencv_hdrs_blacklist + ".h$" + "opencv2/core/cuda" + "opencv2/objdetect/detection_based_tracker.hpp" + "opencv2/optim.hpp") -# remove problematic headers set(opencv_hdrs) -foreach(hdr IN LISTS opencv_hdrs_raw) - if(NOT ${hdr} MATCHES ".h$") - if(NOT ${hdr} MATCHES "opencv2/core/cuda") - if(NOT ${hdr} MATCHES "opencv2/objdetect/detection_based_tracker.hpp") - list(APPEND opencv_hdrs ${hdr}) - endif() - endif() - endif() -endforeach(hdr) - +foreach(m IN LISTS OPENCV_MODULE_opencv_python_DEPS) + foreach(hdr IN LISTS OPENCV_MODULE_${m}_HEADERS) + set(good TRUE) + foreach(s IN LISTS opencv_hdrs_blacklist) + if (${hdr} MATCHES ${s}) + set(good FALSE) + endif() + endforeach(s) + if(${good}) + list(APPEND opencv_hdrs ${hdr}) + endif() + endforeach(hdr) +endforeach(m) set(cv2_generated_hdrs "${CMAKE_CURRENT_BINARY_DIR}/pyopencv_generated_include.h" diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 5666355bb6..9488107715 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -831,6 +831,7 @@ class PythonWrapperGenerator(object): # step 1: scan the headers and build more descriptive maps of classes, consts, functions for hdr in srcfiles: + print(hdr) decls = parser.parse(hdr) if len(decls) == 0: continue diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index 92f1b7347c..eb9100928f 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -206,6 +206,8 @@ class CppHeaderParser(object): def parse_enum(self, decl_str): l = decl_str ll = l.split(",") + if ll[-1].strip() == "": + ll = ll[:-1] prev_val = "" prev_val_delta = -1 decl = [] From 9ab3b89c53aa0153111398b08f880f68bb7e497d Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Sun, 27 Jul 2014 16:35:35 +0400 Subject: [PATCH 53/71] pass header list to gen2.py by a text file (avoid command line length limit on windows) --- modules/python/CMakeLists.txt | 36 +++++++++++++---------------------- modules/python/src2/gen2.py | 3 +-- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index cccf0c3915..57a8ed7b4f 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -12,18 +12,17 @@ endif() set(the_description "The python bindings") -set(candidate_deps) +set(candidate_deps "") foreach(mp ${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) file(GLOB names "${mp}/*") foreach(m IN LISTS names) if(IS_DIRECTORY ${m}) get_filename_component(m ${m} NAME) - if (NOT ${m} MATCHES "^cud(a|ev)") - list(APPEND candidate_deps "opencv_${m}") - endif() + list(APPEND candidate_deps "opencv_${m}") endif() endforeach(m) endforeach(mp) +ocv_list_filterout(candidate_deps "^opencv_cud(a|ev)") ocv_add_module(python BINDINGS OPTIONAL ${candidate_deps}) @@ -34,27 +33,16 @@ ocv_module_include_directories( ) -set(opencv_hdrs_blacklist - ".h$" - "opencv2/core/cuda" - "opencv2/objdetect/detection_based_tracker.hpp" - "opencv2/optim.hpp") - -set(opencv_hdrs) +set(opencv_hdrs "") foreach(m IN LISTS OPENCV_MODULE_opencv_python_DEPS) - foreach(hdr IN LISTS OPENCV_MODULE_${m}_HEADERS) - set(good TRUE) - foreach(s IN LISTS opencv_hdrs_blacklist) - if (${hdr} MATCHES ${s}) - set(good FALSE) - endif() - endforeach(s) - if(${good}) - list(APPEND opencv_hdrs ${hdr}) - endif() - endforeach(hdr) + list(APPEND opencv_hdrs ${OPENCV_MODULE_${m}_HEADERS}) endforeach(m) +ocv_list_filterout(opencv_hdrs ".h$") +ocv_list_filterout(opencv_hdrs "opencv2/core/cuda") +ocv_list_filterout(opencv_hdrs "opencv2/objdetect/detection_based_tracker.hpp") +ocv_list_filterout(opencv_hdrs "opencv2/optim.hpp") + set(cv2_generated_hdrs "${CMAKE_CURRENT_BINARY_DIR}/pyopencv_generated_include.h" "${CMAKE_CURRENT_BINARY_DIR}/pyopencv_generated_funcs.h" @@ -63,11 +51,13 @@ set(cv2_generated_hdrs "${CMAKE_CURRENT_BINARY_DIR}/pyopencv_generated_type_reg.h" "${CMAKE_CURRENT_BINARY_DIR}/pyopencv_generated_const_reg.h") +file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" "${opencv_hdrs}") add_custom_command( OUTPUT ${cv2_generated_hdrs} - COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src2/gen2.py" ${CMAKE_CURRENT_BINARY_DIR} ${opencv_hdrs} + COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src2/gen2.py" ${CMAKE_CURRENT_BINARY_DIR} "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src2/gen2.py DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src2/hdr_parser.py + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/headers.txt DEPENDS ${opencv_hdrs}) add_library(${the_module} SHARED src2/cv2.cpp ${cv2_generated_hdrs}) diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 9488107715..684b80f4e8 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -831,7 +831,6 @@ class PythonWrapperGenerator(object): # step 1: scan the headers and build more descriptive maps of classes, consts, functions for hdr in srcfiles: - print(hdr) decls = parser.parse(hdr) if len(decls) == 0: continue @@ -904,6 +903,6 @@ if __name__ == "__main__": if len(sys.argv) > 1: dstdir = sys.argv[1] if len(sys.argv) > 2: - srcfiles = sys.argv[2:] + srcfiles = open(sys.argv[2], 'r').read().split(';') generator = PythonWrapperGenerator() generator.gen(srcfiles, dstdir) From a2ae1db31c8a831c254952550786b3c589319285 Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Sun, 27 Jul 2014 17:41:12 +0400 Subject: [PATCH 54/71] 'adas' extra module doesn't produce .lib on windows (causes link error) --- modules/python/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index 57a8ed7b4f..c41d19a3f9 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -23,6 +23,8 @@ foreach(mp ${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) endforeach(m) endforeach(mp) ocv_list_filterout(candidate_deps "^opencv_cud(a|ev)") +ocv_list_filterout(candidate_deps "^opencv_adas$") + ocv_add_module(python BINDINGS OPTIONAL ${candidate_deps}) From 5f47f70183612d6a1228e5dd3d97529230fa4b23 Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Sun, 27 Jul 2014 18:03:27 +0400 Subject: [PATCH 55/71] module blacklist --- modules/python/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index c41d19a3f9..66ac1addd6 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -22,8 +22,11 @@ foreach(mp ${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) endif() endforeach(m) endforeach(mp) + +# module blacklist ocv_list_filterout(candidate_deps "^opencv_cud(a|ev)") ocv_list_filterout(candidate_deps "^opencv_adas$") +ocv_list_filterout(candidate_deps "^opencv_tracking$") ocv_add_module(python BINDINGS OPTIONAL ${candidate_deps}) @@ -40,6 +43,7 @@ foreach(m IN LISTS OPENCV_MODULE_opencv_python_DEPS) list(APPEND opencv_hdrs ${OPENCV_MODULE_${m}_HEADERS}) endforeach(m) +# header blacklist ocv_list_filterout(opencv_hdrs ".h$") ocv_list_filterout(opencv_hdrs "opencv2/core/cuda") ocv_list_filterout(opencv_hdrs "opencv2/objdetect/detection_based_tracker.hpp") From fc8f0e341b2b33d8f116e215b180b8474c7fb537 Mon Sep 17 00:00:00 2001 From: Kobi Gurkan Date: Sun, 27 Jul 2014 17:09:05 +0300 Subject: [PATCH 56/71] adding support for 16-bit 4 channel tiffs - when IMREAD_UNCHANGED is set in flags, the tiff is read with 4 channels - adding test for read/write --- modules/imgcodecs/src/grfmt_tiff.cpp | 17 ++++++++++++++++- modules/imgcodecs/test/test_grfmt.cpp | 6 ++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index 9013c39d11..3ec68c8669 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -158,7 +158,7 @@ bool TiffDecoder::readHeader() m_type = CV_MAKETYPE(CV_8U, photometric > 1 ? wanted_channels : 1); break; case 16: - m_type = CV_MAKETYPE(CV_16U, photometric > 1 ? 3 : 1); + m_type = CV_MAKETYPE(CV_16U, photometric > 1 ? wanted_channels : 1); break; case 32: @@ -326,6 +326,21 @@ bool TiffDecoder::readData( Mat& img ) (ushort*)(data + img.step*i) + x*3, 0, cvSize(tile_width,1) ); } + else if (ncn == 4) + { + if (wanted_channels == 4) + { + icvCvt_BGRA2RGBA_16u_C4R(buffer16 + i*tile_width0*ncn, 0, + (ushort*)(data + img.step*i) + x * 4, 0, + cvSize(tile_width, 1)); + } + else + { + icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, + (ushort*)(data + img.step*i) + x * 3, 0, + cvSize(tile_width, 1), 2); + } + } else { icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp index 9b06c5744c..37c9bf879c 100644 --- a/modules/imgcodecs/test/test_grfmt.cpp +++ b/modules/imgcodecs/test/test_grfmt.cpp @@ -139,9 +139,6 @@ public: string filename = cv::tempfile(".jpg"); imwrite(filename, img); - img = imread(filename, IMREAD_UNCHANGED); - - filename = string(ts->get_data_path() + "readwrite/test_" + char(k + 48) + "_c" + char(num_channels + 48) + ".jpg"); ts->printf(ts->LOG, "reading test image : %s\n", filename.c_str()); Mat img_test = imread(filename, IMREAD_UNCHANGED); @@ -160,8 +157,9 @@ public: #endif #ifdef HAVE_TIFF - for (int num_channels = 1; num_channels <= 3; num_channels+=2) + for (int num_channels = 1; num_channels <= 4; num_channels++) { + if (num_channels == 2) continue; // tiff ts->printf(ts->LOG, "image type depth:%d channels:%d ext: %s\n", CV_16U, num_channels, ".tiff"); Mat img(img_r * k, img_c * k, CV_MAKETYPE(CV_16U, num_channels), Scalar::all(0)); From 6cac76bfe4400c56bcdc622d15fb4cb3f3f50f36 Mon Sep 17 00:00:00 2001 From: Kobi Gurkan Date: Sun, 27 Jul 2014 17:14:54 +0300 Subject: [PATCH 57/71] fixes tab indentation - replacing with 4 spaces --- modules/imgcodecs/src/grfmt_tiff.cpp | 32 +++++++++++++-------------- modules/imgcodecs/test/test_grfmt.cpp | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index 3ec68c8669..06b2ab6a15 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -158,7 +158,7 @@ bool TiffDecoder::readHeader() m_type = CV_MAKETYPE(CV_8U, photometric > 1 ? wanted_channels : 1); break; case 16: - m_type = CV_MAKETYPE(CV_16U, photometric > 1 ? wanted_channels : 1); + m_type = CV_MAKETYPE(CV_16U, photometric > 1 ? wanted_channels : 1); break; case 32: @@ -326,21 +326,21 @@ bool TiffDecoder::readData( Mat& img ) (ushort*)(data + img.step*i) + x*3, 0, cvSize(tile_width,1) ); } - else if (ncn == 4) - { - if (wanted_channels == 4) - { - icvCvt_BGRA2RGBA_16u_C4R(buffer16 + i*tile_width0*ncn, 0, - (ushort*)(data + img.step*i) + x * 4, 0, - cvSize(tile_width, 1)); - } - else - { - icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, - (ushort*)(data + img.step*i) + x * 3, 0, - cvSize(tile_width, 1), 2); - } - } + else if (ncn == 4) + { + if (wanted_channels == 4) + { + icvCvt_BGRA2RGBA_16u_C4R(buffer16 + i*tile_width0*ncn, 0, + (ushort*)(data + img.step*i) + x * 4, 0, + cvSize(tile_width, 1)); + } + else + { + icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, + (ushort*)(data + img.step*i) + x * 3, 0, + cvSize(tile_width, 1), 2); + } + } else { icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp index 37c9bf879c..02e84d67f5 100644 --- a/modules/imgcodecs/test/test_grfmt.cpp +++ b/modules/imgcodecs/test/test_grfmt.cpp @@ -159,7 +159,7 @@ public: #ifdef HAVE_TIFF for (int num_channels = 1; num_channels <= 4; num_channels++) { - if (num_channels == 2) continue; + if (num_channels == 2) continue; // tiff ts->printf(ts->LOG, "image type depth:%d channels:%d ext: %s\n", CV_16U, num_channels, ".tiff"); Mat img(img_r * k, img_c * k, CV_MAKETYPE(CV_16U, num_channels), Scalar::all(0)); From 586ace1873fbf2f588b2edce29e6917c73c06e67 Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Sun, 27 Jul 2014 18:25:42 +0400 Subject: [PATCH 58/71] double to float cast warning fix --- modules/shape/include/opencv2/shape/hist_cost.hpp | 8 ++++---- modules/shape/include/opencv2/shape/shape_distance.hpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/shape/include/opencv2/shape/hist_cost.hpp b/modules/shape/include/opencv2/shape/hist_cost.hpp index 9ca3825fda..0ff3573eea 100644 --- a/modules/shape/include/opencv2/shape/hist_cost.hpp +++ b/modules/shape/include/opencv2/shape/hist_cost.hpp @@ -73,7 +73,7 @@ public: }; CV_EXPORTS_W Ptr - createNormHistogramCostExtractor(int flag=DIST_L2, int nDummies=25, float defaultCost=0.2); + createNormHistogramCostExtractor(int flag=DIST_L2, int nDummies=25, float defaultCost=0.2f); /*! */ class CV_EXPORTS_W EMDHistogramCostExtractor : public HistogramCostExtractor @@ -84,20 +84,20 @@ public: }; CV_EXPORTS_W Ptr - createEMDHistogramCostExtractor(int flag=DIST_L2, int nDummies=25, float defaultCost=0.2); + createEMDHistogramCostExtractor(int flag=DIST_L2, int nDummies=25, float defaultCost=0.2f); /*! */ class CV_EXPORTS_W ChiHistogramCostExtractor : public HistogramCostExtractor {}; -CV_EXPORTS_W Ptr createChiHistogramCostExtractor(int nDummies=25, float defaultCost=0.2); +CV_EXPORTS_W Ptr createChiHistogramCostExtractor(int nDummies=25, float defaultCost=0.2f); /*! */ class CV_EXPORTS_W EMDL1HistogramCostExtractor : public HistogramCostExtractor {}; CV_EXPORTS_W Ptr - createEMDL1HistogramCostExtractor(int nDummies=25, float defaultCost=0.2); + createEMDL1HistogramCostExtractor(int nDummies=25, float defaultCost=0.2f); } // cv #endif diff --git a/modules/shape/include/opencv2/shape/shape_distance.hpp b/modules/shape/include/opencv2/shape/shape_distance.hpp index 55e21aaa4a..acdb6e5f6e 100644 --- a/modules/shape/include/opencv2/shape/shape_distance.hpp +++ b/modules/shape/include/opencv2/shape/shape_distance.hpp @@ -116,7 +116,7 @@ public: /* Complete constructor */ CV_EXPORTS_W Ptr createShapeContextDistanceExtractor(int nAngularBins=12, int nRadialBins=4, - float innerRadius=0.2, float outerRadius=2, int iterations=3, + float innerRadius=0.2f, float outerRadius=2, int iterations=3, const Ptr &comparer = createChiHistogramCostExtractor(), const Ptr &transformer = createThinPlateSplineShapeTransformer()); @@ -137,7 +137,7 @@ public: }; /* Constructor */ -CV_EXPORTS_W Ptr createHausdorffDistanceExtractor(int distanceFlag=cv::NORM_L2, float rankProp=0.6); +CV_EXPORTS_W Ptr createHausdorffDistanceExtractor(int distanceFlag=cv::NORM_L2, float rankProp=0.6f); } // cv #endif From ba706177cbc0ffd48f6bfc32352cb3046f0d9f57 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 28 Jul 2014 12:13:52 +0400 Subject: [PATCH 59/71] fix cudacodec module dependecies --- modules/cudacodec/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cudacodec/CMakeLists.txt b/modules/cudacodec/CMakeLists.txt index ca62995505..5d8f7327c0 100644 --- a/modules/cudacodec/CMakeLists.txt +++ b/modules/cudacodec/CMakeLists.txt @@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef) -ocv_add_module(cudacodec OPTIONAL opencv_cudev) +ocv_add_module(cudacodec opencv_core opencv_videoio OPTIONAL opencv_cudev) ocv_module_include_directories() ocv_glob_module_sources() From 27d97aa231b9b936298a57c16ac2d9528cd30ddc Mon Sep 17 00:00:00 2001 From: Elena Gvozdeva Date: Mon, 28 Jul 2014 12:21:00 +0400 Subject: [PATCH 60/71] fixed --- modules/core/src/stat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 60c8dc89bb..a26d3345c3 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -479,7 +479,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask haveMask = _mask.kind() != _InputArray::NONE, haveSrc2 = _src2.kind() != _InputArray::NONE; int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src) : 1, + kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src, _src2) : 1, mcn = std::max(cn, kercn); CV_Assert(!haveSrc2 || _src2.type() == type); int convert_cn = haveSrc2 ? mcn : cn; From fecfaf4092fec486aa7c6ea2f4e3121500180afe Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Sun, 27 Jul 2014 13:31:46 +0400 Subject: [PATCH 61/71] Using std::map in PlanCache --- modules/core/src/dxt.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index b57e4e8cc0..69cac1a0fc 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -43,6 +43,7 @@ #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" #include "opencv2/core/opencl/runtime/opencl_core.hpp" #include "opencl_kernels.hpp" +#include namespace cv { @@ -1801,10 +1802,9 @@ private: String buildOptions; int thread_count; bool status; - -public: int dft_size; +public: OCL_FftPlan(int _size): dft_size(_size), status(true) { int min_radix; @@ -1999,18 +1999,17 @@ public: Ptr getFftPlan(int dft_size) { - for (size_t i = 0, size = planStorage.size(); i < size; ++i) + std::map >::iterator f = planStorage.find(dft_size); + if (f != planStorage.end()) { - Ptr plan = planStorage[i]; - if (plan->dft_size == dft_size) - { - return plan; - } + return f->second; + } + else + { + Ptr newPlan = Ptr(new OCL_FftPlan(dft_size)); + planStorage[dft_size] = newPlan; + return newPlan; } - - Ptr newPlan = Ptr(new OCL_FftPlan(dft_size)); - planStorage.push_back(newPlan); - return newPlan; } ~OCL_FftPlanCache() @@ -2023,8 +2022,7 @@ protected: planStorage() { } - - std::vector > planStorage; + std::map > planStorage; }; static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) From f42bb0bd6528c7e77a2872158a0577b7e713cd63 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 28 Jul 2014 15:45:54 +0400 Subject: [PATCH 62/71] disable OpenCL difference norms with mask, since some of the tests fail --- modules/core/src/stat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index a26d3345c3..a1415652cc 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -2540,7 +2540,7 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr normType &= ~NORM_RELATIVE; bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR; - if ( !(normType == NORM_INF || normsum) ) + if ( !(normType == NORM_INF || normsum) || !_mask.empty() ) return false; if (normsum) From 1ad9827fc4ede1b9c42515569fcc5d8d1106a4ea Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 28 Jul 2014 16:48:53 +0400 Subject: [PATCH 63/71] removed ERFilter (to be moved to opencv_contrib/modules/text) and lineMOD (to be moved to opencv_contrib/modules/rgbd) --- modules/objdetect/doc/erfilter.rst | 211 -- modules/objdetect/doc/latent_svm.rst | 262 -- modules/objdetect/doc/objdetect.rst | 1 - modules/objdetect/doc/pics/component_tree.png | Bin 108183 -> 0 bytes .../objdetect/include/opencv2/objdetect.hpp | 2 - .../include/opencv2/objdetect/erfilter.hpp | 266 -- .../include/opencv2/objdetect/linemod.hpp | 455 --- modules/objdetect/src/erfilter.cpp | 3187 ----------------- modules/objdetect/src/linemod.cpp | 1844 ---------- modules/objdetect/src/normal_lut.i | 4 - .../MacOSX/FaceTracker/FaceTracker-Info.plist | 20 - samples/MacOSX/FaceTracker/FaceTracker.cpp | 86 - .../FaceTracker.xcodeproj/project.pbxproj | Bin 9202 -> 0 bytes samples/MacOSX/FaceTracker/README.txt | 35 - samples/cpp/linemod.cpp | 705 ---- samples/cpp/scenetext01.jpg | Bin 97100 -> 0 bytes samples/cpp/scenetext02.jpg | Bin 95135 -> 0 bytes samples/cpp/scenetext03.jpg | Bin 60751 -> 0 bytes samples/cpp/scenetext04.jpg | Bin 99487 -> 0 bytes samples/cpp/scenetext05.jpg | Bin 113689 -> 0 bytes samples/cpp/scenetext06.jpg | Bin 70430 -> 0 bytes samples/cpp/textdetection.cpp | 128 - 22 files changed, 7206 deletions(-) delete mode 100644 modules/objdetect/doc/erfilter.rst delete mode 100644 modules/objdetect/doc/latent_svm.rst delete mode 100644 modules/objdetect/doc/pics/component_tree.png delete mode 100644 modules/objdetect/include/opencv2/objdetect/erfilter.hpp delete mode 100644 modules/objdetect/include/opencv2/objdetect/linemod.hpp delete mode 100644 modules/objdetect/src/erfilter.cpp delete mode 100644 modules/objdetect/src/linemod.cpp delete mode 100644 modules/objdetect/src/normal_lut.i delete mode 100644 samples/MacOSX/FaceTracker/FaceTracker-Info.plist delete mode 100644 samples/MacOSX/FaceTracker/FaceTracker.cpp delete mode 100644 samples/MacOSX/FaceTracker/FaceTracker.xcodeproj/project.pbxproj delete mode 100644 samples/MacOSX/FaceTracker/README.txt delete mode 100644 samples/cpp/linemod.cpp delete mode 100644 samples/cpp/scenetext01.jpg delete mode 100644 samples/cpp/scenetext02.jpg delete mode 100644 samples/cpp/scenetext03.jpg delete mode 100644 samples/cpp/scenetext04.jpg delete mode 100644 samples/cpp/scenetext05.jpg delete mode 100644 samples/cpp/scenetext06.jpg delete mode 100644 samples/cpp/textdetection.cpp diff --git a/modules/objdetect/doc/erfilter.rst b/modules/objdetect/doc/erfilter.rst deleted file mode 100644 index 85d6bcc7fe..0000000000 --- a/modules/objdetect/doc/erfilter.rst +++ /dev/null @@ -1,211 +0,0 @@ -Scene Text Detection -==================== - -.. highlight:: cpp - -Class-specific Extremal Regions for Scene Text Detection --------------------------------------------------------- - -The scene text detection algorithm described below has been initially proposed by Lukás Neumann & Jiri Matas [Neumann12]. The main idea behind Class-specific Extremal Regions is similar to the MSER in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image. However, this technique differs from MSER in that selection of suitable ERs is done by a sequential classifier trained for character detection, i.e. dropping the stability requirement of MSERs and selecting class-specific (not necessarily stable) regions. - -The component tree of an image is constructed by thresholding by an increasing value step-by-step from 0 to 255 and then linking the obtained connected components from successive levels in a hierarchy by their inclusion relation: - -.. image:: pics/component_tree.png - :width: 100% - -The component tree may conatain a huge number of regions even for a very simple image as shown in the previous image. This number can easily reach the order of 1 x 10^6 regions for an average 1 Megapixel image. In order to efficiently select suitable regions among all the ERs the algorithm make use of a sequential classifier with two differentiated stages. - -In the first stage incrementally computable descriptors (area, perimeter, bounding box, and euler number) are computed (in O(1)) for each region r and used as features for a classifier which estimates the class-conditional probability p(r|character). Only the ERs which correspond to local maximum of the probability p(r|character) are selected (if their probability is above a global limit p_min and the difference between local maximum and local minimum is greater than a \delta_min value). - -In the second stage, the ERs that passed the first stage are classified into character and non-character classes using more informative but also more computationally expensive features. (Hole area ratio, convex hull ratio, and the number of outer boundary inflexion points). - -This ER filtering process is done in different single-channel projections of the input image in order to increase the character localization recall. - -After the ER filtering is done on each input channel, character candidates must be grouped in high-level text blocks (i.e. words, text lines, paragraphs, ...). The grouping algorithm used in this implementation has been proposed by Lluis Gomez and Dimosthenis Karatzas in [Gomez13] and basically consist in finding meaningful groups of regions using a perceptual organization based clustering analisys (see :ocv:func:`erGrouping`). - - -To see the text detector at work, have a look at the textdetection demo: https://github.com/Itseez/opencv/blob/master/samples/cpp/textdetection.cpp - - -.. [Neumann12] Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012. The paper is available online at http://cmp.felk.cvut.cz/~neumalu1/neumann-cvpr2012.pdf - -.. [Gomez13] Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013. The paper is available online at http://158.109.8.37/files/GoK2013.pdf - - -ERStat ------- -.. ocv:struct:: ERStat - -The ERStat structure represents a class-specific Extremal Region (ER). - -An ER is a 4-connected set of pixels with all its grey-level values smaller than the values in its outer boundary. A class-specific ER is selected (using a classifier) from all the ER's in the component tree of the image. :: - - struct CV_EXPORTS ERStat - { - public: - //! Constructor - explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); - //! Destructor - ~ERStat() { } - - //! seed point and threshold (max grey-level value) - int pixel; - int level; - - //! incrementally computable features - int area; - int perimeter; - int euler; //!< euler number - Rect rect; //!< bounding box - double raw_moments[2]; //!< order 1 raw moments to derive the centroid - double central_moments[3]; //!< order 2 central moments to construct the covariance matrix - std::deque *crossings;//!< horizontal crossings - float med_crossings; //!< median of the crossings at three different height levels - - //! 2nd stage features - float hole_area_ratio; - float convex_hull_ratio; - float num_inflexion_points; - - //! probability that the ER belongs to the class we are looking for - double probability; - - //! pointers preserving the tree structure of the component tree - ERStat* parent; - ERStat* child; - ERStat* next; - ERStat* prev; - }; - -computeNMChannels ------------------ -Compute the different channels to be processed independently in the N&M algorithm [Neumann12]. - -.. ocv:function:: void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad) - - :param _src: Source image. Must be RGB ``CV_8UC3``. - :param _channels: Output vector where computed channels are stored. - :param _mode: Mode of operation. Currently the only available options are: **ERFILTER_NM_RGBLGrad** (used by default) and **ERFILTER_NM_IHSGrad**. - -In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient magnitude channels (Grad) are used in order to obtain high localization recall. This implementation also provides an alternative combination of red (R), green (G), blue (B), lightness (L), and gradient magnitude (Grad). - - -ERFilter --------- -.. ocv:class:: ERFilter : public Algorithm - -Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm [Neumann12]. :: - - class CV_EXPORTS ERFilter : public Algorithm - { - public: - - //! callback with the classifier is made a class. - //! By doing it we hide SVM, Boost etc. Developers can provide their own classifiers - class CV_EXPORTS Callback - { - public: - virtual ~Callback() { } - //! The classifier must return probability measure for the region. - virtual double eval(const ERStat& stat) = 0; - }; - - /*! - the key method. Takes image on input and returns the selected regions in a vector of ERStat - only distinctive ERs which correspond to characters are selected by a sequential classifier - */ - virtual void run( InputArray image, std::vector& regions ) = 0; - - (...) - - }; - - - -ERFilter::Callback ------------------- -Callback with the classifier is made a class. By doing it we hide SVM, Boost etc. Developers can provide their own classifiers to the ERFilter algorithm. - -.. ocv:class:: ERFilter::Callback - -ERFilter::Callback::eval ------------------------- -The classifier must return probability measure for the region. - -.. ocv:function:: double ERFilter::Callback::eval(const ERStat& stat) - - :param stat: The region to be classified - -ERFilter::run -------------- -The key method of ERFilter algorithm. Takes image on input and returns the selected regions in a vector of ERStat only distinctive ERs which correspond to characters are selected by a sequential classifier - -.. ocv:function:: void ERFilter::run( InputArray image, std::vector& regions ) - - :param image: Sinle channel image ``CV_8UC1`` - :param regions: Output for the 1st stage and Input/Output for the 2nd. The selected Extremal Regions are stored here. - -Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier. - -createERFilterNM1 ------------------ -Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12]. - -.. ocv:function:: Ptr createERFilterNM1( const Ptr& cb, int thresholdDelta = 1, float minArea = 0.00025, float maxArea = 0.13, float minProbability = 0.4, bool nonMaxSuppression = true, float minProbabilityDiff = 0.1 ) - - :param cb: Callback with the classifier. Default classifier can be implicitly load with function :ocv:func:`loadClassifierNM1`, e.g. from file in samples/cpp/trained_classifierNM1.xml - :param thresholdDelta: Threshold step in subsequent thresholds when extracting the component tree - :param minArea: The minimum area (% of image size) allowed for retreived ER's - :param minArea: The maximum area (% of image size) allowed for retreived ER's - :param minProbability: The minimum probability P(er|character) allowed for retreived ER's - :param nonMaxSuppression: Whenever non-maximum suppression is done over the branch probabilities - :param minProbability: The minimum probability difference between local maxima and local minima ERs - -The component tree of the image is extracted by a threshold increased step by step from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness, number of holes, and number of horizontal crossings) are computed for each ER and used as features for a classifier which estimates the class-conditional probability P(er|character). The value of P(er|character) is tracked using the inclusion relation of ER across all thresholds and only the ERs which correspond to local maximum of the probability P(er|character) are selected (if the local maximum of the probability is above a global limit pmin and the difference between local maximum and local minimum is greater than minProbabilityDiff). - -createERFilterNM2 ------------------ -Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12]. - -.. ocv:function:: Ptr createERFilterNM2( const Ptr& cb, float minProbability = 0.3 ) - - :param cb: Callback with the classifier. Default classifier can be implicitly load with function :ocv:func:`loadClassifierNM2`, e.g. from file in samples/cpp/trained_classifierNM2.xml - :param minProbability: The minimum probability P(er|character) allowed for retreived ER's - -In the second stage, the ERs that passed the first stage are classified into character and non-character classes using more informative but also more computationally expensive features. The classifier uses all the features calculated in the first stage and the following additional features: hole area ratio, convex hull ratio, and number of outer inflexion points. - -loadClassifierNM1 ------------------ -Allow to implicitly load the default classifier when creating an ERFilter object. - -.. ocv:function:: Ptr loadClassifierNM1(const std::string& filename) - - :param filename: The XML or YAML file with the classifier model (e.g. trained_classifierNM1.xml) - -returns a pointer to ERFilter::Callback. - -loadClassifierNM2 ------------------ -Allow to implicitly load the default classifier when creating an ERFilter object. - -.. ocv:function:: Ptr loadClassifierNM2(const std::string& filename) - - :param filename: The XML or YAML file with the classifier model (e.g. trained_classifierNM2.xml) - -returns a pointer to ERFilter::Callback. - -erGrouping ----------- -Find groups of Extremal Regions that are organized as text blocks. - -.. ocv:function:: void erGrouping( InputArrayOfArrays src, std::vector > ®ions, const std::string& filename, float minProbablity, std::vector &groups) - - :param src: Vector of sinle channel images CV_8UC1 from wich the regions were extracted - :param regions: Vector of ER's retreived from the ERFilter algorithm from each channel - :param filename: The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml) - :param minProbability: The minimum probability for accepting a group - :param groups: The output of the algorithm are stored in this parameter as list of rectangles. - -This function implements the grouping algorithm described in [Gomez13]. Notice that this implementation constrains the results to horizontally-aligned text and latin script (since ERFilter classifiers are trained only for latin script detection). - -The algorithm combines two different clustering techniques in a single parameter-free procedure to detect groups of regions organized as text. The maximally meaningful groups are fist detected in several feature spaces, where each feature space is a combination of proximity information (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.), thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to combine all these hypotheses to get the final estimate. Each of the resulting groups are finally validated using a classifier in order to assess if they form a valid horizontally-aligned text block. diff --git a/modules/objdetect/doc/latent_svm.rst b/modules/objdetect/doc/latent_svm.rst deleted file mode 100644 index 4b4ff117fa..0000000000 --- a/modules/objdetect/doc/latent_svm.rst +++ /dev/null @@ -1,262 +0,0 @@ -Latent SVM -=============================================================== - -Discriminatively Trained Part Based Models for Object Detection ---------------------------------------------------------------- - -The object detector described below has been initially proposed by -P.F. Felzenszwalb in [Felzenszwalb2010]_. It is based on a -Dalal-Triggs detector that uses a single filter on histogram of -oriented gradients (HOG) features to represent an object category. -This detector uses a sliding window approach, where a filter is -applied at all positions and scales of an image. The first -innovation is enriching the Dalal-Triggs model using a -star-structured part-based model defined by a "root" filter -(analogous to the Dalal-Triggs filter) plus a set of parts filters -and associated deformation models. The score of one of star models -at a particular position and scale within an image is the score of -the root filter at the given location plus the sum over parts of the -maximum, over placements of that part, of the part filter score on -its location minus a deformation cost easuring the deviation of the -part from its ideal location relative to the root. Both root and -part filter scores are defined by the dot product between a filter -(a set of weights) and a subwindow of a feature pyramid computed -from the input image. Another improvement is a representation of the -class of models by a mixture of star models. The score of a mixture -model at a particular position and scale is the maximum over -components, of the score of that component model at the given -location. - -In OpenCV there are C implementation of Latent SVM and C++ wrapper of it. -C version is the structure :ocv:struct:`CvObjectDetection` and a set of functions -working with this structure (see :ocv:func:`cvLoadLatentSvmDetector`, -:ocv:func:`cvReleaseLatentSvmDetector`, :ocv:func:`cvLatentSvmDetectObjects`). -C++ version is the class :ocv:class:`LatentSvmDetector` and has slightly different -functionality in contrast with C version - it supports loading and detection -of several models. - -There are two examples of Latent SVM usage: ``samples/c/latentsvmdetect.cpp`` -and ``samples/cpp/latentsvm_multidetect.cpp``. - -.. highlight:: c - - -CvLSVMFilterPosition --------------------- -.. ocv:struct:: CvLSVMFilterPosition - - Structure describes the position of the filter in the feature pyramid. - - .. ocv:member:: unsigned int l - - level in the feature pyramid - - .. ocv:member:: unsigned int x - - x-coordinate in level l - - .. ocv:member:: unsigned int y - - y-coordinate in level l - - -CvLSVMFilterObject ------------------- -.. ocv:struct:: CvLSVMFilterObject - - Description of the filter, which corresponds to the part of the object. - - .. ocv:member:: CvLSVMFilterPosition V - - ideal (penalty = 0) position of the partial filter - from the root filter position (V_i in the paper) - - .. ocv:member:: float fineFunction[4] - - vector describes penalty function (d_i in the paper) - pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2 - - .. ocv:member:: int sizeX - .. ocv:member:: int sizeY - - Rectangular map (sizeX x sizeY), - every cell stores feature vector (dimension = p) - - .. ocv:member:: int numFeatures - - number of features - - .. ocv:member:: float *H - - matrix of feature vectors to set and get - feature vectors (i,j) used formula H[(j * sizeX + i) * p + k], - where k - component of feature vector in cell (i, j) - -CvLatentSvmDetector -------------------- -.. ocv:struct:: CvLatentSvmDetector - - Structure contains internal representation of trained Latent SVM detector. - - .. ocv:member:: int num_filters - - total number of filters (root plus part) in model - - .. ocv:member:: int num_components - - number of components in model - - .. ocv:member:: int* num_part_filters - - array containing number of part filters for each component - - .. ocv:member:: CvLSVMFilterObject** filters - - root and part filters for all model components - - .. ocv:member:: float* b - - biases for all model components - - .. ocv:member:: float score_threshold - - confidence level threshold - - -CvObjectDetection ------------------ -.. ocv:struct:: CvObjectDetection - - Structure contains the bounding box and confidence level for detected object. - - .. ocv:member:: CvRect rect - - bounding box for a detected object - - .. ocv:member:: float score - - confidence level - - -cvLoadLatentSvmDetector ------------------------ -Loads trained detector from a file. - -.. ocv:function:: CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename) - - :param filename: Name of the file containing the description of a trained detector - - -cvReleaseLatentSvmDetector --------------------------- -Release memory allocated for CvLatentSvmDetector structure. - -.. ocv:function:: void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector) - - :param detector: CvLatentSvmDetector structure to be released - - -cvLatentSvmDetectObjects ------------------------- -Find rectangular regions in the given image that are likely to contain objects -and corresponding confidence levels. - -.. ocv:function:: CvSeq* cvLatentSvmDetectObjects( IplImage* image, CvLatentSvmDetector* detector, CvMemStorage* storage, float overlap_threshold=0.5f, int numThreads=-1 ) - - :param image: image - :param detector: LatentSVM detector in internal representation - :param storage: Memory storage to store the resultant sequence of the object candidate rectangles - :param overlap_threshold: Threshold for the non-maximum suppression algorithm - :param numThreads: Number of threads used in parallel version of the algorithm - -.. highlight:: cpp - -LatentSvmDetector ------------------ -.. ocv:class:: LatentSvmDetector - -This is a C++ wrapping class of Latent SVM. It contains internal representation of several -trained Latent SVM detectors (models) and a set of methods to load the detectors and detect objects -using them. - -LatentSvmDetector::ObjectDetection ----------------------------------- -.. ocv:struct:: LatentSvmDetector::ObjectDetection - - Structure contains the detection information. - - .. ocv:member:: Rect rect - - bounding box for a detected object - - .. ocv:member:: float score - - confidence level - - .. ocv:member:: int classID - - class (model or detector) ID that detect an object - - -LatentSvmDetector::LatentSvmDetector ------------------------------------- -Two types of constructors. - -.. ocv:function:: LatentSvmDetector::LatentSvmDetector() - -.. ocv:function:: LatentSvmDetector::LatentSvmDetector(const vector& filenames, const vector& classNames=vector()) - - - - :param filenames: A set of filenames storing the trained detectors (models). Each file contains one model. See examples of such files here /opencv_extra/testdata/cv/latentsvmdetector/models_VOC2007/. - - :param classNames: A set of trained models names. If it's empty then the name of each model will be constructed from the name of file containing the model. E.g. the model stored in "/home/user/cat.xml" will get the name "cat". - -LatentSvmDetector::~LatentSvmDetector -------------------------------------- -Destructor. - -.. ocv:function:: LatentSvmDetector::~LatentSvmDetector() - -LatentSvmDetector::~clear -------------------------- -Clear all trained models and their names stored in an class object. - -.. ocv:function:: void LatentSvmDetector::clear() - -LatentSvmDetector::load ------------------------ -Load the trained models from given ``.xml`` files and return ``true`` if at least one model was loaded. - -.. ocv:function:: bool LatentSvmDetector::load( const vector& filenames, const vector& classNames=vector() ) - - :param filenames: A set of filenames storing the trained detectors (models). Each file contains one model. See examples of such files here /opencv_extra/testdata/cv/latentsvmdetector/models_VOC2007/. - - :param classNames: A set of trained models names. If it's empty then the name of each model will be constructed from the name of file containing the model. E.g. the model stored in "/home/user/cat.xml" will get the name "cat". - -LatentSvmDetector::detect -------------------------- -Find rectangular regions in the given image that are likely to contain objects of loaded classes (models) -and corresponding confidence levels. - -.. ocv:function:: void LatentSvmDetector::detect( const Mat& image, vector& objectDetections, float overlapThreshold=0.5f, int numThreads=-1 ) - - :param image: An image. - :param objectDetections: The detections: rectangulars, scores and class IDs. - :param overlapThreshold: Threshold for the non-maximum suppression algorithm. - :param numThreads: Number of threads used in parallel version of the algorithm. - -LatentSvmDetector::getClassNames --------------------------------- -Return the class (model) names that were passed in constructor or method ``load`` or extracted from models filenames in those methods. - -.. ocv:function:: const vector& LatentSvmDetector::getClassNames() const - -LatentSvmDetector::getClassCount --------------------------------- -Return a count of loaded models (classes). - -.. ocv:function:: size_t LatentSvmDetector::getClassCount() const - - -.. [Felzenszwalb2010] Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D. and Ramanan, D. *Object Detection with Discriminatively Trained Part Based Models*. PAMI, vol. 32, no. 9, pp. 1627-1645, September 2010 diff --git a/modules/objdetect/doc/objdetect.rst b/modules/objdetect/doc/objdetect.rst index 0cd8cf3ef9..bbd5d0e359 100644 --- a/modules/objdetect/doc/objdetect.rst +++ b/modules/objdetect/doc/objdetect.rst @@ -8,5 +8,4 @@ objdetect. Object Detection :maxdepth: 2 cascade_classification - latent_svm erfilter diff --git a/modules/objdetect/doc/pics/component_tree.png b/modules/objdetect/doc/pics/component_tree.png deleted file mode 100644 index 7391e2de62cefd33825884758ec3196964802109..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108183 zcmZttbyyo+_dO1yE$+oiad&rjcMTMW5?qVB1S{@doZ{|Kpb!cKiWe_Xk^%(^L5sb) zf6x6q-_IX!t}9n&CJd7~d!4oR*=tXNuC@v;79|!E5)!VOs-iv;5_TaH5(*Lq;s8?S ztBFCx2eOa8iab)&EY%@m2gO!HMG@&4lsYqv*u(TxHSJU1w zhlJ)#YK{=_Z{#RMRR6!x;Hia}Xbof!F%c02vT4c$^Oto#L6p{@^q+HOJk-f(S>vbo ze_mX$yz&#?+My=)*Vt~{JiW7h%|<}5v120FG!6HqFm9M8M03puHoDd6^6HP3mA=cQ z|C_&ph`~R%69{x0${b;v6t15vZ!kFn>(i+CB>g^~U(gTCStk~Gg=?9^jci_A+`k%a zpO~~w+AC1S|?G!tlc&Ac7xRZCEK1g&1dbW2jofb_Px?sp{yt3CUg(q9ob{u5`! zHQ*WR7D>x}MrGL<`#r!9v`C^m@m^HS(`p^udmMk4?MXG z&_a0-V3{h)DXlA^wEo$yGmQh?JaoO#-7P5SErIZP|Ky=kwE-FKu+f?c;r4@|vk#vM z{tT3lu=mwzr?U@H6b>8f`~=zel@PD-GaTWK=Uz8>8^4AT^_RU7KBw&K&93%;Z)-77 z&rKU>;%fPYZDr~P0I#lWbe&Do3$FL;HsRFo(|UCv4)aN0+tW@|2DrOviQ(rji0+3&pC2y$6*>414p@m`AM>{@%Qplq34;Hwd*`aQ9V2n9_MkJ z2P~Ft1pXHxB9Vsou~s4HZnv#YPg{9*9E<*MxBrWPK)0bJ4zi9I8sT@{X$jR3y2!*Y zwu~YSsu3@~12nB%*VpyUkVd*9lsHQ$P;D0IyXNIBg@IuSnprs=L6(d;(A)&CQG@fTp7$hGohG;)Q=`z#hT=VCrE{hUm|f+ zzjr4#JZCI(todI0Wy&IFvFg7}08{GhMYfk`BtmPbYEvbgQmx{*6X7F4foN4y*!cQH zDki<1XQBvQUM(aGNfP(KXI10QK3(Z?B8%#IF_z|1s9ouFzfu_`*V`rST;(O7pfu9F z9OP1RW#KDQsOdrNTaD9isTOmc7}7Y;W1hCPw(#G9RCC+5Z{nVHMX57XwpVs^{E(LB zGGr5Fsae@SKO>c_yw5({tHWruXz&?Ax{&d0Rgvs8bE(yr1YoRY4+q;5uQH#vNz#8r zlI2n+Gn20q8fq1rWV3%w-_WjdV-%>iMEvfaJJiwa2wL&)6%Tza4GWVXJ0F+!&U5$$ z_}i4(D`w&P607CLki_2JU@rj?5dnX=(SrNOMAJ=PE!(6<@gY&;9!AEDpOokFC{s!t z8UtC4$iO;tSf3|-7K&y*e;M1=7XVGJG1&~!kt%nl%qqG*pn*FegS=g;!|Aqb_jlNT ziFh{28EzD-&Wqv9oI^>d=b|_Z>#cTn(@J0@aD-L`u#No~AIu9*A1iTMWp)Pt5=}F)`p@ru9xB*Elj>D{u7;4|4jJE=^Hdt=)+jbj0g@n&E@hX3 zDWNy!wmu)vM(zG57l>(YaWtY1D14?!?_2>KOX~O(ok5d}>PgMt7euyvyyZ>a7sVUX zjiYCSgcg48T~i^+Cp2vGsH!Stg~<8e?F9he!)bBzbyW!gAY2!o%Ek$80TEV#S4;YE z6%Nmhi!XktK>LNl5etHKl2xrD7S*x!x~z!JRWO;5j^F>AIbLoVi`YKOLsyAN~~Z*siB2RUF;I+qTa8v=xe~d_o_sMrl%Hh>W(6fd7v2 zOACG~WL^P!2KiilVa9JcPb!XL{$Fh!%iVp2TzXfyes z8qL7Hnt+Zcx=dD`kRqLk>^ZR`(No`(aZ@ARtO@m{gR-JBh9Z>7a6UOBKTxl4J|w@w zd2Q;?mizw*#>@@RLh^fJ?<_n-S_i)W26Ecr+t{HirgdAA3T>ZaW;%57s2rdp zWRO1@g!*%G1^^38sp7^bs{5k!EMOKl7LT$}u*h7o0)SE0k+#i_`)F5KpW2Z!Xiij_ z%&+Ix{vReE`iUz~d*oZ zTxjB9wX;p-l8nx8RY1)4F}c<+Y@k0puU&hsVZSk-_09V}vEm~ei{IPd>Gc`ejvsFS zc%~$yec*LWRK^lcRw3l2KMn~CDS{?j;BZ1hwi9oV)Hqm>Sv!=jIF2PtX`p?kj_cLt zE_~`7c2dq3b`)yJD-!sA|Kk5iq9c&_7laZqmV%{cXELErmv>L6d?&nFg{$2)g$C!X zR60oF33_4jggzIy=yL`V2 zcH&a`prJ$G;h8&AM14vFtJPDW=OTul&hnS&CeCxxL*ZkOfuA~su>;Mg0EgB2sUS}Q zk7GvlsRkJ69t0Ou>3YY_ULwc?vRfEf{%S%jLHFM4e-Hq=amSRSM6aka6@#3Ny_#Ioo2!0GVm^u)Y%Z2=caR(xhx zEXRhm=oC~}y}(FUR%9g0il;0%9Y{O^MYTTQEvA`^wvm3FCA>Kl=CbnF z;xzam{$V_C{`Vmy3FYr@fRB&3Accp^nGRfk3K32Qmqx;wUFh;jeW8~q&e(1M)dp+j z2O@5`Z1D5Pe-|aQ)6PEfQ}Kb~rE~r?&zChap#imJG&=*q&)1OS!ZFh+-6CGfmZg%# zs)nkbUVQE^8DMV_rkP~dIds_y3aSeH=>2oJs7d=cKhYFcv>XJ)-=dHj_`a6IeOZ&A?u)?r3E`1l46# zQvs4GpfUZIRv*};?WHskGfCf9V(|XSYI~a+AJp1Y*l(`kolEP~K5{c#_J6c4jRY>a z75)H!zMmV`q;u*Cy+1uckl#Kpj&zpou>qMmMy$~ZWqFB_VlD{_l*qres&+2>VkF$gq0`!23W}?Sgy(;xe_VTDMX5 zh=$%vvTW-roH5gt2VdmMpAO`4A4M_q-)uiL?}Xg^70mJCbQfHhnMu<E^` zRU^`~?)PQzyWOWntEWpV8X9@Wuz^s&`=jM^UOWZ1*(!G&b1S*eJu|tAOK?eukOBPN zx46C26D1^TuE2x%mUHOxyXc57wok^!+x<$637bK|Q5TNtbXi{Tqgluxl?NVZ@z8YB z+O42EKMjQ>Q!F-lX6cdY5q=rSS1+<~#0=Y#!I4C=|o!ez{zyLwzNEX562J5;vl}vYC*tX6O3q zZ0`1jxqZIJuDD4rG*A?k?TZyMJ_r1x>(1=iE+^(`ZL@F-7v;tvc;VU3aG0NuIF2^u z$aqV8A{=RDR^e@d|Gxr_h&qBZ5c(VP@Mm`@k?|p*Z&2(_oDKPA&kV{q`?UF(W_($F z<%Mny_#OnpB-jhNSzUyP&Q?_(m;Xf0BZO3+#-_1WXVoRwPD?^ABavDk6ohX?ARinZ zxfY9D^>~uPUZ9DiBTvmV2obW`Ya-GWgbL9`@i)cgBqJCI1}t!-+IgJ5@vtgR0ks}* zhRs{Yt9MkZ{&N2}cORAyE7M~B`}3HA)O+dT4C!g>Zh(SeB>edPMCE)Z9Ii3v_X&~G z+#iQO9qunV`jl)BhCly>`ci>MY#g<)P$6pN%w|gMM$PXU7XzD6=&T2%QZ(L``5m!l z@x7DW+}hfi6Q;r&JLe^JN+1B~7tZ&0{vl~}s92(09D~i_FM8geLJm#iKK6{fw~M=$ z$uI;=o20iv?k{abvql;lCFt1NR}h)N7A!UV(U*GoelHH7>4zq`W8|(@?)O=R-lIfn#o+oGyT3`oXDV}e_Z=`PQ9;k)`m6rm89v*h# zVg)5qo7>bAIrg@I*WdS(65ALvxG~4hq1V@!p4u3JPnWy!`a*FsSoLA6@CqYl*14gn zBn$+m$Tu_&t7g+rs@Oo9TIaF2z&H6*>GWie0m89^F7y~i?LFKDP=c!=g;0|v@#49g zQkl@c!ds}-Q1}K8m&DZW@Tr<@U!6pn7d^KZRZz;zUZG1PIb!nvgc;nNYy4U6Z7746a=#7CTInzHSnzSqnn2FL~ zFs|7TG@NxKmX;`4d|NxEFamV=xuhumT|81 ztcH?e$`|#sg{2y6tas4kb=g?@)h;hiSh#!J26UJob!BOILn&MXz>>kvfNU20u9LJ} z!={w?7Rh0DF)&I$ed=y|%G$*>5Gpx$6>@VoW+fvTH~f@)W;xs^hT}M~3~6&E$sLc@ z0W;T10>MmNjj)vyv`w;Axm&s0%?YX_TcX|S)mA|<&wm$ zU0~JwA;Vm|I#QL*1y_j?ZQNHT*@3puVQ2;Z4>AtNtfaLMt{-naJW@d)d+zv+eA(uC zHo$Iz3)ry#{LHYc$&8Td$AxrKYWK7XlNl@1LKVd&&^<7v+82mOhD15#IC^u^MG`dC zyKc?VGB+U_=X(>zG0mwrB49gg^%3!5VmA8)B#wP8F)B{HeNl2Cj51ALcS*e}3l)n8 z5>~5fT3_Xnsb5tUFg*DVRgQxwUw=kuY39=3_{9P%ldX6u&8c+g!ynko@T=zMZ%-i;nnN*lHGqLXyPxILa<7dv6&@JX2s}zMLv7B! z&&b57@7&Jqrlm4ZWHU7JT7BKI77L{gl1R@7n+*Jv0M2TYE*FImEEB@-p5Pz^$=${m z`VR*_A3;U&qWeDz$-K>6?vBVgpm{qO{_Wo?>M>m-X}pP$j$9N8=X}*2BH_fG5}$k3 z^)oX|-ur`4ca;K6-|!`C=dT=NQmM6B@e9W~mkq(knsZD@9 zOrM2%{mDD&7^h4*rhMdlw#v!(@*O|D?2kp%<-R&dp+)kV;DO_Tq(*aUhP%xzBYN0n zK?q)41PzCDZG=DO-ZX31?CZW@K{8J|Xq4TqE>EOq!{l}MNBUO*!#* z*|+oq5Q@ly2@`6=bpMtsEUD~#f`(S6l@?N9x=k%40EgnF+fRxwFSQN0{plRcsJ!DP zePv$o;%=+gQ9oZUlL*b}j}q@$F-Pu~ysjCDbHzbSq&l^L3r45090jo}=OhqG5V2nW zbH4v}bLjc>@wjl`=5lYPugIArtKDcmcdcxr>tAVp2-+2Vfp0Q^!t^06vH%=a+UD zg^3TUAyb}jvnGXF`#9W9W9VaBqhoNIH$C;y)$EZ+G!!#9qmV8W4t*e`)~tvN%b zmViP9GndAEHim{kNG@}cvqH`IJZ{5i$eAKQ{spj!08?F$p>u7qH<;NPsO8?@nWroe zWypv$zGOQN!QhQt!B?weDvx!=1@#1d9(QQWiBvAc$6`IUh`xcPv41GCd{ZnY6Y_K! zNWBwwvsJjUZ^P->g>_*RZ1q*+clm$P!`bHC>KjvvCCQLG6I80Jc%1nW>6d3!od`*t zTuER2<-S4!@7gD8$4KY87*N}_a1T_4(7KcQbmeCfa$Pzk=<5-$K=y1If$kkBB2kPD zybS1UBQKZo-JKjBEWWQfb4MbICWLZ%9k<`)0LYc%T0*bpuU{3bn&oT)3;c`m{}hraFWIa6H7v>c58gU_oqL+};I{IcZjp>HYWHTLV<`VSADCqqbwhKH*?)&J=>>y z|Arl*`R6MmT9bikuai@rDhFt>NKA^lMi)|K_sv3CxtMS0?^Azww+MLObf&0-bgJi? zGIXkqp#^>RKS(=#0Ng$rtPyAWglq>-E;xTgqyoEd2?tw*a|>iGnT-J4MweBJLl6F~ zB|L}F^=C#0*YTSbjx3icHG}Tsxs$W5@8-JSw|kb4Xf{xeD(rcgeu(O{lDn@QmXM{2 znfN3fUKp2Q8j8j7^S4eYzWA-lgx9_E2;DkH0E~R*gJKgz!#wV%k&1>4wnVWN!Sg%r zPdk+7+0f3V9n{;kS)}_JUoMJrSN^BGI+-O zfq%s!()R$J8=I3n-3c1VeZ1_h>TOHWmCVeJBSII$4`u$f^Gxwf(d?$j+KA06ARK

vSyAJ+A;wNY8Bw(=)dJl8UC)8QE9jR0&R}pT zRS~Tbjr7yq@Dn2E+-^7l8{G(Qtv%M9BQ?9uA|%;rl`pDN_&{Je^ja06&YMam1R1%eVwT z>HTN8YX3J}m#ewATz@3S1JmB7XIV*=Dpc=M{eAOigY{|WD)6=!{Aix)zo8RuUJ%Xu zS7e2~QQmY!FF{Ak42btnNDUJ;uR0vh`>+PK7EFF)%1G;{q!( zJEo=Z&AE>A0fT!Ps;G4&P7$x*pgQ@&mTh`p)vsMjI`c7cAh^AEA-Y3>0&yw-7%# zT+~Mw1*ovJ`_O3e(J+f5Pm5Xepm12m39~X3o=leg)xM1)&+YHE`Fv*&iF?yH)caWO7FixbEGJRSmjhev&s*%K73j_s|vmk5B4- zH}zRp=@2s{{P}(>F29Lc8HCEJ_A;pcl57>~cv^+yo`aTL7q9D+{$r2U%)X)Xu-LqN zZlyjmji26phk6ujTUnT%^!(*}^6y5AZF!@W(GUtW|5lO$Ib>Q`f8MA!C71jSK5UX_ zZ`^z@FmpwbyJl+rXwqI@@ry6qR6qd5I_h%2aOVMfa_CE*RLHjaB=16^1tq@6>YS+k z$KHFe%}aVZfi3xc3Fmo14y6g3c%}NFApT?Y>`SQ^+dbVI>)W%TP$k+Yq?Z=^9^n(D zr-hQYP8AGLq7As*QV@H zS)!Rv4JQ{!dstjd5e&c}uTNAK%r^c3O-o%kIJvNVs%K?=o%MbGEa&xk-hu;6>axsQit8 z#?*fJ{iE#0MB6l680O~{fR;yryi`56$jc_G!WmTt= z^s*{&8g2sO-$zFvEkQAi#V&B9mLp5P3D5>x)g*bwI%m;OCk|CUDySaew2)#Hkl5) z+fbu*e;Kj2IA_PDqn)v$2(4eZsR#F;fSOCh2A-H&)qSS9DpMnb>U{yk7C^QxNXL69 zrG=r#}2`@bq;tD zZ3)GT^3fa9o(o6kG-eSnG&!ieL_WlW&ie+qOlN|P^z@0e5cC*?#?^c)tYq~9FTBDT zhPF`Jc(Sv})5&Pl4?H{B(0+kBw)TGffYJlwHv7;DxLHG0zl7pcli-3Z8KQdM+BFrl z{^tWQO|Anj*BIqb$mAu|calWHRnIi@KHMmXD<0+PJ1sMAOEG~-7C`dDVtfd_ab#dm z`I`~eemEQMs1C+$-)7nSU6oH6GLQQ93DwOUH?VujmK>P=tgJ0HH$xh;&j_HPVCc8w z$i-S@ry#4pcV>(z{&&ma2M21;lgGOH1*)P67CR2|wD*ll+M>>CcZ+*G%hJ2MQ2^pG zo|}vlnZaq*xZHgTJTWwS&7dbtT!k!Q6t%gnFm+TLRgM0(w`L_xwVJy!z`m*#oo}6w z=liZkZ~!Ak>1iM;E;4U(fCu@Y?cF~y<^Um*MxR$eJ{=(eXTJiE3Ntq=42_ZiY{44- z3T4OQ8ld*PEKQ1th#g0vk6Yld@hkqtLi;%lda*ZUW&$OdQ!#DT2zM!gV)4yp)KKEg8wcu_P2M|I|EW4G-YP9wDtQW&_{u~a-&U*l%y%vnb9OX^M}Cp$ zq}9$63qBTAywzEC?wm{uZXF1DWm5BNS1l)WRJMnqDS@?PeT|tQ(ba@kFzb(?)zNae zMy5OE7+Rb<*Gn%ea1%PGjL5H{y@F_^E)>ALeA{Pza{)yB9iuJ^CzN z*Mzua*EEL8YJKbJ{THnQl_%?Oi9DS$zaK+DytE*W=9=VA#>994H%uePGydy)6YW(? z)2)(Zqc;aNP{X(~iYBFX{8 z(q0stga792`f6;c{yMjik0_f^D9f*)nv!1jwTFj?vIy$xI*}YJy6i_5x%fSP^=iT? zp(@GBEPCkS&&j(7(RVH;issQ1CR$ski{JFul?UuINZN1^B_XO80s^01MG?jSG2P9r zp4X9QGAZl!&jnw8(nP8|g?RI|*KVu@vl$cTqsEJ{d1o zE1Ed4?2qsGrJiD*~_b+SFUFsR$?;E@deC3d29 z4vo4hFZ1qSlWDog>U5u7^!<4x(Wt8mJbUq`Gahdyx~XHl2wQLcJCBAW1qnt>s67n1ogMPsYbq{8X0ky*E!uf+{JQNUNuL1dX5Y5drAW(&N83f>w3()?}Xt_=w{v zXG{eRJ%~Y`BSRG1$B;Lj)Gnw(dqMy-Z7?ljv|!r;`&H6@F*tQMEaJycloO zEvkT{hZJBL;%zdXRg=wfQ-s}AON_jdI1sK;kQBGoX&G)skpz);4q+#(3yewR#67$> ze^NU?`fGZtU63$(Yd25vP`*FjDLH@mT$NoTV%p%h`@?_Rik9sC+ZS9CN-s zMyX-NfqZV%jt-I~+8`Sux}S(L#PW$G*<5i&M(}*UFBL#M#&PNH;r9Cx6tWQN$uquk zSgB#VS3aMM>`{o_H~Z9LWE<^R2NFX`oXJacSW+c?&sW#XasIPElA& zMeTYt$#cijfXW*8eXh0(Wl8v}Sy?i}U@g{sE6BT@-*9FmTmR*0^d&hw>^ax@L5Wm3 zi`VqzC@Dw%Z*Wgml4>c@Ou0IpsE3(-@;Ztkl$iKBqZz<>H%Fw{%Vd}|j8_Ny?F(5q zoQ^#Cn-zHdOV^WAGE3}%lL?4hZ$G5|E#I{Z47`X7=Z!O)s<3$R)+Vm9$t=5rhQg5K zvjY(f8RqqdO4pvgG-;%za_v;IJ5ZME_;~fuMooX_ow<#w!C#9ZQ87V$Rq#OwZFTzU zwhYKK4P*UaU{QCYXSM)kunW$b<%VC~neUyuPkcq2`68m{{p)xoKO|Rfo$5yBUYvCj z6DUo*D*W*Wr1U84JcIehZ=MUZ`skYsxo^LD6>e0(Ksz$=r_SPqQQA99>j0_~GlLpN zq08wR4WN&YK+dkOPoIp28bI40UkIv~KU6~07*SNub59hFVZUNp z;`YY728h+ldOs?)q$zh8gsDaG<;mr@?Pc9l`VOZyk$tOl*wlAX+T67gg~>V5)?IJSlg+g zon-G@`@@o-cWF6&!ZfsYWhO5{fG*=Bq>R%M`>eo$JzRnG)R(7)EgsT0MxP!gKM<*D zWcn@#Ov5r`>|3{5ny1Vop4oi8MXgkI8^k`qWtmyms+_g@8lar;F^j23$rClnzw> z{Zf8H^xU}0savGx^2@8irCgXMjlD%W59S3E}G6DMfN^Vk%wj5XynHtw$*smFyNJm!rlBukq4HhK0&vAk;!|v^@s-IVyS$3FP z!c&ArcpOBxhXO+{NEnwvbET(qixv#FLY`KNR|iFiW-Gs-f#LrC{_t89UG3(bLFo?) zY^NG1#G&$kse zI(B{Y;Z3Yc*rRrF{u@XUsteadboU-=JJWcEHYur3_sEd9R1IzvPKI#!pmH5)yCTj> ze+PL3?aHnalSK=pZ|$KXNh zoGZryKRbT?vq74~7M`PHEYvZoguN+Zrr6QgG{fEf^!T@}zL78q19lR=G|K99UF)Y= zTWjP+Doo9C!Ee?-;z?ba08PJjgGmILabEa1DQgqZreev>HX?uYRSJzED)R5HG)w5^ z4avdiWu=Nq)J=dD>URG2jF`T}SCg&N6s1!~4RL&3A({CdKk602{IxL!IZb^b7iJLn z^AM%YNzCZ##6VQ_<-4*LI<+F2J$zXwyXG@v1TAj_it`5TDB5DO5J_nGPn=H#8O)KI zi7S$rvQ_gc?W5(*cq@!pFBFRXiP=?OyL$FT1wD=2=@<8kiu?{lyM9; zYMJndhXCoh&Xq~6;k`N+wR}Ao5Yf}P8UEyZLyq+D=V)tVl}mb4%`)!pGd~3hS51ij zxeF$DSs(>Ic>koZ8{z&22TPEe&l~{ysYCUwZ`XOIJ*!fPssfZ4bF@yC zld^sGBd8^FD_9X&!Oia186GUkv>=~X*oMtckKE2I{k1LK6MmfZBWcl%?8qtDM zcUxM}_EfxwGBeYd%n!ds8`Y67so$0(k-cs7{b}-r+_~DJLo8-mllh=LasUuTjfye7 zmpu?>Cl@93hKoSKvEFzpN>QGff~ZhU($vO<9l?=G^x|Ve4FmeDIDQ3X^COGm%&)`K zT3A~skpwT~7X3c1H?5d)6+E(_BB25+(=)5E4B^M0>YU?(<*VYo#CA04bSmcjzAdwN z0fp6-iQNhcjjZH9iWYhg)|@MKC`MG#=gE<0tR()msb-yAbC}5+e$S(MSdyj5x$n?I zKQhWLpl3bj`uMisS!WfBdBabg(%rlhesk@}E06HnS5xuv@fRf_KB~RH;7jGk%j(k< zh+a`d^X-{*=s>gD_kjDS#|;w^uotJcxz_I5kbyy+s~=pehQ}YqKQ8P8_mrfz8h(Bf z`Ny@@1yqeDt8xw|&j98J(d{tr-z(E{99gu)KQkYh(RUh-^0J_f(Rshf zFPzI3euvp(fr7QDU&1T3=x*2{R*6TbmzIHRgxs!=iuwbqtg?pVs*}CFpe3j3W%~lZ z6fHAIu@KDig8-_gX;SdISBbzZ7Uhdqok`lOw45+{{#ZL4>j@I3ViR>(WpxKiSpbf* z0;HX1D!S-h5^^I`CjC&K6kCr8j~RfqGj@bfNfu9SWQ-7<@N50mEEON9t+GMsJy&CP z4o_G-#hB~J55-q~Ei-b(biL@gtP6WPxvAI$q|%233hfOp2o5~(OjQ;$){EGK9}kQ4pvu_|{r?0Uub%iOa} zu*Y|%?pB5bWu*KD_q7rU|M0UCfs*cfpPh}t)tvtRJfyu))wVu>bDdd9vA{~nD4B+F z`Z#@NKC(USD2tU6S65(^)RsUUiu&)0Vw*#&v#D2xP8B8*QRr)i6(iWz;8`j?-V=$G z;2E-a&21tMY=5S;Bj84Ty-Cpm33SzCl#twDb1Hb+2iN9slvFgeG#o}d~+Flgt zjxb|v82Cvyw;1>?)un<2Pc+)*{7v`LWeh{)AI2Yt@~u_vf0l|n0Vq6{*G;L6w3RBs zff3mPiEgG@t1lcSXX@j~*watUHQnCxmFv(WV^x)DwCQt@|5CGKU&Mau9KgUZ&1WOE zj;+eIj(^W)A6GoffnHVdN2o`*F1Oc+Lbs$u*FB0J#VH;%&vD;gV-ZriAb-nClWN3+ z)LW#LSd!zW&Nez5KaX7bR&i=ue%!m#f}^3)u)`M~RgZ~|T%En!#q9(BRo~X_pB1B6 zuPC-K9#ts(VR+rUJH_wrZ=ms&O8$4O!~7)H>U85Z(<(nRfo4VN;Rhb43-}n4R3ekg ze0i(NvCHT}oKk3eA?mll`AcKPu2rva{f>r3nnj*uEBJ&|jY^c+C2~%-1M2*NDVbN} zeEq7#w!*=2IVs7^92J)}hJuEj4(R+V-w-;yukGbuPqKCN2F$k0qT=_}Jo=Xsg3rfx zJ`pRQa%*vJWi%L9=BB?gbHsXvETrQ7(i37*`*>E^A%U*7KZra8m6=p0~jLWA3S+ zf!j(I1z~=^xj>y8i#N#8Zt}(LyQ+5?L?ZaQgX{R1n_b3dUa7$kyf#Qb^oxB30#JB(UC@)b?N?!YwalE(Sa1k}8nUx;t3tYL{XbBP;G z!#nX#$a=LN&t(og3#k4ka#LBEkwaC>#wNp8%7pxaeLiakP`%)gCf6R$&G-4Tk`5sF z+DVTSLwH-rUSEt#tHz66PdNrxQU+>)q8{uLEp z=fm&%9lp)aP6tY_h%Mv0?D)h2gfSE2ey&O-+Xk+AR;-{;agyY_((N=0MpjXvJVz4FPG z!ddwvFV6x7ZNnM{_G4Xvj}=mA+$s2H<7Oocbd}sCabQ6Aex46Hs z8A=0dcP=KhlWI;kqOem`{yrpu=o!xGEY?OL5>}ri?f+s~ ztfEJeU#CbW_AgCMFSXf~;a?b)C@}sQY2m~V+kp8B|5!#FKZ)of`i>AY<;EQl5s36Xm9VgCCRlJ?+kQYA|x+Q>`RtRZ0e&x~FR zO>v?|{PcM=;$%x>PEpAvtVzDGdDEY19T?MYA$sJ#{v))YCN!d3{L`_1d+_`P43pfs#Or zv))#2!ed9Kd`Xdqq}RjU>6GO&>JVGTumD~~odP{Cz;w2pE)vE0kWP_!LxODPt1aNo zS{4cC!S~;7-z{fr!1Ke|jnjWF(^Rps=DEL$CK!%RD}!GTe?#yYVN0*pwpu_qL8J9` zo8$S{plDmk64Go6Ct9rXGWIO*;fo40QwaXJ~LVE_)x(A5xWxT*6kQu>~)ARgqs>t~oZ zM&XIbXR71xt}NHj&NnjZZ>o6j&e*l+?ssQWuNTp9H+=T%AJ|2X>b(%MnGdohAfq%l z6W7*%Db-4EHU^pcLv3Q6$!}w1P%g=3?$p;qplYJf(1D{elEFL#OW4ChZL3AX8E5)b z#kUsoD;uRN#Mm0g{@2)JgW0yc7Fo6HtS#^P9;eod^48XPuaPS)x;QU7p_S?K@?REP;ZC zT9S2NG=~5lf4Y0|%s6MOB^Lj*SGcv+VZyOBOo6SO<$zh-j9&zB7w;4$IKLq(yUW)L zlBD|y0W(QD$!V3`-HqXB&6kXJ5u{m0YnMxm?O;{|aaX94l>uitp=z_Ke(d6%0R)oc z7s)02tk4G3VH_GGP`}AXZ7$TZ$y8yO*U;9@Zt-n>mEeEvSB8fY?N;`~IM&iT@{Mv9 zj#fpe0S}D>#{MD$5wb>I!u-m(oI#b&%O4)tbaEthi@K>CLFN_45VHRD{?|OLi05m5 z0%Hf90h3t9Z!6Mp9{{k@V|_HX=6T7UpGTY3REia#{-xo@I;<{6+~u~vh59wWBI=H{ zK81szGHAjtkqT~r7>2HoH#I&`iWKr;MPnmj21p`PS}pFL_FMSQ^ZD}=TX39xfjqyi?={C8cEgk)3mVhmLZhX&?5U)C5pVxbHJ>^$TN* z9Nbks9;g^uHFTR`@VkK$aIM*kX1}*!a(5s6PV1QWSLR+GoP(mN7t{Vv)HglgR3cl! z%IHyT9i-ND;)p#{YaG*FWxs5?$HMywL&Ve9y1>{<$S4xB2pX^X!KZb>s1Q&)Z*2J# zi(=OQkEw5P&+7}ej?KnS8yk&n+j-+Owj0~FZ8o-zH@59Gwr~2o-+i9%`~zq2Gi%nY znLTH=FNPRwE6CplEE)ya=jZ)rUa#9^IWh4@kgLmH+$@E)o)y9?7m^|~t#|B^v z5Vpn(*`SK!P@VnPg03FFh{aLeHk0rxBQr!)$xUJ@^7L)J^3`13TK-- z^|29?zpZNBzA*TNj044#SDnbu4d&eOAeh1>+_@Brf4k0EfQ$iaxWY|+&6NdMF!Rd6 z!-Gd3+=pSyD=TG6Ey|6_sB*e;g@~q)U0+zVphJJ3PC1q)iZ<6txtg?=*q_s5`a17; zRHC1=3YO&o6tWe7jB5SNT;>dROaQM8G!|NQT7o)6;Kk}W=3kWT-1CvqC2UgI!lw+V5UjwNK6XI zvO-R5o^s^*g*qs<>e0K4L$T?X{{E9@e%(dwPo~MUk@E3PJc9<YhL`Vj^bDc{gG*W?MfXJmFFKH>WTeWRZYYi@32edg*?ws=+~Z z|GnL93$lmnn4-g=ryQBH!XbA!hrlZj1iL19_K+z#?}}3H9Xm>ZSP@ERw`?-L$?|JL zRE`+B@yA;DWs;edTPJXL?tZ>8Y@l4-HLDP71`16JkmlgJqa5#h_W+h#i|VamlkJ-Y zhQV)0`M$jBjRSw}lkY0f@l+ARF5p=ib39yru+cieGn{r@))fM?ejRGA*}cB39o-f{ z)L*rF8ELc=I;4y@e`TV^%TBoz9b^0b*k7Hy)Gi1s&A+oKbL;WXs6ZbVAdE7KfOk4$ z{=Os=7t?RUy@v^nJ2h0C%kGyY!(05-&XeQgwb}9|b3#8aXfMknLqqQmp1&mBb<7m_ zB9ZsUn$PQ!p7+b0E5Q9$=UCqztt#zT=9GMn&*z&H5^?s2c@cg|(Ib-hxo*Mj;Oe*l z+dcXivvi&Lv77N<+EU7$u|%_vJgGFCB+mb!ma|k@z7ofzn@!lZJD;IBM}Er@)!gJg ztDaz_Taf|&+syk5YOzdful~`q-;7zUk}*=iXxGJXd4|R3N|#yY>!ZGX2a<_FBdC3& zh!Enb=_RKj`9{Z6)*2|+EyV0Hbp<}?s~stYpQ3N6+8GT|MZ!(WZd%=W7gO^K*6!3#Ds7tI11R?|o%TRjonVSI zrkZuBHs(M!l(rdc%?((TE%;(e_I*i z#1SOvWgTS4(&sd!m||^{=;J_T3_fCIito)|eE(I;>esNjQ?()@7|kc)#rM*$RcSP1 zls4vu0Eek9j__4&6m7CM;xA3$ip8WecZQSg6ldlOBsvM+KRpPWi!IO7KPP(!OhTE3 ze+e%8nrGeMm36Mp`-AJ-0RAizzLcA{x2LBoSBr~w)Yx3WFxO(r(NvDl>-p^}rpMb_ z_s9Exj8oTeXrp|qP#!s=v2_28e+Inl_VU(E2)@q|sG3498J34AW!9yLZ@*{Yjn?`CJX*=WUpNiQGr)z?yAa>3t1qL2t16Rn`HK(nzt zU3Lno)E%UjZor*-lhXi!}}Ffco{x%Xfe)Dw$3*xEC$=&X%y{r#6IYiW^m|brxIm zd;Ly&>CNP2qWt2=)4>Xjtg7x`Y-9Cr(;$J`Uvv@DcgWp>VRm>3EC~sjl9xZi?;a$| zA@q5W-SYkJs->Zer}VK6!F^{I!@V{3EagPbymgq@dpdTUbgsX( zh;aq4b_Cu_!?mzFrx<>3I9Jjx@7O?Pr=hKkG9g5l=}>&RBSn>l>zf)RLr*H(^pwzC zFU~Miwl^m@u2I&c38jJ>Uo76t2Mk!i532mwgtqtw1R)9G7f?z7OHl}3qM7PUohT~h zX!KIWATER%_=3LxgWrP?n*8NSg^x2qjnjK=jc+|g6_%ol&9zorG}m3Nfm6i>|(r#xH-n~O%~=lyYK!1`bgh5WCkeU zOm`O0iUr#}{regbg)3E-U`n_4xN4VU%l^z*j_HE|EOa8D=UBJ4f@jVTsjt$2u)bqg z7j!yeY8MaR$J_AH7GHPd)BC~6^~qNwe8b&^i9izdugWiEd_9g=bNwaqUy^xE{wrYv zaI>c4{pK4($JWc?=dT9j=s~qr=x^Gw4QTW$?hJ?|7 z@vkzqG+cd2%UhRE`*vG4&tk_F;)V_2C>L#Oa{{?Z#E^5<-IkR9#uxhs6U9)+4&_dR zMI-~)fte;*l3GIBu^`LGlMrs`>n8R{f1JJ;0xCAF*k z1iFVM4z@$moI6ptX!-Uft`Lc=&%YtH@L zH;XjRZ^P7$)vTKS)8{}cCov|#chssTga%m{+ee#yw+^-i};JblFFPH@=(qFAm^M^N$1I!P9 zuqy+prp8*0xgT(LVj(EzFC$Mn%%#0jw0X!VcPUO^vwzebOXd;XHh4eIYB?F?P@`BJ z>lagozijO8?#(H_=$M((b+?%jr=%(xtuc&ifNbsdbayMCL&(p|LmSb&DNwL!rJ-62 zJyyxT|DcrCxT$p(g;gToiBpt$o-5Fs5|Wd>DD`$Wg{;0Cxn1PI_xk4!Dtj24f#Ix8 ztd{OZoOvut;7n~V$QXXDtQ>CLc)U?>$k@E$JaZLN`#-DODMEd-Fq4FjH?3es54FIc z9yNNwe$Ze+xU_%`i>DYB0l{u9+`%qa1IJ6Hn@g;@DNOpQaMPza+t*;o!H*PJZ^zXv z03zU50BS6rVKf#imbfqZooWXe=0rCddFj@S3-v$&h?xraa|TSqAOw{u&+JdxciY&b za5fZj{{+F;>!})MffE3z0zHzno~j8)q%?wE5Dj9T!A*Y17_sXtA8jVrERG4Rh34@5 zleq!-!-nqs(wxqUGQkL> zM_t+GOhzPt+sofqu8#QP-{??Pm7x$yYhClTpw3{1)W}frC{l^??fuBiq(c7McM!(; zN8$M=3MR@nfym#$&0_;=E{|H)k>=N}zQt2MCVF+evT7a7A~ZrkhbuiTK6Z6%(lxK1 zVm)8n!nxFOh6ncb2@-h5A?i!w& zfZIh+dCA{QB(rb5s|r-g=!o$Wvs!v`UMJphm_88`u&{$JFLyv`%DWRiK5MVyOzo=* zFRBYVjnyeK)#PQuw>w5Sk#aoMnl6r{u~lQe;)!XV4N-qs^*8!XFQ5O+8w5Pm`;};( z@Ix%32ok0dhNK_GCHVV(fzOk7N%I~3_R0PUwl1$mp!-lUj&tM;50`2*OOh#kF;kq) zcd!Th`kgc6pR33H-?pEfngENKN}9uM__`Rg1?D$%@X4nE2+*4GUI<|`CGbpNjN#GxQXloVOp-XUIAl9O?eBh2Vqs~EKod-&ok!S_pA0pVPZ;DkKCC-W3!|DdcXOO5F z(=TXA-o)w+Gmb(}D6N^j5Q(`Oi%4Fzzs`x#yd=Ek*nz^`u7;l}IsUn1PLdGa&EQS+ zC&UR~o}P@d>ZSxT_J>8>l9to{nIQ8MW?3682`g_p&eR!Dx==+S$f6`6 zm2RCdy3X^}9mdyS~e)1R0h5n4_ajaVR)%Stnb><55Vu z_qCK+0y2+oHFJYm39~dQ50gwhesVNdEJ@=1-K`|~BW*r$-q_OMS2scGcp5T5Ambyocj-E*a$rLYwh?=o%c-F~KqO6yUWVz|?SaL13X^v+4*NTLEd-K!av?A3{I)?@0JU6vn zm4b@Gd{aK=7y4imt(SR*8Zaqxu1MhX1>;V)&Uin#oq|mqgPo0A;<9AXD&O(q&B3qz z2AJJw)e7Y{eE3h!E3kduTZYzk4$dg?1O%`W%*uip1q)XJ)~Brijw{^Ubhmw zxiocI`7(i$bj#|9Ez|ay(si;+qVkwWHWW>VKoaDU;u==J)1ZMmxf#9XsDe;|xehWQ zXJ=uiH%GMa5p$3IH(lWt@CG5pu^{6_T~-v>T7}_fNXhpKSOxbyeNzR*kfAt1 zdb(66a)E@Un>^Z=gN%b%quNfim*j76-0bU$1JCm)gpED?-U+72fddW${vzLMq%8+h zzGnZN5#)ZNEPL+RT3=+SJeJ=ihet+^VUvi-0ozeP*y9&HEUTI~)8h^&Hr{Ady>7$q zZf9G{vOIpo8s54nimfx%aOl*K&H^GvDygSCbQ1CP%y+mGH$~MT06+b+QI#|8v*F?` zlFuc5@49eOb>$k$D7w-kcA|&B&h*kN!v2Xc`G7z$Ui;_bNA*MQ;p65VV@Xq2(P(|T zox-x$Ype*)%^CFTjhtP3ENUcs{|_q5i{6+M;sd6YHz^k?0c0JE4!g z6`#kmvm#8hl3T@C>G&w!)a*PfgP#dRC+pWhK=z-r8)jls)y}u0(M-4L1TgJK1i$fR zCKi={W-t&o_uU6~ODbJt3qc|@9O^yg$)1Z|Kt93_WQl52$gIp8C9+A1$7QHd3SL&N z6yZ6it4-lXY)-LXZsdMBgdUgo;3GueY|x#0=z!qJ5@bl&?yTCB3azDf@~oM1=dRdv zAB+5-TtL;d3D+G(aT*^A2+*+-(U@HgV~m)!(%RBv7}vxXzS%&K=I#ixAyc{YW52S@ zI96m8CAMa5$1GrePNgA|Kgg7#?#!~nAz1$vo70sUHLx5x+}ag{0l68|V^;IPkQ*Fd zd_l4+`l`l4@lPEIdXX)eJzJa13);%;#hd%uLbnj$To$_dAbhSN(BQx?gZiP~-%KES zQXO81KxAc3Q}sY{ZS698lOs-GsLoxju^*4@2al{FhJvfS!@T>AKk#z<4Z7!W8Fo>t z&dlGfs6A0NiF1wIG=WVZp5-_0#Q1ntp&s_ylBzXslu`x!tr7b*ZLig5pmmKqmS!uh z0t@!C)B)+pM3%}L$Yy0Mn=ifGBDth;fx2XuEu03rUWW{u;Db- zbDgByi~$TV3V&y95oC<(n@@i(3|*^Gu0?0sU}vBV-_dDNwVyi5N@&xC6l0=h(Spwrvvu5{OfZDZsV5v|q4}bNSouV^9CKqS;hfuvGT{Q$cUx|bEnTzz zq`a(p&`VOWl)&22XHdoeb(3MN0xY7-wc+ zYuk(Fq(yiQ_OX1#Xztiymc9Qodg6-`x?MeO2PGe!@KW4GP*+}_o%+!^O~c|nnwGg| zsUjXI3In+FLyY>%dnT%{pa{aVl=tW$8{aqHys>t)mCvZ z5qdr=$pTS>GUaF^#%v)2PC%eFv7w0G5r6Ko$(-%b@El+@rtKAHnn{RbAcAVHYky8b z6{eU;;mu9nPKjX5-ro={H67-xLV}taSgBKYaZ#OnPT z-vypj-K0rQwZk|x)CK}hSHG96Q;K>@*W3Dj&xEP!|ryyn1rfJcZI6|f&9ZH43l)y86Js5(O^sF2L4T3iw%k%qrDkFP6{!pk#b?8?W5<0QCOB$TS z@X~70?(*-vi|x0N-oqu_&7Zwl8BDXZ?kR;UBCr?}Y`uaW))VtXQ*ER})y7v;^t7qu zKcIgDUPROE;&s@VUyL%xXNq+O4w{;xacZzR5f&(;>LrjZ(hxCwZZFe;C76%s+eF{R z$w}BjgZG^?9drRIKw=JW@n$%zS8i|=Iv27MAdS^#Q~qKB;3}H(FMbme1R=&l;E~hl_CIJpU76sn;MOtZN%&vP{K3a7)8*3&PHG6Z>buviXPDBM-V3szoyjeGsGtgSg(b@N! zGin%CJWO>3>heR-|h!wg28~b>bg_}PzYZE?el5EHX2M~ z%_|kluFYi_DphGz-C&WXE|0+VD}|;5c>~xXb26a)Ehl|Ivin92AHttzto|pLE24gK zb>M^L*3!&g{b5Nx2#F5$$?|Q6Yu`lqco+2tk(+C&kT7O3Swvz;&;}x4yUsIID;|gn@#v<%!eoILvNqz4vum9( zLx}k|7(=lM-RU4SI(yR$->R2&5jJx=u4Y#0BIx& zFwBy@5uc2wRq%IzQG$qv7L1$wzg_^BxF^2wA8g&wpj~==+ytv-*y0g^Ymcnx-yo_4 z$At(2vEoOFDFFh8vyAU~$_>J)R-d8$YgX~hxk;cnq<@#`3{b%#vS=@43WA}2o+_;T zrv5Gx5GTYI=%Bdqq#-luTc%*s|NW4TYKY#G4Z5q4Ye7~PBp*+-rZ>!5f6*T7ZeB!9 zGhiwor$y@ozn_&_i0yY@g$!}RQNgGz2}nR(kWd_3=M*xbE=2J6VhesHA~%puzwf!< zvuYA0{4a=N^kGR4ZrwDo?KzZlsv`t9F*54juy*va&yglS$Cfh{COR<5Bp8PMX9gy6Efv zEXl!T?vS#0>{FCv`MEa~o)b@Nvi>)sM)2-=p)|3n`lyMZrYfGaY(nks-(?iGO zA0T$Q6f^p?ILSsFaVc$+A&-0Gx&Z5tl*7NTN{+%O0J;O(_ z*o?gRQ*^`;b4`JGvdxl-Nc#b-KiapghSvnyEJZA-M$+l1Fp+3FeCf>;pUZ5NPzbDy zlYZesTi&e~H6X)B z*=Cjb-IBGAVN6lo=%FIv?_8QxdUu|M@YdSGx!ar3k~UeHtK~uSTI2;_h<$SS>wEKl zO;Si& zK(scH+NS%374;hLfF(;g>&@2Qp76Ubi)4YB@SmepBqp+N4zNN)&`E{k75-(P7rNBt8^h3xzNm?p0&ZExKW{&ls4aK*ZJs#Kp(bZKh95Q?-Rku^ zH!idvix{jNejcL%1ls50PD(1D*Y>_+LWd$z<1ixR-Y zu%3837yjXxH6J

z!IX}7U_#f@M0m&$^}`f)smNBx2j>=QWKz##M{RAdd4xM4!eN?`F?-d zE=$$yGQ%D*tIKx8y-@hBtp54gXf$ORq;`Thv+25E^;2i#;Sco!LE6?HA-@C4@qi{D z{m=L!PObY4IWT`nlG*KG|2Qj}3L+Eumh^H=Y*BP;N<<0Uh5FPG6UT3XKp@0e$&93C zFKtuVb&DxV+iAL`LiW3q-NUw*0ZKu5AuQpDML&Ua!*m#5X@1jzFAPdin`<-CP@yP^ zsbVo%Qn3_aHNOMvu2qWc*H>Ty8%}`m_suU~R4tm8rY^E#KQ4sv1B6Ibh()DPS2eCQ z5_aTKs)6ZkkY!U=?&_|h~f{UqlMF}K&F#5bz#SWYw1j_Wa zNEX+h@?11;mR!tWcOh{uTYLfz=g9*;L`jYtSAl=DM1*#WH=Q`yf1_fav{9+n7n~*S zkj;Dz{0eZfMeO_{p}y-gam$pHQHD{k2T%yZjD93|g;H5Tdr~_t3b@`DKNxLBoM0?D zQp5D8{U< za&ojF0$5BhJ9w*OvGi;!O(U-hQV%n*BE5g%)<;7vI5P1$q-4#kP&9%eDZ33cIPe|* z4l5H>?&%j92fQVHmuEsazj{o75ntk?=OH*4&Y=0;C#H_3U^U!G(n_@HJELfF;)T!f zB)ttb!lrXvP^Q7MmR>&SB7*t8q*uLw{wW?uO-E$HzY?;y1@e|2!(ci5`7A*G6JnS)-~)ce&u(1$iMHf*=2qtOdf5Fwe0r#!1JoJ!_j&y*v^-Jj8f7(2AI@XdCwi9l-F=*`l+z-g=ZO7{bC4vN z4Y4A+_;-es9N9*{nSxL!ErUokgm79?)cbO+>+8;1JK;skOT~P>4+o7_C}IS1*EMH9 zh};(gJB=Fu5J(W5@UTFeo@9OhJqIl_>UP1r1ZH{iqn|aUWpQr8n<;_5@2)VS+>>E` zvdI0?D;@KHgWJ4_abvk5F<*bSLgLh>n7>pmBeH+o1kD(z*=-2SF_82y=ck5dFdHFq zi7s3(+2nA3(j|UB8phK>{-yW<_tt)Y>pudHhH3DL^wz!;S}4}=blFq=a_n9#Px@DV z+o#Kw=(FU{>*6i>=dceH+mRak77Rg$M9H!LlsVs^j7uN<7Uyep)cCRb1uOmgYy#=aaF!X~FlIf+Sr`|?0c=!-z>9Yg8 z-?9>eJ6Dryr}_p427D;38aPxqe};f9O$v8jY9jUkwlREZ+^Y|xk?3NMnh^vbH3^K> z(7~GO4a||zD+nA36jha4P>ctXyCif}TKFFjMts+5xAtmT*VONF5zukW%5c%_dM;!2 zm&ZhLO42wLZbpB-+6SPc5(sIT!}wX8rnC*QqczD5aBf`&M=NuWLuXz^b5rL+Xs~Du zEV|wsSfu}AtePV|)_>xyXs#5XFG(=fKxX)bKfF!pC-FdDv$^nD3Wn+V=2;dR{%F;+ zqpVth>eMB?Ht&Ax0v??~N&QS{jXr#xe)l?-&kj_x@_PtnB79_LKUT`yxARz^;)T-? zQ&5eGdc6G_ddC_DeUPCzaH*&PlUteOe)~>mUJODu5dug4fn0N|wWkOx;|zKhqTRjH zvemvpb{X>1r;_3LSfqW0{6@mvjdy0dX!kPTeto*zDvHz7`S;7s2Ad!u#`e(6qM7Au zqgVV=+1uJmG*G2O_O=0nQ|O~!BZ$ZHt;wZ;gAH+GxFafmITZi75x4&-&)VOyVg$QzXzBT=$U8CKIhkkxkrz2O`z+17pUhT7(EN> zZ$)S?3sJn$vmg$iuTlFwD0>!`s#k`?0IY3%THOu$VO!gFubd9o>)WF}7Jb!R{^JlAAFZc0$Wu%HLzQ{{liZbcGJK1p6B60BV)E)*JErS_dZUNG+&TO z6DoK5b`=YIWha@e&_tIQc{#gWY#QCHj)Xlp-mS7DdK9aKW?hUAP1H4%UtRu+Kq)C@SkhN7=aINl7z)L0 z`W4XQlVh*)`{cUgcp)Lf5Y$th>-pK97A%>cxOF~+0SuQUNadr%E4viT5Z$4Jgg;A| znuQN=*&PG-$E0WC>z@>BC0OGc(3hK?_x&mcFRdguid96o+vhvs5HV(=+(W2UzaDaS zx7(fof(wY{sE?+nsR_i`E&Qn(;Zu;*FkAME-!{LW9r(FFpl=RI;v(z0fryt?zaEaY z6YM7)ZmFjtE6BQ2WEoxab`Fhic3%b~o)=xa!d>I0yRqM{g^!LOX9EuE&gk7HVeM{J zJG4o+)qc2FTJt8yV=LS8FXxo&oi$XZ6?Gva8AUYz_{WFb+V*M-D)UYk@5? z)!2uFfd@=z*$$vnP0FD*+YGIJDcTRZU>O6#FKO<|ioSOzt7hn0 z^?X|#{8&v?$n~9FN_a)*^JLQZ=~*J0Y{rBBv7q%Djs~|(CU8dN+Vc0a5d`N3lRJ?k#ovK>l$RjAUwm5@6<2R*tRMkUZBaswSGBkg-c;kYYzu!VFbE+ z2s!38Ywf)Kjc@(!Si^xuu9bXfm_q)$r#2$r$E8D-?XjvH>?M$A&iuO@?eTNVN<>IF z1EQuYQ)R-RtS-@sm|9RT+}B&OvA9lt{xQ41b`1pnQy-DTM(>No+by|9nLctdGH+`E z=RIwJ%Y55)BEoSsfMu4z@h z!g9m~loC_-*nCRIXD1kEV1|zk;Be3P+L-#bAQ~IeAATaX7NP6iY+^8XB|6X+kMnEl z^OfFb_O6;Ugc{}DlAz@n+r+1qzp}RY(e<14x?eb`Aw;g1ssTYyGdWn?z8hnD{r5;q zywlHFZDl~_mM1pah)w2Y{SgmC^yX z6|^%Cnz}W5%cdWEZQ)YPZPfg_-Z*pvHiC-?6glMg^Bx^JYGJC_oo{l2W+;EQOm)rP{ z7;yqZJ*06g6RoxJ#zdvFt{A90y$R#A+A|1dnc<}fNm^U}F1DOR*)y;P)pO(lm@ON8 z=yx^Un~kIT`^5t2Sl~;US5*snIY^b>1Q;L>gcQu7vm53~+^1`Gj!>ffXWoPBgt3vT zQ3b^6+{XqWy-VvC!0(%4i+Dh`fl$BXGb&csXv*>Ws=w~Tybxs z?%rhv3ExhU9S=DEwl8*jJ|jN8xos^yUNkS+f_T3VDIX8w;}%4nt8u?I$%o-FGp=V- z6KaGp$ASKYJUj33C=+YFw>$T-4l{gFW3aQp7Vv@l zybh-WmvmDu^g!Rvk$o(^U#GrW3T<&WyP>;3Qa8M{bU&UQbSsDY|FGk`xb~!W_VuOk z_6QmAFf$l|{GHf0cs!KK&VB$9uK>bzv60+TR$c&w>3}TZ=!nSeSswia##}!ZUANf1 z9B#wH!g4Tja`^YDZ`d=JQZE<7?QWI41Xn|RgnPUBfbmb+WZSLSA9bA#`p$#RWW;N5?2vPh{oyrJViD)UZ6(tq6>x>)<6=+D>s z{3VbF1|vPr=icDfIt3>x78D%0UmB6(9yx#JbpJYI|2SDX?wBX-N)PK69lwXaI=KwO z+G{!Z>lH9q-$VEj$e*tLmMn5J$G$u??2};sMYO+T9YJ~~%^-UxDNkg}Hv$5~4Tl|E ziAQY5vJ4uiHEg;1?Uwp6Jv6<8(hERWK67yCw~?PP$*=;f%OONqQ&?M@((&->@YBd}u{4GnF}5{Xwn z4?}kM0%hFrWMaUrUUnr+O>aG=InC%Ucu9E}XuTnXsN__mSX-pJ(o; zS?=*cY>i8Gs;x1*%HA8mOR1j*qD$n1|xG(4Jq!sx#K7pD{8 zHTts`?aprMZDO}B>0eu;y2kk=CsX}eJ=*8BT}2(X{X}3QWI^Pp&K@hE3LqN zfFIKlCG9vXkl>GdL>BO!M&; zemcG>WyXI*2wnOdcM0enBOq=L21p7+=7+Y;d}85z2^}-$Y|hu#gm#>&H9$=pJJGvnc|r)Lr@h$lMhj6=w7W0=dJ8ajqB&aa8%1 zgLY!w)vD$-63Bal&{`<6|MC3S4!`z@vHo`Qq{T2u{XI5HzQ?eo=58KIlY#T={KB}{ zIHSDI{3-&aW5sqsXG4r@fefqh4;>EF3%t#^A!$CfYDB$IMe8P3Hkvjf15GX9uUOvd zP7zcQtq^Teqdry4rnPg4J;W8m^0GSAjHY>a7T15~+%+9A9iRWtSBd%^^5bNo0}}{@ z{(7to3fKw)*bDKYU{L*5VFlyZG>LLcr=tjsRag^*TGM9yjQ#A0>pJJhU4udhH*%e> zCSiE530Zy0ArHqEQy|b`@Y++rQ+l^5xFO-ns!FCiJzH+8DT(poY%C!NOQHYQ2mTjJ z?=8pBGrx<#9jXg4{O8+)ra=!<@EVl!v(C7mXOio9uG&*u-on6DCTgGgMYx`b`qvN1;oh~jsg=KZg;=CD)B zwomyRY}0kgtP#6X}8z)T>qK)KQmLxomgDR1Psj@gMW8P46~5@xO45#;0V>=})w1Xs&a$bq1(hOp|sni#bz?W7-H;UzPL83@&eSZ)NL{okn6jo73 z=0d@{tg1FDRLQYu?geM1+@{azs=}~xLj5=gx^M&@DBAW;>7cR zF8sglB=d&4Ou=k%9_~faG#D`1B`2bJ4!{1pc+uJ8QC3YRn%I|lS~YU4J%iwqE&1lp zH=R(V&BK9x7<7Htg=71dA|dv7!{`of^S41ive}@F;=d64Mvzk4tq_mIdKCo!Kbm6v z_laA0+s=q{LEo0vyf;TL1Q_j5fTS0wOT80#HEreT<<}S&l~EvQL){>>chprK><_5= zAR6!&sN{;(u=6=!8b0bN)*_qSbfq9+rvmCbdNJ6U^8eiQdT!^Z$?1ROjkm4&V-?XM z&v(JjBO?sT-~1z+PHN^-`F4)5E>4WSZ4J+42mf3$Ae0qvI>r5bVh^M*N)o#{h+6z_ z#bYoe#stCj7^r`ckl%J0({aU~&27^Q!;rX0qR;$h64KoRYR|C#H8dj0Z}Y`JZ>T+6 zugLJ?U&3V7qKx6ITKW0sTn+Ku#snuU0^ zk)2~=Yt}Rt7xgPcdG2P;km&Dyfy<;bs>7%c$Y(4HzXd)W9fSOdd=JBl_o_V)lm4G> z&>S_ww-WJ#nH#L_gj@64%=0P!3wX~VnT-F4R13fdaTG+$(O9Q`Eo*`F19XIn?bsSo z23kZ#+2!vg=I`tkTgG-2BhrxB17{USLu;f!{Y?r*L2R*V%%FjH1iqPU1xZHijqJ`4 zZyTOQRsP6`HUfQ3nwlF0ep)d`4QuO4$fw+k9A|v#UK`>X8pzii?>8MH-=gJ3wZPA} zz3l&>@4alnaUQx`qs6rA7vdvLS~@mq*!6tDjOZqM9Cl581`VQ&sN1h^sIwJ{29nD? z?+l_g{b8AE;e75!R9y#n*2jx9gJ{iV;#K~cr`!Bp~1;^{j-$$QD8T<@v`-M6GJU`#e z(_?)Jx%VcR6XWq;)c5%5oQ4<-t1(45`MBZ&sZe8Z9p@zwTw8c^UQ&k8fB_X=qZ>hI zg;(h>^}x$8d@BySEMmH^<02w}q9Eo>N@&K*()t|fwB?xtUy9c1otvY&wTf)BQHlG1 zy#Ss$_O~v}yspKFSov0!1#b=B`;u%Rv*gGhgei>o_rpu^C%}5i*mpx0PU33S9SAf^ zQaCyWumZ4Ai5F&cZLMB!Yfo~w&&wu16(5dLe3?xh(Is=@$BU&YN4WoBKp%4w2s5%+ z4$VU6pU2D{?x{6r2$xJ{7PL1JX)s#{3g5TZgsx3BQrXPmWU46fqgLPRmV*H5?=WT+ z1$&Mww@}r$7|??Z%7)z>GvIDu8;1rrEF2Yzbb0(I)bEzjzKe{Um}u%%JugoTnB6+0 z+!wNxDgvPtDr6GL*ieftB$vxgW0d>h6ynVd)q;51?5!*+9DchFNRc3?+ zdV%Vp*#9R&*e4Z4;@Ny5ZC~{bj0;Z|hmKqGVKn&+U$gxGT1~#Hje-Mn> za%JLu{-s61E8BmdbH)P;0f{04P8kfg%lH3+N2mPsOP9m^!$NC5ZTgAHKurHDTxw(n zCy@S;21bh&k>RhYw!hw}p1Ov{<=UXx)7T~_Dnr2lm&FRKI!u=bF8{CM$$u)K3k_X< z#aAPWoVO;_W*PO^2MdjUaUL$=ppsoz3P!7f)w@paC2xjBz~FJLU;eZhg_yu$XHdf1790unAaKOAx$6 zlVgS-Y7V{Pbla1>$swQ5g!lqQl#}p=P3iVg$;}(&fW@Xn4?tNk=6PY1)I8pLQwnX-(p@X}dn!|j z!hBr4j(FO?D~Ed&SG`ktonc!38z1DLFJj?~mO$>CTusXI+NfZp_yqBL73`2o0~Brf zu7M=SXxeQ}U+YtcOvF6i%yl(>kg9)scsM{k%!W;VSCg`KxXq4F+l5fLoAAauv(~Pm z@36ji_PA{nrw43TKN4RbQ`iPIc48Uo;vh($>i;QeqcWr_wEzkKgSxKn(Zeb+x}h=8 z&huN-t`LmO-p$1ip_CaZ^nHW7VGYjd#|1$htA;3dZ7oeQ}(YMR6yxDCELG z%NSttjT97uQkqq64o@_6+_h3c1Lkcd{oy1@|8(Mz0Ga%^v6z`(M(3N<=e7Oy@1EgIX%~rEN{CL zOp+Ea&$3_IMn;#SE5FVOzS=twF#L9!JibV=V3`F?3}6)}o~``sSYNJbHg6~Msv&j8 zg3H&`cz+*_iV=Sjz+2+>5VR!9d1p8G@>^>|t~i!aGQsj#TDri4A&cYEYLEj@o1l6KVWVT)R%_|XOq`gLmn0zyKyC0_#^0Fx`h?~`zbchu^el4% zx2jN-?=tYf{v*oBM;k8X=z8G+S`B?trGCfR#cEnQwyK091uS-=tR(a4fYPi);<%@3 z>AUSas%ULzV-rSp?%#v)gRh-{;0+;8oqUJkE} z9n!^L4>QVt+^{UUAT^MbdWbWXwH3NfQk5x{a)nr0Fh>fuHqmF6@axH>yrp5lqLt39(BTA}OVQ$?i6chUrE=vHE7zmvq|mwh)CuMPU4VH1m0Xom#PjG!Eep z6re6zZkh{fjWiqec*yRWV$MK+rIcDQR+S_4M;;FE&y05%()DD+Yd_-p0Abl1#UcI# zUr7;f89!){dbBl*6y2TM4D?T5LEfn@dqRvUyaLzPC;il`zPP$lGG86$OPQ)Dh*Wyo zb^Utl*PMh*T8nfp48(>eg%#H>vJ3GUHUU+~LA2IBpk^2po`z9ODrdiln;x?VgU5>$ zS(jbcYJ~5UG(<5&=?;Rzo220QfbQe~zo8bXQh~x~gK0z(cZ7ruHc?E7U-vU)8ws3` zk!*sSmQrTENMh7ob54HDUdVSp)xa_AUXVf}Wrn1q&@P`bu!c_EuMrHVgZD`d6FiN4 z@nlea9#KxRy?BL0ArqnGROz!5@0aqC1Kas$BtE0ttm+Wr3a$3R2DJNQr(= z0=)G;cNHMt#c`zV8J6MsTcc=yqVTFYHE0Pc!KMY_Dk9W=Zc0vvA_jp}7o!sdVX7Fj zZlj9hIAVe)29$Rb2dw#O+Okme9+mVfP_nx7VHxW@PqO-(!3C_#;{|`s$vcYMA>6o1 zZ4#`9jfzIh%klwOiGe{^f+`3ngetOhTE7_eZiJ)Or4;IKbIk}h5IHFldTs<5)Sq3?aZ^zcMwY~XZf7!7H!l0Fv>O1tnLLoXvL5eEPqi1qE0F;N{)OcWO1WX7fhClYtWE)pmc zU6KMVYT%7))RUHG-0xsmDU>4xE0*wqtal5VP*?TeJ7?_fu!M9=XfXQK7`ShG=~`*k z)%2<&(IsLVi~}PDn>C=Z?exfSKPOs9BVia{z$P?J=_oqxcwrU{cN1oat|lQ*r-L=d zP;62Jg+>rJ4ZqQr_z$>(JD5u#SAMFk>rg@UCLW-?OHv&^6Mv^PK%Ko26R6@Jk2xo4~Cl= z9qcJ;NDaTe-#Gi))vw`>n`sALQ@Lq1qyKZ$Ejs*=x;F7}zU=oMZjc>>gkxIr=;n6y!Ft*3-EOB+O8*5q zTd2T=ZI~;oIPZsPn%ik0CFWBZ`K=E3vK^8huUT^vJP<{yKNFk{x&lzkLnO+(faJt*D}!}@YIsv4I(h6$oL_mi#yMs zgdqNm=~2TugT%ukd`CY8aU;X)!o2A8pvr9>HCQzi8NfF~b*uVr>~!$evnUrU9Pyix zWZe(o!cBqTY?3nTX~|P>V)`~6c*ud#fNS_PM;`Rt^!Mm%`|`y`v$>-5&sLD@fr27E zC@@P+fNyzo216kb#<3wc4X*?NxK^sUsHS0I4Y{S*r?V+)%qUK!TS5Lyu=}ynS1Rw^ z7J9*3ylb%!e@qb9UB$G|w(#T>QE@Dg)*AK8?XrX`68jg%=Y0xX&L1a*TE#J_2=VwQ=c!UKLf11Q0@ zrBj(qoByGUuy3^8=?~QM1$l^pVQy5M?W;@E6$p3kB?DpUFgd9q>2mMZ6gn`+&YJ+X zhW58c!&y8=tP0KDP8eY|co47cj`JPl+^3*Y^QY>etJ0{QK>wEcvnWdVz^_KY33i1S zy#B~^izTLz%Ui3~tH`T)-FY>@7K;pDgCr5w$m=s~g?$q>{zbWpj@NW>uQ zKs?gy^T%~04_h>asRCTD`*b~%v#bgwyxucws?={i!nQ-T3{X(vCSu62+Cu2ZPH*>m z31PyJ%o?psD6}PcI4^!`+^COHDOX!&s@M@GD`hjo}Yd z6?`3ef6rS6Bh=SRIoGe6;~NZy+qyeZN#NZ7uTj|ri;0M zcW0m>0RdMLwaq{C)l03X)|8mjYh{Ejwa8phskO3NgR-r;HRT|c;b;p9e`OL{wmFK> zcwFvxtWGam$QP;nfu0jqrwq}m0oBoXO-yVIRwnJnb*b^wR##TCqercbuDqr!#adfl z_&uAUxLOkxMtVVQR0B(#t~^~Mt+L4`r>XDb4jGMt&@X731VL}BrrNVJ#gSA|*8#3hI(0cwlD|Es1Smd%knL`r|LHcnm;B zY;h?E$LQ{S?Qr;k;<>gx2$|Vtle5MT$xz|zmYT%93Jg~1p6F6JmCi(YTEqu|F7#c& z2Oy0N4OH8i_6lkquaDVh6RfNP)ndx=|5&yCX!-bbL2T~0*V{8s?uGc!4lG^d3;5B} zY@d1rso`A7Sv_Kwf`}KRV3=*U^)#)%lvvQUL9;ue!jbwJT-8nxONr#k`cg5hUni5M zZQN@@(=^JI@ovX3Oq6CHjGQ_RV`6BE=R%owo{~o9qZXP->=L|?8(;zd!#R(PN3(k2 zBdyZHUU$K6G-m|jtLu7GgtvLUesjp(GHwQPm5Uig8FV2EWrGN2m}NyI6=~AET~bX2 z(z9P>%MAN^Gc(X9)6!{7#9~T#UuIo1g{)VBx@1ez?Q?w3M`1D{OhvxZIsr)#bD94Q z(;p^P4P@*ElVM@Nc{Y(#T~GUrOdf|)n-b-yIH|uB8L>!B8;o2QDs4H;J_7BQO7G9X|Cxi#hv&vg?}i`*9<)+<*uGs;^g*a2rlz7GXF5ajKy_rw z)MYh*ICMd?I(?NR(kbClZUW%?r)Gn#aS00oQPJCYtvJP3^y0I>SmilC%Fy`gNf!Y; znMJ+Amx%bPiJo>p!HZsd+QnZAK2h41?1q;&MU2&z4_D3>UfYFE*qWg7ripe|Q0A(F z^C^W0Ie0?bM-Dq!28Pxqf$7NZk-QZ$Iq`2s)lS-`_KS0PK z(H7NU>bg-@994uar}X;tz~AV^*5PE%I3R6_z9x%33p=>FPJVb2?)Cn>II%36rX(>);IeNe#hdfH<)}IT=|9c+hJDgE)@OEV|w9SVenCZ6X@jIhAY% z9QeKvzM(t*_B|mf0wz3ejD#}%%AgrI+0O#8zZURAWQo90HeNgj#8`vMN}1#wkB8rBpYue{U?E+^s-QykM0-j zCj5?limmS#arI&^{1Xqws;;a3-dy7frqFH$9nld{F>wmW7iSccO7gXh8D~ua?mYE9 zK(}u{QI2rN#5>^Al4z&tw!wvzlrqubFkfZSTdZs%n#4z2Q`c3Y#|25{M_EFD81XEv zRKrFL8xO<{leVsLOEpx;Bcr!8gZWrq^{-GwbH5nVt(2@={4_1X?Iv(t&b?D)Ngji7 zyW7TlDEWd=B8Z2yT<0I8-GZ0sqU)0U%;i^Uw}gCBIsIrU_Q=$>wTJ>XBvV4%lW!wH zyXOGo76q@tk=vFV!Ri7ff$TKKo?@TioWQHF_|lvu=7}~TQV1GPzO(@3r4yV}=$ss5 zG0{J#GX&P^aVF8;ujf%z#Y#KJxiw91-DaTt<6!9T53B31j}B@pzMl3Hwk~vh&k}&H z_8(uqY~CK|e*WYx-EJ8+&5`2MKQh+Z1BAycj&oVR@Q%@0R>Mi}JJM7Q`}N&d4#mza zWQJPd_t!KKsN~bAD^GwYQZeA40hDHh4!tUI6N_VG&-AWcI49yx#a$Jg#1dL6^fd=1 zGt#je@IV_9H050S!xoO;dd~V7(?{RpQ)+cW)WTjRK(iki~XAoSUh^{jWfUmj*P*nIVN6P^5l4%!sP$sJ2T#R=snp3cKF zX{sc8Qd`Sg<=Bk0u63OFa=F9p=hoPu3DI+H>H$nfEH)z)C~m!+Gs6(v{1GVBRC}90Y|2Pn9;0mURU{#i=AludAzF#mQ zKw*X*#);TswA0gU@1`&sMjsdP~Sz|2hz>=e2E`^+7N{Cmq&gs_@>LfG8)Y~ zsi-O(hM80pAo-ddVeTMCCDn5?PuW6}W1y&a*HT?xA=-`~jVUTugrOLtnUm292zf0x z4?!A@ z9@mxc6_Z0wCzJIp3qo_OFl$@bvx>fd_6>4~HV7}#0w|;GC~y9z%>I6TAvrCBKq~Kf zXEwX7Y&?6fJ|PJIbBVaj9^~}u%=7St0=|M|>dQXD2=vH)XEX4|?E2FHb)`Q zp@dti%}Pi2c|UGk5r6;PtQX;|oLYgp>7h-N=62!y1)IhBZ;cTX(`T&)qId%U#QBga z);!|5Q`q^MN{jo@mMw%%_ZaGmtp>EC+eVl3V_A@++_^v4C$oeW(h38#r7xJ5tWbjB z&k}OCcvQp#T?H`tMOybfQ$cv1etVKczfslq0fiF91t$?5n9rjHMWoth0o9zB>5=75 zfc?HehRWsw?{hjC;gDzB5I&W1FyO2RqOv-B+8FaQMTJy5#3C_ScWH*i5&M#ZwEIjf zdQ#*qJaiVJTBjd&)|$xBDvB_l0&Fc4lB=~K3gFup-7Z9|h7H~p2rT0oVob|9fLml}MBE1{1DIC`D{9eF>$Y2#loA38EY@y^Fk zyNyRqs%G(<<>#CSOu&7zM;WiPvnptn_SF*ZDejD8(y!9|WiefsQ-}dsn&m`#2~Pi< zgxQbhk8q!+Z#;5n_L2-G?IIIYML#$$Y}4h1MEDId>lTUqtl2OrmetiGGpTAksbE{! zD+c_3Ks+FmA)&<(5_5?aE+eUGW)=Op zE*ox%W-)3bn(SDL5^JlSbZbn>qv}vTJr2RI<0*hzX`9i?M>NU<`!$8V9D~$K*`H(N ziRN>XjeaIviRjs^Z&VTyMk27C@+-kYHZ{e}s+ZCP)B?&93Nz|erEuDRbEf}Z1b&qR zmMC^PJ(p~Q5}V49ZqhH#CZ8Pd%?SF91k!f@43e=V8%e~bC8Bw2IFeKgvfA*VzF=~fenm5C$3$Mm zcks}n5Nl05sdy*!mTBlVP0s}Jjk&IDB*EM-h*A9(tkr77Fbxfd?ToJ0ODq0S6?Jtz z!lC$n_}a9@aBB~>E!&)2Nh%e2*fgK(dl!dF7z@)NoXZn9G$j^Cgjd}iECIXHt*(@$ zO5&%(bvQrhE+t1lJX>lk{MOn_)5-KWf;LE*rQ3DAZoG|3R(4J zIv^0KmuuBK#^$~qq@)~lobJuK2DAkn36_)7Q(Z0YCZ-5h@=loPt7A$ftiCja2!shr zA!0ouZJ3!|UunQ?;o!j^X@lD+L=l4XJVgnA$?Wv!4B6sFdKkuT;VTy?D- z5B6_mU=UBz(1IdlbA~CS0t`cIPM;PSH{V=2AesU{sf^ziaJLfKou2??RZ%RT4z0_F zMYlr|KElAEwo z)QW#Wp;C4jDzwWnCKs3QwxBoSVL#7B(Uu0;imOZmIP%d(e%roR;<6Jt+UYcGbbKia zgWK$aWT$R#yQ&)zT=-cpxj`SIwZW3AAyHktrg+bBzB@zoih6{x1uRPaS~uMe`KtD< z$p{Rzi01u=oG6T$<~Ok%p(b~coU}UagCXrB7VZCeId80x`T=gK5isg1&W1MAT6-fc ze6P~d+ja;F6|`-CCZ#rl5Yp!PddYyjr99J>J?t*@Vs)UgTt<9aGj+{#tB>2=VVyO_ ztG-Jgu#)Gn{HJ?7LJMQ7^7t3$ukJLi9Sz?cdQHD|{G-7ApH&tgHjGI$jcRn7p&!W| zj@I#O>`6|dt{FdiwkNLa1iDg zN$@I;A55=U#3(weO*FwsYWgW}J|T68a9@0Ie%|9mxf0G`F86}wRZdY)3RBakO;&RY zW<3b4lOtOdKolE6&7*v6!_Zju5+L-2F!sY)MrGHi6=W@!G&_d{4{j-=^3F#sOy$(^ zd&1IuYfm6>m?rZ!>Thmi0ZWIS)I9#iB&IYqct6^5n0i7!r=^wti*ObRg%38ueYL2L zL|dS+5u5;AUeZJOC=N-4T6E!Uy1UFv*^Ab4Om(qSmOOsMh`Hfu@9}VVD0=E7_jBuY zx>Np)dpuY&?L|a@I!5Z-s=1>m8R$eejy7QwMmHQ`j1$(74L5Stc>Y4+&}|t*z}QMp zxwp>m;BGa3pbDu%t)O6967Dpza8~}6Fq!4c2w=B)DSTSU6p;X$%!fXRvYmR;78ZNI z@YD+4oRTJM9VRS5x_+k&M>5~mQhBpiP5K0e5DR;VS=dk5qY?7A56#d=Z_TojFWW#? z7)K*WjcP+@9*rp@IfriE&{+%L06lF@G4H6QZu-b%kMZ>vTEZ3So#!QcEIP+tq zY~%RuJ;k41cP;5H+>7M`q8&-Ll4`u&yq3KjIK0qEwK1(m9j|Ugf%o!4< zy1Pg~7;*$;P4Wi`?t{s`KrV%=ek_(-$*!iJgtw_Kn*HYaPoUBXiY^`o=om)f;r)8p zSd>y+XxV;$%(!h6s2G*BxmEgHOZui49Nmq%2j6nA3+mJ_u+5v%-0DBoh zz94sRvD}@7FLqX-bJgtSh=IFL)ydX-#^bZU&7K~_V%fO{vB-(z#Kve57!nU9FyK)d zqNboZO^QK;qCBMgy~@2rq>~ij@q+}mb5{yiX4SVGRHK*my_&re(3LiAvP_x(s_aw3 zO;BX5^4^gACXD2-fL9+k#2+8n%Zilyd3|K>7 zFi7O~j;)b-c2UzAdMpNhanV5{wNDHdJXT*^NXg5ib zvD=$0Og&Oh)Uq0ziMcXQ&V6x|s2Oaaf|(KMK1b#g^oNAWi2%4J_I0A;2JAOvqi~43 zWnM3v0f&y%kh`Uir&bTd__3>4;C_XEk#|a##Fgwh1U|@ct`&^X?H)Ld$7oXO>OxZk z)MA|)MA>JkR^!Ii!YR4&VIsI&?$f`n;-RA>SqJA=A8-4HSZm#7jD!QKi2T`SI2C`u z=>PUK0?vTThxSeK3KnxG-$wZ1hCO)X$o!Sq;&$XS0L1{1`JKKW)yHP)KU4E9my(PF zzj}`Gykk%fbtWCu4*3os$=lE9!Jw&G1Z{;OIvu6(D%nfiaVw}im}nG87~ti#?=_9Nw;VI<)s#MZaZLR zFi%_H`jWnTW`4KsCayq+b8s+&VVxJY!_b~FN#~3Y3Bk`OX73O{LS3FYzCibIDukz> zLlqPWk0G)=PTOs-K{s|7Y{JQTtXF=<8)HmM<6n4_yQW7NSMsHTXXd<+fv#Jcek$(7 zqwqvv^03?J^V2P2&wkzTSVJcQ$w7+h4I_;r5}gY^3I}_m<{~s;`VykHlY2FRG6}(2 z2%Q&&@Ixzg001QV5-a#X*_e+*>cwA`v-(|*kED!<1`JvN4a}!Maf;e7MjU|bhsUS* zF^(ZgQ4)K?l#mT`{*^99w0QDUeroLzS4-#Xgl{N4l+%)VlBh-mjuVP77UFUM@}!E> zfvB$u8mxFZ(sC-|cKhW6dfzPv?t})0yA9~SWh?3v@b?6&oE?dScULUjI=rY9v1kDF zk~%3r??ItK@AUS5YkxjD}xaS(;CdqrJy zc?g4VwdcrH)O|oR5*8gIjI&0}mwXgC(i&MFK0Nve_(MX4Sih=$C0r*);sVIyI?cW( zI6LW|bog-FUd6&1gYG94JXG(C$d6sJ6aXX_lC16RH~!p0{Tvtca+NKb^NSj68CMqM z3{wgzk$v$l7$HYO)R~`zly*9F!wG*nEZckd^*M0$l`D_P+O5`XU}h%ybPoq>59I*`KNBr^a%=n7 znA{>4jv>WFp0)CD(Xcq*>HZL~5g}lj4kfUp2!N>(6>CcZiI>RJxjZ2#KsQ^DZO!IL zt9Pf_szZS6y948IM_hn#?wd&-0+sZUg!^W^$&tsKzYZuT7va0k3j z&%- z1V;jZ63u^|e^lokSx}MyA)2n~KWZ5y^TvYL+Y775gv912uV7p5rr7pCp|YB-9a!S$ zuS=hbq65bi$9WXy%oq`6FmyINBBkZY3Ao&8B+ob`#p7wkgz==AEC+Hnce5sJT{)0i zC*b(_;2q5@R4$YSo*KA(?b(*hH=X(}_Yjg44`2PK9*!bK>`31!0Yst;2RGC>*QYmk z6~m`Jty&UW+(4Nz1I{TX5M57K=l(HN8J!UqGzKC+4>g>X-MtJ_6-$tS#hIhtm2lzw z0M&;e1V^skmVJ@sztEz(AmqGXgV7gAEluO6qYoMt9-l?5r2v67q1zGY%EBnR4IoYA zWal{}COWF1q>(?}^@U)^o9ht8hJW46yO?cl8YqCQQ-VvOZ;x@2VU$_%oj+qQaCkYp z@0Mjq4-WJ7xo{7&p4r%WCDrTb!WjYW|q>8b#$)e@(>cTp^r6c7}Rv#dmOec!K#ecb0$QX$!e8+{pVz@ zM?+%^80nc5vT{@EL#$aoM zeS9m3I}(^%rkx>FF}kyoPxBIyg%$GC->>t_%Z&~DWn;4gQVsrq|@NnIUW#_^wg z`*l%$Fim?0*Tmym104mLI`OG|<@fZ}Hfq2)dhTC_@RV#kIPnqT(LN)*%px8J*?;%d zUqM{8H6jeyU?>*p0SbN#l?SRZx7ply?QD&ZZwMV?j-vuN%S4cX5(!GPSA&NaiDSnw zY}_!bJm{GcGaON&M0#4N!|T;&feVOd4mD=;;pAR-M}a_e_D*zR$f+AIgfpNNg7h!B z-}e1xwy*g;3tm2ZZNH9Gd1My3{ZEl}0!Bic^6;&_IfuM+R8ls=vR*^&;1)btGsY0+ z%=;dPoHruiXxMOBSvk(?=j%)8eNn(qEzH#i#&2C3nmq3*jE09_mFKEYs?=U)jMY`l zCl>481PmZ2DgK`ky&T;LfzH5@a76CI&=R&&~)GVzf zi-yaBT9>{?plbQD8SRK~%MTy;PY2bZj-Op$GZAlzuYNdru_Pr-;=Js23`^R1vb%bs z7aW#e=m~bd52`n$C)wNP0{zwDgZu^%P}yz_H@Ip4d9%NW?GjOpid){sW;e>wC=0bH z)5VLDv}_GdFszdm&1bmwYgDKBUWTHDaD-c_%s)7!2MRDi^ ziWRl2`+A~bC-_Ru#zHO0(wZjemyfncE!v=qq?vPz2~4Rg4u=H59qSgVdM=2`V;KG_ za0c^-ap%CK;Z?=zh(7CYqMvT}vKg#ge?%i~1+o&}N8Ar=`qjhE?Bwo5!Q?~LdMP?S zaK)?tDmC?+HUAU4M?2~lguvJcvA{`3t(is{#%~LzH8ByOC}tL2IIJRkW)j4moHhi) z0qF2PIG|s1`YI(=jd1id6($RFa(&y9Yje%To6yy=BEvE3reASK)dLI7unP5Z7(Tg` zdout-?_gNtIDZDHN1#RGa-AfioXQj|M`M@C0Z|G_V91cxsp` z8f6kCgtxHsZV=!FJbAgn8cg~|Vkz*(^l^Ius=NWe1o^RGR^QPpro3$O<-i1$YPxg~ zB9B4SB&&Qvhijz|S871%DDj?`d+tnL0SnWl?YY2r+;YV1xk0J~S!Q6KzKyayP z*x#gQGv6v?zA_4N2H0sWH+-KlPi>+i`>fO)a@RzzD!l*A5Gl|3;LuFx1V?j2!(=T| z#Ze);j^Jti;z#k7Znv60dHW(OT!+vp0Fx98Xo93SKA-bXg)1I6P+KG}27Y9eIS{_h zXH5MKF|e}}vxfITiCIzJVa~eZZF+hX1SpT|_nc+qx&W0?B}$_hX2KlPB_T|Lu@DK6 zoKq%#h$CSz1K+x_A9WYBaPvrB((%OxbixKHeMg!-Rl+P7gmnqo+^T-FidCFLry?Nu z(NGmg)o^n(5l}S*j(^@e?Ds_b;76nj*5K*r-<+&vt

tXH6YnJ3{r_;=UpCNz!~ys^$RiGMB7h{rZnArhZD- zrbjy9r6i^>5PQILwZr@N8z`3hn}& zAe3oj6DBfJO0oGvSP@G@S2cYd^H^tJB7(6p+O(Qq(^hany{z^^9Zxve+lR(%LCWjx zu=d=ggm)!JuC)z`U=IV3ph%DDvZf=3*s_>*Vh-kd!^(vuia1>(hVWTI(XUc&l#G8Z zlQBz%DR)DC%EtiY_}pxBq}%YT8hc#aSy0A876|6|y;7TG|IgCgJ=%Y>1A(DBNp!1m zXK}TF38!u+U4lSX0S=Rn= zeFwbqY7}e86*Y|`$RQ16&8*3TJ6`~!BF!^wI3Rlo#DWF+uqKD8nJhX!#3wUIzqE|m zhS?kxTB6*hv|U4Ut(Dll65kioT0=Z2#O8A99LgsDVi#<;%r>m&xASJ))MrS-y(aH?Y?drPc01DBb%y_zK`FV2YP3Kdj16|e|-RE zfTyKzZRJ{9Tg=p+uaOaUim@9;t{AejafFCQ0>RxYXk_SZ8=0l%L8@+<_pN zWX#Ldp!`@)d~H#c(WAvY!kc|U23w(8YNHQE^1OIc z6L%HP6x9w6ef2W)wMT`fCg^lpWbblcu#dG75M0PAYG}@Kk$*Zk;EK%py z?Irmtp9Wf@Rv^V*$xR&sC)&w0s2dG-X0yPlon+k8`l+dqX$?|AJI4++Y11JMF<69l zUenFZY>xO`|=e+{PO3EjIFiDu;zT-YWk7<;FZq zcsbY~x9e@XD0bcClPB0KmRtS8l&;mMFIqXtT^*JAe-W+GO=hb7tv77ME&Y{<8iMxm zL~|5l36Ro2J3&2KkFWmT?%NVjA(qBWiE?Q>d2y`+2@T(Shh1x{l8pqOaVB2scEO$J z1{e^VIRnzTJFI15Ohm>{QXtp%8En?IOI<%YOzT_Z&|NKy-a(Gw(W}pLap3=4f9Yyt z4FW1{NdUaxqGYaZe{uXb725^%jhl+RsU2!lO?sjiqfzkmx<2lVH%^ zi*igGb>eXxE?+(y*LL=Sh;KG#EF-WtE*$GL~eHqKIiJIQ@<<}H4XZcHUp zYtVKq7pILjlRn|~e1CbuWw{9*F@i44rI>2)y*S7;`^DcWDf$=AKgax;AlLOGEm4{iBlHwF_c9}y_{KG3B_dYeaOxzY>UwZbM8@c(#q?j*yYqsyH9vVZdp`t`qe zt-`%qZXOxlNM^6nvYYIo**U5zw?POPRFBoY#J=14v$HmbxivKK?xigwiFrL{Rq>xI zv-A(@zv(C}R&)7&@Q=Q-lILXWMm|Mk37GF5)9|+tjC*B-()~U435>tMvT^V()t6 z8A$>h0^XOPNFbc2m+u>7p`ZERe-H9H^J$L)pH>hY34+l*WaN42fak~_Kk{4lfArg% z2jZ^ji#vYj53ZyyhaD;ZpCcFGWGUXCs$lkd&QCb6zJ2&--pr}(lM(43>dsF_%x)d4 z<5mCe^4~xEd&=B_w=BQG;0Jm959f@3IPo*1_j#an>OT*(@;X&UU)1~0^M5}4%3msY zP~-AX)1UX@z%l%f(v5DaC+;}1TRoVc9?yB@eolRy0ADgCe(MJ=3%p|bcq>74-P(V{ z02S~&b{6uTmIHVp2wg@WiF&^FN)rb@r~>|o{gF);Q409>4&OcG@q?>k{NuS!noD(P z3TgJBt$e4$`8p7SbFpV?Pjj;R=nFTst>)aObx^{R`8F-m+OfXg$CWDJqg*v$ZB77f z7c0uu8TA+mNdWW@_JeeC=Dw)hC0phXm(f4h+A;@9zd!!0eWM-vGuW}ej+p%;>vEm$ z`kwqw+5JP=v%OO7_MG=q$}N|xG1G+?GAl2sMpw1ZhI5Y;csoD8P)HWMjwS10YADT1 zY#AyKynu1jzrO}18(|83fNmX5UOICL=cFvibx&3NP_@LnI_~N^Q%f3Ze(x!56ji&M?Vo-<=wX0Jylve zeKmUiaN-wVT`kt^MFp>KTG67_>61J>WG_>f7R;KTwR@f+gRcpjtxfGO+qaXq9Zh6e zX)*DEKAp;!U2{XY<@%keA8?~PyA?qrdoRl& zHuV0`+)ZZddX4jP07Kf5+dOqRE9sv)h4T4q%I7Do@3CEIHIFRu-r)IF*`ufOtq#=6 zIp$%rN>MrM5(iF6XY}b&$|w5VRLMf(bKPdcR6K&^?T@^sV+FXpJ`~8n>Ajv8xG(46 zhZTbt+~9r8ZrNWM-YZ;9MaWkKPLjBrwg z5M=A+_e9s7!?=7eC!TMErx2IL@jrz~;I9#Oby06RYj|Rqy06KvNpI_Ebp29z!W7$xml*i?ycHAr0aK>2b@zM8vxQ{GU4OQGZ&5D$OtaiyN*lCg9pl#wR8TP_CStO!XXKs4gj0#%T#jG0+Psf#8`{R_%3bJaxH(NppOJLD52<9OM_mML`hp1CSgIysx((JptZE8 zm3mm27G2d0`P?+Q7lzs`HsO9 z4jWq<=+FR9jYHNS$>LZ{{N%1m1kSA#{E1}%?BoN3On24HLOBd*ADdki&Zu7L!bRs? zxe3pbZbr|RGsljHRSF1D@X+t^+~|%ypJ#Jlt;&RP#PPysi`7^Ecm}b+ZWf0jK zxHL`F%Q&o3OVUt_$6Su9njRKWgI*|4M@x{q2istCUzs;`rrvxG4m`Ng#a)@OU(HoZ z#z=Cnc2pA=Wy}tV-tKr`IzAH1Tjz%64nB%b446|r)DiPUwJT{^_D_|C(=Yp;zT&YL z-ooyjTyrmkx`ELiaYsPjKJy6s)jjwNcY3)IMR@Vc$*t2$()cCb&t>l%JBvX@jRq?Lh)ez{yK|LuTlx|BnBQJO;BIBx+i*xT+k6{V&D@ZGVD-)Q8qII6w%q0raa z7!~Ji&(^fYAr1t1hDMoqE1rS=^x+YjmzRs4(O6T)gcqLiEKJDq0w4+PzhC<&0As|? zNDtYU)CD)z!^cVrI0r*{w2Q(efjzDKZ>0+ojx=am*gW`w#9=NjL#5B0JBNzIISH3+ zg$?V*R^F^zXKu=L=#4UcJVK`m==!YL;<$2UiBnSQ(e}P2+R)fACxOa!+x=3TP2{E! z{dqN-Ma%{erEpiBRxr5s-ddoENZmUdeNqwIk&e0q;4=_{_b&U`#Z0ozUSqRSwDmKJ zvEtrsbnw*|y|$d$<^`R44h|N#rDLC=H9Yzd_}KtJXU;H+t=b@%1j+fWF%qB55HWBN zo)DkIUVDT0qwitBz-RFi7PNkq3`Ve+u*^oGS|bJTYd0$!2_gmkpK zF8^~dIs_EyJP_(h$_Ke+qrFi$JUc|t+F2-09J1=}w8S6_l2r5R9XoUu&LsHE#d^U6 zWDd(CMiipAJsCo(k4eS(i8Zd|G({~^FiouEp_6<$CVCOM4sth6RsTC7R>OxM1zg~C z^#u7EHQ_U!Fha@s04P|X$w3;DOZiyHh8YEM7PJ6l8EEAxKayk)7Q{?xCg`vvvGJV? zUq*WZgYVhH^qAMI`5I^(s7U0=`)N?DtXST|*d?vvgfp2fuo4-95U=@*E>r#b>+h{G z!nEoO=_G$ct$1oXz{#sY zQL4Qd7$Gt6b5>X1lp143Yy&CHs;&Ry;VyADygYEi{K!I9!!?-d;HuDsouZB?0caxK zt&c0{o(SoLGlUaSf?3dz$)Q+m1(+Y{JITUaOlZ_Qxr7YF z7ut(i=jmCAz2TXgzMu`Z&Oyc#DgJrfrOZ5i`hG{2hVx5d^d}fk)F@;xvT7jD$6< zCflWp3iVm{uAlmY6gS1*+sdz0kVkFdqyOiNS33-E$;t?fVrDmByGRdn$ae*5Q> zR#JTZdM{%z@e(~m!`j#jlnpll3BoqoAn;NYJ>D!`?`XyxZc8T3Yb6~#JtKj|$eQC^ z8426n0AB6cu%JduWm72XZ5-&D3P(D1AYxC)hz-VrVi4RGXB@r}F09IeGeVd>D>Z2O zYXbOj1qa! zAyeG@ok#zxSi{?xq%ABKy-5Bks*}aKgGF=ldhEJZK4+AZz%yYRj0o*@U06v1zSpva4d;*P!gBLw;88Y@mFa#|5P;mn)p!BsHCixxxVdyTmlE~db{kLvi?N}T5# z=+c2Q8!LwX4M(`CyYm-sr3yRa8j0uuiIgj+bS)Ix7H?E4Xi7w4lQYYL3q-xL45dPm zs|uUZ*F#H<3Y!ZCI$mN`(KJT4jiW#(y+Y~^8SNwFR3 z&`dzL6Fq+~bXp{z+c80gkRs;xDndxn+|2&MVLzZNAmc^F{v$)pCj*h(bd(zg=Zl*Z~DjQ=M;9<|w!;R`@H0&Zocm zpHNv$CX?UTi*CA+=ysD<%C_dGq_=gP0VP}KHcd?!QR{NQy02mZ^ZZVAbLD4V_S29v z7N1{JS%}ry54cTvR6q~cPwUo)BiC8bB-X=?|5tum&VToF*VXFj-W~xgMjj7^_V}PU zM$e}EHLl5#pYvgLJU-w749lm3S%edfBj8AZ%M(ZCP$YW0R1JW-EN>*NY)!(+IiVe! zy}LP$4-Ai-k+AbIPsNRj1&ladVxnx^hqj1f3&+L2#0tf6IUVPCU?}u(Y%5}=_ouoq z`Tve>FUX;%0!wd))6&Ed%x#Ljq}EvfV+GyHQI7{ZJQ@N;He;+-2mn26@9k}d^g{=l zv^L~LT=RU-lW<=M?Oq2Hj%7vbS;=*L(YVxMz*tt0y(#X(|7KesC>++54xexrDQ3b5 zX!*YnZA`e_PVHo8E=M4Q<@3d6g{PLjnAsVtGI776BG6=j#_}l=J-0cB1VQwN=rnhH z)LUiZ@qq*llbw|X#rSc|l<7f0*4gmn_Pmu0(R5oedY(^9L;>^%f?ke$S}+|r{-tPo z`PJ>NS)@QvPuuNj2)dwB>wV4&0epU-7qIq{leiu_{!%7OOKPVot;Eh=JPocNq-Dk7 z|0M51{p8ZIx3y}bG8|rx3LFn_*4Qvw*kDyg#oLp?*S~a7R;IZwZ#`ce*6YlaH^;$S ztJ9KGe~sYzjGIqgKAB8zAB!|m%!?_B&Cex5c3^$463kqbI3 z7d;aR^zKt4c_q~HetU`#!jJ8|PK_PNMx9?B0ski(PODA<_<$97vc&7^x=4~eTIAKF zGzA6FDl4K|MID^nu{e-}vBdI&;{*SLYzCC=qQ5>c$pd51NIY4`sQ}zILq^+3uyM&# zQVWnqH@1bG$et#jb6aq929R<@ zPg_cdJ`~6fKIHujjF;B3G5*46k-P599jcz}z*0qA83%5Gc>^7cD)T;wNqQ*-kLZoZ zwl|b=P&)kjusY%{zRKR{BU@9(nmn(tIQOWY?CD|_cMS)NvA~J-Q0R@cNp1s|{0v}^-2fZ=4cShvn(cEs>WcZcq$18icf6hH-WnQ=R7e}uX2YtIl41q88vz)nJ2 z1hG7~$^j|}hX?KqIrV{~vn%dXVswb*gCZB^a`es&2jieUZCYkpT!Y6S*e`2m&kx5( zq?et%d8G~Uh=p}n-}*52ur3OEX~1t-L>fyqkeLR(yxDVwE8>)j_*P|3097O+%f?qR zYnj@7GMUUF?#H_aV?g%iYGT+&1jqsmc7z(j9Ik2&4lg4jdpn~^#Mu~vfJu*7>ak8n z$caXe9SJEQ*}9)9w-C?-H*V?T+ z9iweK#TZE_BiB0iUW>F;guzj#ABORZu(`9;0yu1X@fv%x)G4%b+76oNXh?)(o6grJgOi8(_S25kip^W!fkzG&=l%( zbnKW2HlUM66DU4N1g``Xj~#AcCs&|{@9#{j0LyWxH5`k^?2tw?1EG#|!*L{j-;FWs zq2C)^0I_AUP|DF_^1;=X^i? zxr1P{0hzDv*L3dJ3g-g7EdXxAB3h#x!x8qE?29(F`JYu&OeT{rrW=xD*hL2}NzX`# zcI(4RTfpMXPU_*;v8M7yFH`{GZYgsM`H&=6XTl?r)??%ikCmJBjyr`NUb+O`3169Lkmru45w0pE| zedB^Imgs^R$Axlfh|{N6C8@Qo&`Klh?E*_Vl@*;$6C)&<_mzDx+2797FH5LoYaa)=|CmhG-R*J%2 zUBJPpO!a)wBDYRSOrsEn5a~h{olW=B&BRJ0v%`_}hi>8#E^h@uj-{DA@8fhAujR>3 zU-)5IBA&Cpww1k~o)K0+=K^-yEFBQY?x;)4>CG03`?DT^Hbb56{|F#K2!$R-@VqK_ zFL0_uXTWqd`EUS7lRF=LEXefkJVhYcw?)&rFi^ZQZ!$P$r1lf3pam|VcaKBOUVijF(UW#UrqS&1gr^Z&0HF#mZteWU@1k#I=GC z+uLyS$#5^%Bu}QKMFIP_@HQX6h=vN)0x?wJ*c<9Kt=(uP)C1Z~=UU(d7it)}@RzJ}>>=Kr3;CyDG8@ zb=m?4`2i^ugvFK?R3Q-f0p$PVz4aZ5E~8cjP#34;U~o9%Ayj>&X4?*Btvl#G{U}47Zp27jnoNUg-vG;UU7Yo^%D9k65TU^5JYBa~lYx z3@MN7nJi{TTXPK{12(E@b*>5hQ!P$1$MPePylG z$q2Ut?1g!d%w$B6rB=;u>kGT7tg58bf6U!*U^u|_1hTwmev_(vt}(dcc+^3?&m9xU zruw~Fe3P}0-fOu~Vr5Z{`Aee)73OB$x?sN=s6e;~`?~d&VPG;YJ%V((rX}&>YUraM zqL28_8}oMrSOs}OaD*;@D7zT@=ga*-N$nxQ>il8X;+*M|F;PTm$LNXiX|ALuWT7#C zoM?>;@Ua;~m;H1r>C(^um_ff__Y&k#zl9z1)24=DHK7fS**{KmKP4Z2&LDWhmNUlr z*~WfNIRqn901wF-@G{qPkgi)uSg(MIOtw zk9ieI%lZjlei$#GDLo9us^0xE<7n-3#|kFti6&7g_p6Bd#7bVTOMy2c9&qV`ZLDCB z3QVqy6-=N{K*u+b5Cvi{QC8R>jEVdBzkB^YUbqFfiYGl>BOZl@A$mn8l>(Sq}t#_OJ29*owTGihhu#RpZd5n zq9iK-b~b>^37ao(<%BGw_GOG&{9I%Vc1zfHqPSbxs{wE$#$*<=evv&Z`K-t#=#U(+ zFy$g!G2YyBGB8p^Zb(u-&JCjpHHHxOA^V0fGxp{^SZ35pr*FK##eseEM_ZP?`HPSK z?}WVv;uVX<6Jl=H({4Uuhe-gYH}oKo(Z~lc{fY{86K@4bi4Lq55T1Uu6{j@L^LFZ1 zfaoQT!StwTl7~}aA#MZ?hd+Z-V!&epqh3if+i(8rMeH{$ZK{`<&F-ARLMSn2gTVmRwa@mo=YJOE2Pm>_Jm zIRf3Fe2(i%$en67(81;s)U2kLx;24M>jKd!L z_f^owaHv^0L#E)<5m`RtpbNue0>ScHzB6lLLlY`!z%NlT3gQ+tdZi=XWsJ!lpbMh| z`UP}V$C%glR_(Mbixu4R*)A$^Mo>{kWmx+-F+TPb%Al9HH~X!`nywmY~Z%LH5y!Nl;Pf%=J7b%a~Xmqen>83VRfSn%eIc6YlKHmazlVP_}WAGe7*|3;O0$ zC+7+P8fmP3D4u_bjd!BD%h%HvbkbU>0K-RqY1mXHRFoSbv=cD)PR|B(0=c_B+#4rz z%V+-<6Cj+1&9ulN8r7dr$Z=it%w{n~N!W2BQ=PgH@JN>cbrs6= zjpcGd-H>9q6AiWY!FhtzoTV%{?&^eD$%QweTw{*jxAvAT*tm}^#L&_Hd3cgZ0-)Ql z3`!I_U`a{Q&GF9Px#^Al-fH>jav;fKf^00oWcmjJUa(O_HKwMBUC2Gh3}$Ba<$1?G zahfl|%3*a~u!HWa#QF}BDKyoZ1wdzjy`3P`5y%8@kpUVz`-WQEvxe*|2HuCqt9I-f_Xc1ytWq-mJXFQ4eU04Ad?l zQ^-KwJ!6gDke#nH=1J6EFM!4eHWv8frk}hq0#A=~JNsyCIsNz$8%7KCeW7&5QmQvf zpdfNz;_R9Tr51QY12VjGH4zkmki7|h2cudQ{^v^-``Ap{9bkhYDEZ~)@YCj!3n;-J z!hEe=VMph8e{_yXV6MIdemv4?&S{~svXYGI{lAJ_{cnQQMZyvM$$Qd*S4TA;2jG8@s zFQ&VKS=yz>Y+a(+<<9c0fsUrH#?D(+!hvp<2hMegRv@jUbCAoaau4cIw|;i4=)jT#>aDW-)Km9*LE8#3hZ-6vUN8kIH92tdmJ3T!aF;j-Q0)SG!_nuk zr`|dhfYs0jR7lo1t=T8?`G83Grin73|bX;c&WiJXOUQIR}IxV-ft zB3cjS7H>Y}Aejip-W-LowluPAUFL7_`nRb?0BL&9ggC1|1lQ_LT!mZ z$1P~DL{l4HC1Cv9rilxSZ$V~!!yoFRQ^6!AkU6N)=cnHMIiBM= zp5wo^&}D$JubezSp5oYsEWyU?bHNKpjg_yQD~%& zi1(Ly!|V*OXgFNZr>>}zo=hMvS4HN)YP39jV>|YGUF$?6O|t+AGUQm2qaMpavB*fi z8EY~E#)Tju-KU+dp5tZj4S`r;!^O33L15hi=K-O%;6MZjp-Kdsrk5y2v#KWvzwU1%f&Mt zYzQ+_~fE-J0ErdCFH_=)Q*V;A>l&{ah)05anlQc9mzh%j&7Du5eH&n zDq`1tfDDdgX5WYAwsSL1;ClqEyi&Ms&EF96j@6)gI@=ogFhS_(5$xT6OrndOb2tgI zB$WdOGX-(IW;eF1@G_exC)U!_oj%kp-f+`Ju;k%@tZvD?Sfz}6xAgrURh27qsqtr4 zyXe)Z6cKi;*W=f_bOJGndGcPJCVc7-I`w2FBL6-q3LGF@+w^u8!l?4KV>xR-HqMwG z625@utRpyL7$X$V4(ny&AGA4tV)K7$H1VX)pW`|H0(`D4b6|uZCk zI#x^EBrGF1u?;gangTC1m2!m8M@Y`Nv$9rdfe6P2s!ym+sgWyN9~|kBx`(Z}YNVk3Wjvjt{aloj-35_uwL3`fe;lKeMf1|B24tr@!^aVj)5Z1&YH8;&`$KR)1 zPYftB7^u;)i4hF$Xi;-jw7x-FUw3xQ0COd}!i)JO8|M1+@(8@md^3d|8#Z777gy@& zf;6z);UnkHBuFq2ZaX$$+Pa)Tgu-gD-RzY#c7ubifURsBG{Tz9U8yAGHkzj%*PDcrF<&uv$TYR*tK|*9ejIzF+)zEt?1ShFLzt5?lfC{%ozRP13 z4PL)!Z83Bt9|0LLAhy^&5M>g8 zN7wh576Q#C7*s~si%P7xK=x3+IG4Qtsb!o82Reqx-G`0Yi@(#$#Jo&Z&zL1J)lG53 zJP^&FxZTd~PNgPQ`5~VWaLNiY`dgh zEg+{Aa?jwA{a*m|0CtVWNNNnC&|(VQPa`wD-#6DVZ4K<9Vi6Js*qqQxfG7MtB`cF$ zgaK*xi_inxVvPU9vd>&lXTkZyOf`TqQV%BBb&H9kYvvqyVCu i@}`$l%x2GKVy zh~ohc{w)pu2ghWH2gY{CZ*L>q(rTznw1C!9|9_@f-`_hA*jr+iV_Y~0deV$B@85SO zn6*muH3eK~{LM!7L^TBND?-diO_WQC zPKzYXmQhht&CC7sYkR#_qzfY;rmM4oe()+3LQW4A+?yFBdho5kK6}4lKbJbE9m9wV zVaA{4(4W}+Nt-{%b3Dgy@6%G}zN6T&h;5iHEscoyi78&XdZIo@OJJAua`my+q__0Fl-n5aK!*FO&$0D4j>?eF|pNj(uSLJ z=w*)XH0en|Usf48PDsnc5<;nRIa(LKRd}rk+E=h!9)(#=Q^wTzI$4Ecr+T0Rq~HzQ z001BWNkl8VR?$-gWYY}|j8tDz#OmHT582l>e^nquKS z$>_$s2D(t0C}v<0@DGCDEez1tp2S?xJ+#*!K@|gBG~nF-K3q#0#@G$)7`8e9 z2GS3#;1ABZdGPG#lu0i8q4K@=@7j{g&=(93rL+1xy{zzA$@bNIw>&=~jASgkm}^(W zjmpcdx6aMF2;TTNt~M96kRFe)@9HMLK!u_}C?-@!pB~s-s`etf3j-G<$dHS{uHbH8 z@D3*2XV+-1Vk7waBa$FiZ=0~~E%28H+b1@Ej^}ue=lBQ48-WKP;w5KXgjtAy6t;a} z0~ihG%k}G6d=1dm+GZ0ASj#}M2%fG=fv=Hl1G)rrAO)$xjO!*c@rA3v2eot%>?XyZ!Rt~J5)f@=;kpTM^2M?2W(gPzJWMi>S2ETEH0Am;v!RwT( z111}nXeimgL7VH9Lyj?KAAW)HA3$Z<4nBN79~8o|!_E{tX4gQC7EwZ0LgT&q$xQ7|2W(nE*&s0z%|-+D*$L^x`Pic-3ehXOgpARX zMj3OvCq{}A^q~MDDTHVjcdh+0Y~F{y0CfZ@>N1+j)-MY|I*9H^$$}(E4lu-&Fi=F0 zB__+AYX9(wWHUO+d~J|o5nbrzWZ#@$eB4M42Q@1Kl9~ak!B6MwCOpR5=0kg%*d`_c zArs|V;RAf|Atg}xf0`m!zb%@r(tWyp8Qyv-v!WA7`;aRR@O*D7aq~{Sj)wg|v$%NAo#zsz+yg1^M_B}yBwI6GY5_Wh4ifrOC>RI9LU%f)i<+qszU_wpAbnO$^m!71D~W=vdw zuhSA&>bw(ebH!TCI$<6H;r8R|@tP>W2768G=0LxYlj;ZBlA|;wm4-SyV}gSXlA}_D zZ(sklXc6W>N3-^_g{jksAmYBlgCKJn!Lmk21D6+~QAOiu5}4_S75vp!@&ufT9Y@}f z7@B(?+%E#aWYBBMfFLe_@jC9eGOpiOp;aJOO5;+3MzMHG}16{>`EQ?^mz5;{P-CMr>V@CP(La+Nid~h>{dn))$A1& zh0i%}A5M5Pegt#m!odibU(yvM4Gc2$U=T88OsfCvg7n1ZPh9>S&+#1h5a0SI>Ez!_ z6C2RvYEsKa7q%}!=ylCa2P$L-N(1RsI@|EZsZ{&BC)A!BQxl1lM)T-1<;n3Q3*X<` z5W^nzbA#9ztpSmesHtZ9t1GJpNri;QT#&IkH3oE+|IEEsHYkL{MiDRjFbldsc$k&` zD^4JI#*RxeHy}Gfyo^@AaMc|TFNs0#a)UDF<_x1YR1W9)ccC&Lqm{i!T5QO&Q{c($ zc|sCCB%L$ZrvVddLjuuuM%CesNb808hNR?p+EOQMc5l6{K=_Sc1d-<>tid*6zSedI z2Z|io$kTo1{`x;=mqYP_F~b7U;lN&(@>bt1+zP0KY!+Tv2m}*M^5D9cCwK`(UMi?e zyhi}W_z^)_kuX;>)cvQg4 zw&Ovo-C@J>R&)?$=`7PP7dXk_io~;Am9Nw7Xk_`aD8#laV1vwyxB4zubfhxMvibz5 zb!Gz<7E^g-Y5mK^2~#iFP(5GvGAN5UpQQ6RN}orUq^>GxwdU2&p-FTzFF$MSzudOR z$H{gGf~K&aE-+L(3zzD_sRzioE93=GFGF>r_d+#+IkwlRa{d;lb_f8H&FJ!&ZI6*O zQQzq#SeyPp*MO``Ve2Uq2IYQq{aN6&pb$7sjT`LgnTl{Q>xu2@X+=7vs z8}?1e>C1XpnOiP+>0r#r5?)_kZeY!Dj&}?#L6_6a=xs6{*@&*M5vhWN`yV|rW<(Em zq3`cG?pv2alj*`FqDEatm^*=JtGrsxqjR)R7mzKO_~Y;O<(pH9&M!T3{+~ZlU48n` z2+p6p`9Cj?_}Axcc#h}zD@Q(jC#RcxITz-I%*CQxAqMtuAJD5kjm!8vC&=66c3C!I z`=2-rwvywQrZFpHx;1dpETEV0T`^O-Jl4)MTmk`YwSk8t0^@2kAs5n$~PP^h- z7JpUvQ`@*9T@W(0?KG5vn90yz0t{a9q=yh2#j`q4BjUd>;rs@3WppqR64g+Qiip|= z`Nx;xDj(4U3YCamxR7R2B-xsIpgJbY$O$3}DM0E{7;G8n-u@6t@f!{}#|{AlvD@>- z#R;+owLRXN|9Fk5tdzuXUoXNKG{5xC;iZn#@X+%9=~##)29E?!zj|ULG~Fj?k6EW8 zX5BQY{|#9Q3W`cQ#s$fV8iAY#Mex#T2Pi;=%yCKWaK={p0%7@sK_{^~P`yK@xUt%K zI0f^*snc6>%8zqHU@|HX+6mw2_M$hKUdD2; z{BpK)YqHJ=a~d_6A1^n4E+EE-ZHE9kW%GQwn5+!eP`>_Xm=s<6n4Q`20)wfHKPK@W@N~PrgT;~X)nHD6a-G-rA7U7Qd4YcJ+-pPS8^WL`Hh+%ic#h}z z?x2_R7eYviUQJcbkuV%6{-$$n-k3}Y9qa~(i@=>|!DN32_^s{hTD~7){zJJsOsgl) zUt0nkc5QDVNgcp4IC3T`fK_sZAmfLg8i)_oy^=6LWmFnW@XgLvAO#SS$;pZ8-Vw6Z zqa!uvw;HKdJdKQy!X#Uu6(CbvGB9&Tkl@48d2K2b2rn?y=rUq0wec15-n}fN0IjT; zI1OV0mDM6ZEr+>B3p>JeC0Je}7XUl#(U=>c{phi~dp{Z`+LfzEwCpI!d71P;p!dhY z>~h6Aqm~b_Sc46BY~IJIK`hGX&I56 z(CyC|?Rg`g-^5e|E+FB6Oqs~31Maup3S-%*=-5|ca(L!9v;v=M8Pw;N3(2u@1ZO?u z!$I_NcoORPCAkAP&8**hO#nvxAA>+J2EDxd?%8(ode;ODRMM>;K#fzS?{~2r8Bj+6 z!3hDo)h=xMAL5x*?#%O#f;9pPQ(k5r99Tz!i$XFvbnsq^P}j_3F*-@dqdbbvY1BZaO>Y&$rwqStTiS6W1^ z_NaZc5RWip{&-z@lJ_;CpN^w2O^o!wVZWX2Vy=+suQ;+5K&>g0;cB|;3fMIPrjd$K zwU*zNH~N6WN?3lQk~F2p%Q^hu7W0KJrmGn&%eV3R!>D+$39B)jZ`|N6%n_L&1X{)j zdg`#1tPVJ@x$L(HvRch>D;xFwnKBHN7-Rb7{AJ-(Xr|u`=EwO3Gn~>K^JCxy!Db9&0t~TNB6U)%n=}O$%zC3hpv9BDjpod;Alh^+y_G<~|VYf`CiG z`Q@>yr9zE(0@tT;WP$6Wv-irKBr<}P7zh)90 zf7Qh(MjzAn%a76eVeN&z+!@mfzEy`K(CYwG&6;m@z@vAAEh*SHZ<+II4E5!XT;@Pm z%aF&c_P6+K25eNw(WWNq&#ygy>TmyF)IS@v#mD*n&wl<-T>c!-@xPZjuhz^KjLzuk zrYqV9^2oO9(`VK}hbUmf@&~YM>XiX2Bi}S=;o^6fpF%k}Bl}2QhRc`@@}n*LeK0$K zP?#pr3eUkTY|9n}+PYJ{f4QxJ0P?G7PHb_)f6(eMYiTPrj}{KVl9h9E;epQbyx-Q{Pp8W@SfF49yup+k0}se zY(gv;txy?`L622`iEbuh^o2tvx}dF zIL9hzDAiVVp>A|t4_HueO7jq8ESDz39qa{|qy^`e=Z4E73I+??+wu20_rMa!-|-Tu z4tD)5L=p4d{C(H9&$^72mGq$DVsr4}V?<9>n;Z7wae3g2&R>A=AUa6YP>$H)oA-G0 zyj>w=VzkNceiz>wtGlWJu-+}L9`XxUQp5y=Dbn8{rYkV1$$eq~hS=Z8Rw}2$7W^{N z-(|B&7vNx;Y6gFC_Rwh7KilH^@hkApe*Vw#9MAFBmcqY4pCE>dI{|60P1JaDTC)N-#f&M!)#rgMl|wZie{**F{~x3@Yy@^gx|Ts?ty+Lr#&M z7g{@aK&jXhCv*nBGIb0lFy2+Pggc;sg!;4w-}}U&DN>HWq1o6v$V)FKF<-K8OQf1I z;SCLJF`d}u<-yp(-17i6)RdHh5g!pl;6z6ENIDbw+LPNP2>UR6gT+*%8Ow)E;|p-! z0o$083_*0T1L9f-+?*iJrOO1!>>0Sj+w6q!|G11k#tXBma>#n?`X3*~*iL7Ka;~qxP zd-sO@mN?mJkhrks8@4A^6M{vjw#g9IP0tOk7UCt)ekas6(EV=4dUMtkL9`=dc^9sE zx*65RL^JYz8TQ;oe}vK+iNd``Yub(zlaq_htrXg7e#u zncz}9o?2%9wB-EHXGKoAdwpU;g!pqn_hA{z{m>;MUk%LC|A_b8Ob+ z6pn<(ZyrjSVA4#4>E${6@23zGUJ-_?4eb%x97KtdeFbKYeVCeKj>gMwk1KbG2gfqt2|Ls2vzxNCMQjgf9u{ON|!zg5c&kpYGvC2uux&}N!So(->MmQ#UCU$ zUGQO!f2XQamq#n;0`uOw8N_9dj^ziMh(YeZ__)mB3cE)|*KU|wO0HSfFOD^GR9)Ds znR!q8Bq7*;n?eKz2zCk3-1?h1i@-3#u4Z9&AU5|6aLgUy{^uPL{~S`~x2lF;+BEX}A3of6za^mNuUJ{r zzxVw9v!DNSJjZkV?;_9vaF+Ly6?4QyVFnAk@cQ-qvez+*Q$)E!#oI6P?|v$djIAG{LjlzsSXKlWE&J)giF7jGXHy9$&BiuE9uG^AOec=3Mihy=i9 z$yQ@-#@#�VuRyQgG{yBn_CRjh%AUB{xgX)puKbr}Yg}jgID&-Pp3@h9jpTSXv_a zj}5zJ?1TL}c8*~fuXifWoYFv0JL}`mPhe)?0XQ!(IxJB>$VI3K@Qy*BIPVFuqx6=K zl8&*LiCNRXSjT++y(d8LW=s|&q+kZ&eJ~kQ(5KgE0%_T11N9a=`sEkEGF$1gkQJ8v zz2N~k>S6rd1?S6rW)enqo!15#f^YEI8DK=5+Q6d?jNivLVb#pPi5dMDYg)dG;CUmh znZNyWtu(b#_xkb5mgG-g8N+&%#2xSW-W(sJJ+L`k*@A&>_ zTI>M2f(;_TxzhyB8!Y7LoqLS-TLlVetJy?3g}so!SMBV@0p;;=R%ab*&eP7ePzGlU z7r`C8QU`nch4KKz7H>SdqXNLj032+ZlF5-nrLB@|clzV4FHAqu?{C~u6yZHhoBc`gpGK&eKrimJFC`<@5 zcEd4-`4x*309$DXLu>${3@C70=_d5JkrRd>*T{%x@K#Smv@#d)d9)hQ^ync{J~iGT zg;oHU-JQw%Y$MgQUt*kCc=O>T3KPhsOc(BpnJrKLF>UUukINy*Bm&It(>*fP#myf? zC)yPh%Bk8z#MyGw%)MGn zy2(kfWXq##FRPJ^}z-kel_Tf-Yo{zMwH@i*)~0*@c?*zgfurJubcg1%DST<HcL+s2k)y(4(1c5$y!pGb!j*(~W!IV%8o&!l=u#UCbNn8aOr^UuRje z_+o``s`7wlwlduy#K*ht>*q)mi|7eCE7d7Ov375K=%5H_H=CL0(#S8A5~ITx{NaNY zfNe6~m@V%V@CqOYLsAc>m}@!yvkA;=4+IR4hBY<1ANbWP8u%%kphGzIUiv_I9YKcg zm5Wf=`5^Gf7QluM?@9J(^2X^$DfHOj|Ml?(=Y$r@$Gpz6?zzF46{d*#+wa{G1sUU! zff6C-M^KmOkcS_FS}iT0*khwYUrFGb&X#5#t#<%XrzN_ZCbl)56affT5kd7ka`)!ejREi zGZqSh^Pso()6F1Sv4Ywq{QrVC|L$*A2OFBe^nY>J{vs!C_jfhiEHJ@Ki)V%u<>3LYi@K9^&YkW$ep)iiH5CO;#`%fdD7QCODG!bl1o(qxAS`8$jx50KAQLheLMFjGYh8fQD~*q2#c0RrM2hqpX|!{+ z0ZKQ}V1P-9Ovo6U7PA-t%1oiMoYN1`@Bo}=Y$zoV2eg#%6mYRr7fzT03%LtRrFn|s z#*Z>WHVn{2xdHF0xsVfY&@hb#Nt0*U(59tOSk91vtllT49^2Ot&_2-B$!XT`nh*q( zJ%urM7>_+EO27yh5X`#^MVuJ$>JxkvPZ+fPb-+QQ{))l0)Ugz2k`1r!)fSh64pG)^;9<_r)C7EkKfro$5{g=1kV1!&dex6_- z0{urXJrkz_9hOXUbv1wuU^!;~j@VX-ic|uP8 z-+G%1s&F-~tjb}2-zo4f#O8OO$MMEpP&6O}L;v*<+UOO(_uuaPpSb)vp5w2j%ME+{ zLHiIfvL!}Zq-!ubK!?eJwC}vc3x#bIJF$$ zC=7M+&5&+&6o!ZxKG-u``F_j&gOal0)Tbd|`I(&~7XXF1G`P24cmm9vAW)5211;!B zU000WF7O0&86+;bcbp&=F^H|Ly++V=*s<^+tMu zI3cyEF@w`B4M8FKf7yGREJudMMeM$H8q97?5e2z3^xxF;EW)b zKr4wx?hZcR(+Jvq-U=+G1Se>AfHCvr;;aNcpqx$9c2YPv?7jzfmZlxVc_NUb_O=AG zZusOaV8EJ25(1doXU&cBoY9qzQ#b;V))KfED8yuup=;ZhOJj8aCmf%r>&oq(9OYWK zzDxi*7_zrfy6F5ApvCQC^U9RZhqkg@ZfFTJh_)uc0*U6pLB!*9nIs9B-3lv9?2%*e z^M$c@nRid-Y^!@FjHJoT=xb@)6~-Q$?yZ9PET~_X-Bfmi8t^i8&XdfsA1)M3^cKE+ zb6gb8m2!a{U!7eH`e(pC+YMK6ohhcVG1Yv&ZwCkweb zKLK+~rZb$k2P)8mp3FP|y**X3SQrj}U`(*={_G&7FLcGwD3&_be&fG7Uz8jZstU)< zs;A$&IEmqD0R1d-T1!Op@UZn5T>3 zjFB+#A_{B8CYI#b_ml|EnA>5ADin~DAjTf^G<$Yw>pqdBk%=ifWnpL?{^=~v&Or1J zVIh-m77b!JFwG$)fhvl%c?=P|nv+bj;r`fm7w3k*`nP{t9(Xo+1Y zEgak7ZdB5k28y`fix+yyY<64RlD5|=ewdnC8p7vGv$p^sJSOSloyQ9mnbLS@lwBta zBo1w)DcTx>9Emih%^x0XO&+sIZhbfmGGUi=gqmH5b^=qtsi$(J4 z{2i8dzmMHG!qsUTXZNu(*x=AC(`1@1^YSh8?&5?v3a*RsJ4E!B*4uSTKf(}gloMY6 zDbACxDtCFdqvr{4NsZOz5fR>t3aT0H$IIqFMK!d_rE zZ?ArMy#eFbKKyn*NZKy zAq`=eTWXq_?AYCRo^CpD*!_N%c_jMw^TnY!oD}11>59eQavVp|6b46x;Z{jO%tNFO zvjq{y>=wju2*zy7;$+?AmmYX9&I?n}PU*0LmZo_(Y^C1Y?I%ue`z5h>!#rJhL5Suf zeTi>ByXHwU4mpC}Ol*Lq9qlCi05pg>t(N&-O8o4?iG(rf!&mntbx0U`gTZe(dyg%1 zS2~IPC_jI^Afu&CM|;usH$&{k#*Z8mdV8!A2}5vOnum*`L%ZXdT|Vqy0Aiy%S^l$rxzM{7|1NGw^^5F^Cy0?<4O=AUM+ue>P>XWAi>)8SD1$V*sH$U zFCt*+hP2JE&iYXGV^x27dV~-zOEzuYCi;{_)7q317CvpSk{t%FtoN)tXnL70KfXi2 z(S~Qh*|jkQBe!e_7i%H|#pzQ6y6blOm5bGyrK5+R9q;KAAQr{kfgT_5oFp2jFzF9- z2X^-@+;2_GX^=ymkAQW1x;pgHhcOK~51v}0aQljSdmA9Nq`v$a%A^4 zAxMK6x;l_FGwT)%OiUn@vOeXK-$|fO#`xpKI=yBo6C2|%vw2Z`WBMpx{Yi_fTBIaB z*hplVQ$mX5+&^n;)3{|aEiy7vO6jvQ=Ip1RbpR49QlZ&CZDxH|S}-Neq5DXs^54#M z7fH-}1a$~$zVWz7wwxgavuo}#<9yTDkS%F{j~;2GaLfAR#!fM0i+}g*%!k4{ql}LL zFuCqu-L3msNZ-gH;`y-MXwWYD2y1J%D?P9wXG=JotTo#xi+GK z0rUr5jH9s{gfXszi%go435R4*62MswZxT`f4Rb7^YTMsW(sLxpPQ-&1)w0QQhREAF zdnqVEjA6DbnXtqV-*{#+D2R!9Efs-zqTjGRrH3hD9`IA8+H<9u%w*mWjKPuNqOCnR zj}8}`#zs-%wb4{)fTaX!O|M!QGAK}t3u*n3xj*g3CDalfjLamil8+LecFY^UHnf~z z8f*RQ83p}T(HqNzTvyjbHlFIhzSK0hY~2SgBi=ml5Qnnjr#3!y%FGi)pBm5t{;aGV zI%b%i#=6$y1<5>ei5YKgej;A_@^-EJmZM-30!#kaHBJLmBH)<<7&C9Qzi|370gU$S zFQnleb8|)GX^@KnlL0b1!o#gM^cK$99E75QX^4@&Gh;t9cF|)0In8r;67B!~V~|X> z@Dv$S>!(LdMzeg?Z=`JIMe#?_Ah3HoJ`xQ~y>(cd%kw@QAV_d4?oNxlyL+LhxD_kK z-Q8V^I}~kkcMA?F?!~=GaF>_!Y0vlfURVB0p558md+wRpXEP(O|53Oa%)(gBNw-pp zLN)5@oQMPJySk3jQc+85{<)0?^LLBfV6MZ{u~J(8%eHIRnIN+u0M= z1zb2AJE5fJkq6jPM_r~wp|M21Q=gXG^|9SkMU;|kY&brmLxIK+;ZO$YC*5E= zJ3yK))XhAWHAyFRwcHR=y=Dr$Fom5eZM11rIgF9B8lAt?qPcF|&RX#O?ar5mhf`1? zl2mNJ{4o+eG=$LetM#my&)b?#7k3Yxkt!d>fmBDIdmZ*}1ECYJ2|F97x7oep2XxF9 z9n%YIT1(z&0?kuu33RE8L+SvZeIVkC` zeCYphw*4}GrFWcq!kNO->_gUxFitd3tH|xhoaQ!=NmMzFml#g$e|%HT4- zD*`gMs;8MRyMaZ$>DFVpnx}jQ5YvNY>d=Rtj`(u~2ahv1MrgakMx8@AGV%L)X(2N| zv(y_tZ|>kTY8_#IUC2xcp40O|7&^HTxF4a?V<#ookd&3%O+YxVe~RKQT!G@p0l%VI z+=ej`QFG>)?%U8T;5FsQgX}(yz&k5#hAiO<=}qbkHJON`VUM>9O6~ue2lwjoIm#(7 zEFEhhGziU2X$u)Y>PZ#w#;8+nG?5?ssx#~#E5)$27xhRbET4DNP6ItM(s-h;1w2~S z5jdDZqYc8=5*))BUi zf+U6SQ$e&nvt0XVz-`AHyml_j*H2eCI#CIqLwmMfFft1DmWXEh(RuxzpX5~eF168f z>I*;RYcWt3zYoX^`Ej^rI(RdxkyX}{ZepH~k@O@LF72{`{2yBew}fd=oG-c9hXf}C z{Ltp>m@`erDM9tCo+6eD*R~Z+9|YD(UE7$Jg1*I((z#fTuOi)E%r#OPZPZ&6qMy)H zXE-2oClqq4%HmAPqUlbF=9d(X4`qS(jZ_CvsfnXWl?uci7a7Anc!&wPa|h3hY<)J0 zMx!=~Q7(+D-h0VqvMEFhr$vHBNxCi*#L_W-5=h1tf?!d-#EFXd_Kil{8OSAx)!i8R zedZwS)E|%)MaxWb#8G+CPWK&EzRC?bI$vU5QN91>KTy5e1{ci#S~ppNC_6`xMyWc9 z^=UA)hyth}pqLk52rV%NdD|0FN_9Y=`cLUeK&;ZHD-oE(ye-;6_pN87 zWe*UW|E+|GKyrfOVwfE=>|ikldTjep?zf`Ix11_uFeGK?tQSXp*jx!>Szan6C9oLm z^?W~~S`$r|Sh{}Da&SktBt2Z34EEG(NEg)|Y!@|>l$bf}ig2JGTg4rZw&+VljO#I3 zTS;3ES{dI7OOCQe1pB)_{-TazV`VK0i`c3e_>eUo&DuFWALdtmK>qC;H^;!jISlp> zav$$}jB(u@y6HJp&-R`{PRQ1VHgw!hsie$dyl) zUxioT3?u2NB5D9K`PW5o3NPEZ6+c?D~1Y{RcQdmNDwjy>NrjPyv zM*ivHm{&!JnCk#mA?R5>y@O-(_BNQ8^&}FO75Z|o==wx@Io(u^Vr1*x!xe6_7lcq; z)0A^qa^8HfODJDU81C~6YKI)@flYe4Q#A;^;(HUn(c~+Puk)*_5t7<+$XR{(m^pD0 z7b(G`imV7{>Hw~jnEXT#2jwC)@YXLBoG^djE`ClNO=SPl4`&?KhVK<;u9$h_e{=AS zX$b{6s}%Ax4WM*0y;k0J$x#f7-`I*m+n9t!0?TI2zoKgal9kjw-W3|*eT{hF(`wfR z5PjL4+}+D55tn

mM8~^Lin?-7#4kXJt?ncwu`1uBU1WGj--yTqZ9)+8_-QTjq$x ztPCa)@|FO=d8VG-t%Ilt*2sKL*k)^ep&PU`D>bR?(Pd~_u9VLe7zdcc;O27TR{APVCV8rY`Z<+tTTM|Jf`7$}xo73fyg>YFTi=DHJ)x8~tzb{T6;Zceu zY30LPl{@m!^fBdeFQ9F&Gjrt@q1c0n&OAr{BCJddWtCSSI&o1CyP{?1K5y6E!?^gw znBQ`5yE~(MLlQ#O)QRb#1VF8OXos?P)as0NTSw|Qd3VN}thvcVfzs|(ybl^?aI4`c zoZ+mmEncuqeFV}V0Q8ie>nX{1#HWt(epv^Wwi`<;qJj(#exLby;Z85C?8?XuZ)!Bj zp!n@51|^WSR%K1aF~oFxPjyZqy0JO)fr1|;i;Ln6p&D4qnCfJR=(@7K2gb&}FE`{C zkGG&K@tE$fh;48omcDn>s3G+xMdS@ z8UbC;pU95{`c-@3I`!FxcUPX!ySH;Q$1!5p`>gSX@K%$TMG%bB{oqe8Rw}2_SvwkG zkI`>$e<&j=-q#(UWSF;%A6Io7`q6C_rJC5ai9EsI-q~@!@xIO|fKu^8HOOJSA6YL{ zSn(EXbNzEgI^8e6q#txW3-E3W346)gvwfK#rMktdztdGE=Qxl1`PHoiD-lV{G1QDm zBaoKQi=dc=Dl<#Wb9Zt89svg_^u8~sDfWrJK_x;hixsT9N40231(`SOluU<2ZDw-7 zuFNHphopxeo@w2NpzzxFuDT)^TGdbFbN_{*%ZuLV;x2?YTG zBBVLY8dE-a?Cfn0WEs!2nUBwwxs@v&&|O>LH;1E3!`zXrV%S%!;$EtU<;r()V-!UM%$$?=M{%SEot@Pw^9FwxTk7k3JG z<>g1Cp$7c8QJZchZKOC4!r{gn?~B&LVthr~nFzcirW03=U;kKU#P`55kpJB1d&9Tm z&3D(Y$Jp$VD)qosUf6Z}j#o$;svK4%XTsUxPMou9Lt~TX2B6zvF<6imfcts+GvU+X zMZbm95tw2E3R54SBl?a^!!3lZXNH~*U?Q@X@F^9!Ws7atlMk#@{y$Z4id+G4wj6e9 zTE>PvtnaC&3hxRxTrwgH7*?Bk{4ngTLtQNKuub6(I3pXC(*0g~7U`$Hd-N8xv+J3QBNQXs z`+&*w?2)~x6;{?3O3YzM32AVzv%uNJ|7{!G`t-llW|4e<+eKH{+bPEy}Q&y^GS2E7dzWH!?5RM)- zS4G}b+%Vr2Vk`FVv0vuPQ0Ip7-upMdH3A4aKV00i+lLN*7kn0xx`5sS86pdA4Jr`* zMwW>{+Nu5naGsJ}J4y^dx`^=zE?T4(A%7(S^tKbBah`VoC3r7=1?l&=ub+PMZ;&;S ztUd%;*&r8+ZoGm_BY@5li|pjG5t3jL03~X)D5CE&A$P-VqmpH@;B^*;$f;c@Dx+Su zv0BOc9AWhaS4M>Y6&!E}>D(GoopX7E2opCnrGK}kBK9X z1d1FErj2EcMzcq<4ArYmhG)kTpfH|FR#oly$NM9cUoelVm)^*|4f{YBi&|?4oSa3 zl4%e{cx$G9kq!3jFP4gdMhYH8tZ#$<>aw4_7?RJMUb=na_y`LxU=&(tp#(g6t*A07 zPC?Y?&{$|X-1uOc0R9$&@Elu|dGcF_`HzZhd_xdt)=nAY&b-u->y)_Q-MY(tV8t0X z!js(4Suf9OaLb;CKUWq~`^}pv9Q<9x)Gj`ap0(Xgbm|#mG}xijrMVWI1|;D!C;K&~ zS8_-BHFzAW?~`tcz;~L@|HGg38R8cwu)5^MaZXL1c8Zz?fD|<|gQNf!X~;X3y$q~M z{%tpH+P9lApnxd4Yfb7`KI+~d?Lsp(3A%$qn@CUyx$INrOHF>O9ai%Tp}~Zaqy(~y zLr(#Kh+XS#;W$e?X2C?$Iq47o;$8Z^_ZtISUZ_gDncD^XGZ&X}7@Qr|byIq()DnPmeSn5qFER{6U zu*IWA?nB%8k@bJoD;Ry~r`A15@N%%yWaXpHY8-+INWU+~p#ykS3HTM??AE--V#e z96}UB9pRx)nT{H<&&c)GMK1%%I0@AsjO4S-J*ux?`5*q)4$5vKYUei89TC%l51NnE zz_ESk_9&xLN4`>8^WZ5dg5h!aI+l$a27I0Szv!yJqh9UYAW?X5^SB0q>iG&4O}8hn zS3Km-SB)a9_eIA5VY+u7vZkYqa1RR327i?LNV)V#%GRc|!4rhjI;jq*5iE0a^QRk2 zk5@2T#zF%26|7UTpOp-uqDp%<7Gnj^(%|UB*ZU!8m$S)|* zF*BSq5oFjx!_O!ZrAMTJp61|@O!)$^WsHVb z&MSh%p=zQ3hh!)Q%vZ<3#rrPR{a>63KWW4h)D^D{9?0pZUvbl3ksS74X0=aB*|?!%C40>n%CBhyWVlV0nm9CUXK$1B8hOt$W3 zPZ!ff%fZ!waKf18n~44XA6Gy%GDbB-B!O~8w4{a8Q^{(Dy8bXKVB6S^*02^mS2^g) zi9SWL#JqD{3dIt>m6Pw`+i!nle$_KnDGNnLi30dmITR4D@D;T#(@ZYU|4~fh1*J_5 zAwxb~oxVWQVpN&gOgHg~(?$Y7wA=iz;D{CNWU`O(c~ z@sD45wL&Zlk!cJMC1=kkT#dV9j2E&&!@nG|`ETg*v$8nvk4*I*@BgF8*Fpb`_wOLkn6`|-|0qrRKY+hJ`&Y)- z@j7`=z99b@R3Q_;X#DTQQ-1}Qe!tXyyV@*C_?P!ru`2#d`t1EnxNBpjPS+Rsd{*}H zg;?~_@YXNjWveXo1^F@e7VTD5mFK>y+wW1INcbB>^YghW{>z6{s8{jPu3seaU%&M_ zI)l&u1n!^TGJLt}nx$#C7%3_GcUjL_uQLpn`~0fO$;~leyf;VGLr`TZ|abVeYF51u$}sb;M&h@r}Z}W;!tT7f=; zV0h@v^}3N9G8J0qVr4!hj5UuvrSue2+VU1&MlhG{7pALFbV;hFI0V_+QqOG0_v!J&x`|SP6JjuFyhUu-AXpm7 zG`n!)GGa#!7DNjA0u&bN<_d$(4R*b=6q&$ov9F{)5Ux0wrqFg+0Ynkq{kh|93K|kHt(PXNNpQX$ zL4y~mXN(awHHJ(jq&~e~u|pA?jbNP$%IzhXb6rM?f9yw;_AMy!V+glK+SgB1jPNvK ziir4}DARox9$iQDp1cmnneSd5A0R9)mXS5hk9?1XQ1t{I}4Nc%wzx>4&u6xZD zVlvO=VjEfxa%@EMm_TYXJALCHD+gPQcIea!9JG&Xv0yQG+o%!Kv_PR)X&vUxHU!#F zoqp7w?#&)~+GBLU7~7kAbP(Ne;!A>6jrkeeq%?_&Y1BJ8yxUsE0{`&spWYzY)OK!0 z#e^66Qmxn2@io)YP;>%_MiN@5tv@Zj_3{)~Neh#L2c08oh@_rJ6S7BKpzg9U?8F8KVt{hL1S+%F(g=xmHuO&8jC#-$v zBat}Sh{2e@c>WwJpH)o59S|+V8D;pn^oVn47&To-Uq`Fe@j6>)FgA0`JTs5@LgXri z7NYnce~1Qflw{6W1XlFGf_ACtFh z*zH43og^CAoE8?bV&e_(Ve-sRL}-Bav-!cKc&j#us^yjVa)Fmh@p2V+1DNr&HMiIm zAUl=Mt?7`Oe@R~UFUcDgLRRi!ki!B@HR0y#q3@M=yb&B#B?^o=U1z>QkdD1!1vK^h z<7P_4@mWkS&XT0IIFQE6?B}D1N29ngjFB3xc4z* zj)2XZz0+>StLYSIHw!u%NGqsZU#nZODY_e@Du7Jw7@3*5G`e?<|L0_&dfBQ7u9=h3 zF&Svnp}Tb9BKf*Opa~fsTv&hlJxhFw+8r}>CnJXqI?Ee8DV57k9Fu=C>}ebw(m^&3 znJrUX%=+SK#pt|8d5T_7@eSz^iy`K$!nIPfnF`Cu0hNV@uW?|U0~($gogID(dmy<<<@Q?y~#Jar-A~fvvoynQo0wGBn^$T+9 z98)1yBHh`)Sa=n_ur_72!3xHH2>S6%$tJE4G19q7o)|w&63&xZ#L?4`A;V>_Al;iz zEFBPPAyDgeB#>5p$PLGdb`2V~?#^(fllw$&6ghy_bp`0ZRD#juUzMlv2^|F{HF;QG zY@*r!C1{upOeW#vjmuN?@niK%f4JWeJ&Kd%Ak2=b5&0p4jS`{A)LGwHB2BD0;JOi3 z6QUx)!K!0XHUpSx05zXE=Rz|hy%19-KScH7`s_0u&ADX8&X-6@#W7`ygwKt_UC09H zx>5e2tEiT7d+{aTq!Q)IB7;)?bNx_KL?+8@8i~mn*zHDCR2gL9U`EHPh4@`IEm(Q? zc*M4eE&TJDF(Hki2**A1PvG=`#Uj%%{f7e7=|?Gd^+hvKnBj68d=Hj{&bBkyg&sJB zrB*5J84H%r*(|hr@n(*~l8p}_*CZVQnd$^HLk*hV%cZq0$7IF*7nQ7nBt|e}`;@4R z$*PajBoU*jrXEZ^B$*LVY(@;_`1vvG*W5=RuwZ?6GX7))oZt>oR z&=RHiuk!Vc8w1LD35+OPrk+Ueg)6pfyX>_bTb~72#IP|^hqqmyv z?elxZUYWx#STJehUpTmP2aC%maQMp)@6Bd~MWsMv(X~-HloaowZ~i%mms}*2hsCYh z6vsaU!6ir$p($4O9EKVzbq{rZ3FKK&<3SusbRufbc#5ayS7ZnCxGg2OWmvQkCOHe zSlVesHa&!KcSS zeQR{2h86qnAt{hGCAoB-eB{sAnut4FvMinhtA4UkQRU?dFkXZu=|8q(fD#t3=bsg4 z(BdxgNEm>S^ezQ?j%I5l8-EVbNseFH0<=^tGa7lQl5kfVHE7J0=9Uk<74uU64{Qe! z4o&A{@lwQ{G{92J<$WRVFGG)dd+pIWWlH9NE@&IYDXkSakRdjL(x znh>5?M#!Ga37Gcy{&v|Wz;1GVHQ`{{V~SSiZT)NOx-qNq2UFX$V|1V;X zExgls?V4F_!-tW}7!uD+7xgQCN=%ZdU4yIXfu5@R@jM21AD5)fw%jjl6NrE3W{ik>hqKzgj8MZ{#@9*gc0_dljeB{? z32BnQsav2htJ{+f;`T~O4*ps;2aA=Edn&#Se4v+Ygqm9qW<|f7Jq9_A(;u4b(hTG3 zB0ql+1O5Je7P`Rl5jD}2tbYWTO&~=Sj^VlG7t0YZFXG>OpI~J*umcWFPiVq zJxIup^(ojvBS&jV12ua*xLI4>zE%;EJ-~aiR9@lW27mlo4G=R|nEpD|FXkk)b5B)R zqM9pO-UX7NfVWq~%tW*azoGE`rF9M@T{yc?(S8%L$ z!WaI_$~m3}@k&!b$w&_kID{!m7%x{6?t=!LW?J9~JFT2L?wo05yb?P+qcoF8w?~CG zwp_S*uL9~`=`A5(!6*VKRbD6y>U)o^TUny2otW?gCZEKwiwr~d>BAwQwTemQgmw~+ zZ-4vgdB6R0FHqj|NN#+qeo8`sCE_;`i;Y2&$iJD+>h!lyTOO2(AUvtw$wqy(?OJ}o zp0KXWG9IB1pUF6c({=_vB!m+!C+R4Zbd^F=w$NESgYqN9sLiq zB?-HLg=GvFHKVSCk3h?eU+U*@jT#J}WHij(B)1_ausJ~aG>f%s=tqC+l~MpkV}WHk zF_JR6qy*~3oRm;n#KW9qcRMA7T@M0&=G8USP&yHcFQgt?tRhlN!?49#VW;O+US(ch zV_wGGeD^L@TcNk0a&2n2pSW#hb(ZU2JXfXc9;KasO;1Y8taylKiu}h&r}5O;hRB&7 z81sOnDr;0{U)(}dSf=V=$D0(PCqY#q=-5QnQj`*3I`+9tX+A?Wirtt-AS2k)NrEjC zX2FH$@^I44c-|P9%BiSc9ebK(+bh_X$=FDs85PlSnC(St(=Cm3-38_q3IDq0P2t}c zZx-L>B@|*O7XQP|h^C4a6p$0ma=83S=QOw|A#Kw1$F;pYR^c8En29`0-;TkdNwFHy zsAczsS)2ZV3SFj2?*a#-FrU~mjgYrM`azV6_@rN~@z7OStd7f>VCv(q_n4=7d1JE; zKMOW!tF4-@eBF`T4CZZLowD#R>wnYH%_Dh-CYI5-PZ@N%)05d&?1?1YoH+70$tv|n zTKH3#=m@d}^dC()QX5Z5TM!S@wdCC#$5f}XJI1$v5!WaB`3#6h7X$P_#imxPhv@6; zg6wbDRwu^7BAmD~MnC?VoAy$#D1ot^KLmvmq-7+ThugHdrA;&+)NQhTvHhPW{v_tm zXUcpN9=o1+7%mj<&;9Ub!-6?SFFjO6!LU}Hc(U9bnUHAgWGfX)iwa%4bVT(_A*g9N z5`xBWai^{#({dMWXoSjYi=P#IBm{uhjg?y< zM_8RyR7>94d#sDN)d&)VYe)h+afEF)g(G#90K76Nj-t1R`{v62Shj{-yag`4arf!tsS7H{s5oyaURqlA(5_OD!yo^h^&{CpPkMVTq4M=$|f_ z=?E5o@aA#>88vM?3E6c6DR59R*Hdw5sJk!g91K|;rtE8EC{cOp{fOe#@lN{n7I&RPgPc)_dzRSC~Ko zYnrwhyI%)k73s{%>VXOx#)alBHP?V1N(xajjGNiRgemu?WV;@?+BSxC0R>YrI(3_<&WdbC8Bh>ON4IA3ocbf&}2iy$bj;k$jeaJ^dvQ6 zmM!dKb$Y7V%MA#rYozT2uxDZhEytSM>!N>xXi?B$~P)=%<}NQts!9U*KN~ z5EcUb?h1ZX@;-ehOiLH7@|5h%`{8%eR>iJrSd?4kYS1+SSlZwPcwXsjm=GBa8ya-r zQTyk*g3>m($VFNGq7XgKrI&V}od3YKpC#w!{Jm@mJdnhd5Y3XfR}aX{2>g~$uFqXh z)tYRRsP1~mV~RO^GK7H)eh;bK7Ia`(Nn0g$tmBz%mLJ0bYcytK=$)v)Ug5y4;QNsm zV&dx9+bH{FN#XqqWjSEbQE?uejoJOU;cMlcD?&rjFnIG5Xo)I)SO1#itF7H%QVU#lgs?9aADrVSJ30IhqH>XrcM-QqExCkCFOUwq!%G{D^ zJ_MucaUJ-w6m=9&mK_>WlAF|iYVWQGLO!q^hIZq{H~T(y7h z4!SaN7)ke_B^HfjjRGe+Cd6Rb^s?X5)Iw**w@ZN!GwJ9w4GEOFMFPD zlRHl#Y%k3=j@3hQLX*GU!!eYiyUOPySt#QA-qV#3Nk+!xrH1|#G`%*ntLXu{qQN}X zy&aU9N!c!*g5F7mAwxuQ2mVQlipP3`G8Zn0pTGPkIPoitXd)YD92_$2Icq5F82BbPs>kA`_#*0MS5GroOB2@OWjeclq|Wg*{W-Wx zmIazPvv;6+DRTxpoxD*Qc9cVy_X9DAd)S@Ur85gIucYMdyLZ!IF!59c`?uvf=Gj7nN~tXNaROHWDHt;i6i%Rxyqpt#r)Z zGMm1&l^^K6>)3H+DCQ~SLsxIpdPTmK6J3T)&#`tT*H-(Sb5SiSIv zU}5#wf<=X*PKP6wFi*)}t~PFdKECsRIql<8wSFdfc_Dz^?e>N53;(FNgG|<@_q01R zh4u*^ka^FWbc7}GuCI2KIoRou_}rC)^+7dEdS@JqT&*;>R)KPLD#Ng)wCNp&hOuc% zKN48#nV|lk&!1y47EK<9V=C6%7nO0(i+@K~v+VycG?fmVg){c{x)eux3fPr$>b8q$ z6H8%yygNUf+?mZ``!KH~d_(T!Clx`y*ldsPF2;qMC~ETc@a2bSmjHLUNt@Y_r1KN8 zL2-x_PJ$IA`OR}rJ1$G6uWuCv@aX%q02frpl4DgOFQAI$sp%nY>zXDI89!u!x^JF0 zyb{UakQ=x;lG|4Z+5`0Px90h8bPdyb<~8(MxrD%v-nZ`amJeZy)wN1wB(rw8Y2rIr z;i<_*QO4RxlNqLucln@0-aD1kk_Fyn7n{M)EpK^F(w4S~l%Af&HvP3&&$nzFH zujet~^9Pp~v*!sNzSloRN+kJtn@|ol@wjUupJFs;O~AS$ZIru>HKZ*rll%E;qxzdD z_K=mogI3yKq1@eb#l16a?3+qHbGsH8v-UTd>*5wtth4I zC|6WXyiNYis}l6~!mgWEq8R(sFr(a(05(+DC)ba*pzp8_i)VDm<^g_*EmVBwlW&u* z?qPRp4-Y^6)LVfv;Zemq1XM7<9zH2_VbAXBq^LVNgS1_#4N=%*nQ4UAXKG^LCF?fd zd!uFKdPDay>`Xg^%X4vHdzw4%jBT+lzZ4CBmBqNfUOi=oeKupAA`!dzwq%xX**Tv2 z=2I-}jCTDL2kXn7EAtE`fIC2KzN*!?w31wY2H4Jj*l|C5?cKrn5$2r}%8Kv&rrGLh zPp8UK*E6$Q6(W|OHN9Hb)L%BdQ7=+W-E@&>Q z-Fggf&aIPD^~4}HuO1Ag@zYbTk)q$a3?ZPIt{^!$zT}-c;Z9UF(5n4l&&t{QPn{8oo=^=%&z&5s44&GuvUC~Zk;q#O?0D)QJ zDM=Z8$o<*z4N*&1_SVJmpk0LD*Ss^oKHXDogs^#vQ@^l?LDQicnDh6DYAzn_x(vU6 zKbMTc)Qe0PQ^>-s*HkE@b+Gm$7Q(CtvRttnBOp(;JczG_Qi-7T`%#>{nQN*z)Hlbh~vKHNufWEKC7i+CwZ`2(kd}!vW+q=i<`NTai#MBX?nStHvS-qo-2!i>sDS ztW^16^t}HhsejE|iW6;ya!0;m~-=KN{%|YT=Kh%kNnzSl3?Ou|8A{Gm}1;?Jv+hz%%R{zQ;QmRhb3nBLX1D4JQ|5 zSo%(f|7#VZe|s{eN->k2s%_`Go;zT*Q;f{3mv^^S&oy*7 zp>}e#8p_US@_|{LZHA4M8)KPoIRy^^S~Cv|v2Jq)vGC8vjH<3<3E!JM67hK(o5g*( zKT`1U!D}|iLbT)%?0{SIAx*kaGP@W7YQs0}cT-9~5RuJEiSc8a8Nkw}9NwCv-U5yM ze6q1N7+N)`!8s{6+Zb<)9j!~}Uo2N|harPCPnX@I9Yws#A5R4g_+XMaV@Cm1Isv)H zBfjN$i*Nk$Anes}YcG-OOXx@V6=Y#i6*P8A8~e4F`!$Hpb}>39$S@cZ+@cg}&pflR zbThF~Kf;2D2%5_?b-RmuC;K}rGs9Hzo({iT{2D_8EG(x>d;{A~tbq^B3O-mICtsxR zAf%@@!b|^L?4;l??s8l*7&X_v1Y4-CmNtDZw4zbWL_t{gTlN~;gjSk7vB5-lC(e{2 za0;AqS3l4;xcQPQrBsHJ)kmjlTFY*YtQgL!daU=}#M;kaFT9Mp0hT{xl0hSyWsPNQ z{j3wxsCF4R)n!S=9kjbPAmrjs(T`6zg5w0FM)eM}Q$pb?lkgbTcV+0gO86xJ`{((s zN!|eo{}?oDoV9ynCAN22VoqUAf-C8U#To4UzJN3)E>%?HPs%c6-D}TVz9z!H=x_L} ze!B!smUJ*BmRzj-ap;61hGGAdGi%Fu!TA6Fq6C|;=2fs~<&9m_V6+i=1wunSp^ESp zmvt0eV?zx1f;$~8X8Q8{PvNz(^g9#nyE7RF{P#}-L%thc1pZa5ULs$>E40Q@wmUH zGKoKB!frXM+q%3teRO#MIT(=IXf9YAObh}C-WO}#t$@Xy z7aLscrQ7)Hk+FYM6_}k~zt(tyw@VmG>rSIX;^2Q?2^QG3t1{UMS&*(3fH-W|{*e44 zm?9LEo3!9<9e{H*;+y}=n2pYW?o#Kl< zw_r=3#?&?c3SREE$%*amT-Q<4=WY)a{NP9A8!cj~-qFhY$K|Urj zP`TN2wi}qX^R_Nw$L5Tn71=Gxby z?7+twcp1(Zn?@$RmG%?%9@QNumc7iBs2ECCF2c{XZ1~;>8C$?F%x$fe|G;JFu6*Di zTA(NTpt&&50X}4ONTqZaMQoM$sQIIZfufy>KFEfQ8WtjJYukie@{hF@0UAQ7N(1cZ zlAx5Q_ekU_%``S;7^9>=`VhA`d(AIm5xe)?h&s+Z9$5Vv5hSJPq0V|%5yA`ssK(qL z)<1@uSutVl5H~p>5;$yj$cMZ}?J9bm)MOeA;ff)f2xtVKeXW2QKM*!69-}Stky!6z1!v7UzaAXM&b22y?8k>_=k?vbrIc`wv|07<5KhD|_GgD9Ayx_olhnn} zDk3{&U3x$~PokBs7uaFiD+^A6BpMF66A!F!sr0;`bMW)Bn9j$2DK6HM}-SxM3?7xMD2Mh1u zHnTdt+hnd~_yc8wGR{8(mp8g%$qpReO$)+P3-G|qQF#|?<188Me9^9~UW~VT+kS*0 zP6VzItK~k2L_;#neiBuQch3a$;0>6S|EfJP`|e=OovT>uG4>P~xYqaxm`y7AJ&3|$ zLThjpe=4b|wp|cVTn0}I&_?5a$KOaoEaR^FXc_yCrfJ5HG*4)07N;LYZjeKn1~ryc zJ2u$Dnm=VW3_(wr?siThOyR3_=QeD`5wvC0-fEGpE$N9vhuKTXS~d!{-LN{&7VkEK z(yelafG1Ag4pB#nmB$rUm?Rd3H_AzFBqBAZ1&z zGwQW#!wWE;Evyf%_)QR7uny%uoJkyU$_nd{eO}9lu_xF(ViVC=;~3{B5`iZYcD7&H z%v)-t@T$^7XY+IO>znHEKgG9xB`=N+`<>)1aLJLxSoy09Pt91D#vZ?xy6%n$NSdH7 zofk+9aEujpvc?I-gYmUq780M+ym5rZwfZ#0hYNX26f@IAj(H^@wwLc3(fq;q&Xem@ zAE8OR?+b#H-bV39<@IJS9Kv*pMJg-3q2#cO%@+1ypNwrYCiIO-?-?`I045)LL5CtG z^y-&oltcg09O^;y+`R8*^ZUI32B18vd&krlwtj}yO&@fjR&-m$!SA%@j_RoY6;yu(W!Ue^Na~1cGz1;+g5UMy_x3)Q-x(T zM-{jqeR zJ@bdOoF=xBAS_|#vabE_;ItZlGrl8X1tP>s&q#JGDx$Ane!Cd)#_7`^AKH@It1qyL zM@MX&w1U*QQvy?2ul!DY-DwIxzWK%z=TR0hYy5$^AR(q%4c{^Iv7Q^!`&NBu*nAXH zZQ0m9D`nv@@KNT}9)`K>1_eOd!`EtH7`R}PyDTkEx)$3V{qr}PUYSw#yQS6|!|vc1 z8*I_iRXBzumQqiqDiAaFWV4(NwQCWg`b90ya#jfGO7eaANq$sM?Lhqv)IK0$0A>{0DVvXhwkZ=JqT_<4hLronv8}cXAJ$mMR7yU zLfakd5$G(zWffceJ>ZHG;+4=N8cU!CvI%iMwn%T^pa{nV7V)De8+=N6-=SX) z^Wo=`tV;3^P$WUcw;wJVlsKs?%t_Mtm3dFwRGaGgO~0*0aStm53KP-F7=qSr?9sCt zPl%k778ig6jk1+=#h9UlWZ$hK;8hLRyL$Nf?D6;G_l=&6&nxf28!;=D#~6IO%|;JrN5e}!qmG*E&rKbyZHE?K@Ik&@=zR^BQKKlnF3RQvk*;{&qwFeK&0rH7sWiS$FXX8e~*bS!~>N5Xe=EIj5mk4}JR_?=WuW7$B z!Dco}?`ix_!;1BF(mDTL>l5fxQr?zZ2?2kk$`N%kzJD0Pa~rGNHv2y*behAzt2_Ww zq_{VraHSQSUS68z+YAJkY(c|NGGwbjlSz34s2Zqa`$`> zM>xMJoiHvgbOq=L8PIScepmHY3I7&i^fd}pL5RBvGxg@j6l_D$2rqpaj=|(9f@9QU z>tYXG%2ozR&XpuMpD4%V|KsT_7~+bSWev;#gAeZRF2QwhClFkNySuwPA-KD{1b250 z?(P;KKp-#Yo_pVa*lTxnb$5N6NcidH$1;B6H}z$BEaRUwM_2iq8Xj7nE)zUJ39Agf zV`=%soS$Iv3E5rrh7x@Y5Y?X#D@6-cXx7MCbfuh}YiN_fs_ra8I&pa^0%JvGzGjaV1bNPA<|ZaanFF{uI} z3La3glEcM1{e2w(;B{3P;q-pkNXj<^*Y4{x3x0<)uz- zV=)%rUMQn#TMq@wD{107vs5cX@J%gJ+kw_Je2B>^9x&aUe-SxD!_&H-iODZu27NDx z;k71U#dqC?GLl7Ql$?e2P<+7cP+U-lFghW0(dzE{(pe6`0Dy7Ks+prYK zDZH8n20}3NSgrCgw$@jOuuNnwkK}F`Yr!rBq53A}3A9DOJk0VsR#K zkp-qL7xb_-4DR7^a_;izy=9*`c^PRc3kIsYbo-@Zm}D`^7o2>eTLo$W5#OH0AWr0Q zkQ2QQiZ~K#DP3soKv?BVvh)#gDzKIF>ol4AkU7piOz>W#&nF7NsI0#dj-aEhW#H5o z1(ZlAS!A<&h)L@|M%ADQks@zDdI|Y}e51c1II171;WI4%Bqp4jZ90WCp1SCx5#P5i z!<2FcZ?ZdbzN+51sG)J5{JJ{UQ#0UULuPLw<*t8@$~Yv2vNR5^JB$MrT7b#aG&sey zR{Mi;;`l$=pt-Am_&!Esslt{ibAq&22gz7)zC6{Mj#94?fVmSGj|JyT!ZVHeM3b-< z-F7hu>$!`Uo}dsPFxd~v@v%-jS`W8h1I=Gg^-!mlN7|QH*%*}{ci&Z}*4FkJ$m^6V ziJKf+*>`QFFw<%-gTIRP<+ZipS{qDY+Z^)hsBE_2^9|7Tn}H>Q{>eoR>@CO$R46fV zx4E(Xcp!OOh~{x;I0{0hpcQY%0Co0!5bBR#&tr4HnCVH1RZYL^6D9msqeKqJ#8nqj zuCm*ybgMG;q=+2U_8!Wjg>4-n0v6r;gZQBQ{C9Cp!~YhRg3CqXajJBBm%~NlYlx$@ zVqaFZ%{`C~F$kXahpGuFPdL&3w}TM(x+9bm9;qbOj=2RL-Id zIJ(gQ3$4f7wMIS4Dm7InSArkFWt z4xZ2@WGqzD%(%Z%J}EnwN4xkq!e!1t3`~iX36*v|uA-lP&j4;^ZSCmS&Gh|+Eo>sS`PA+LrYn9R~RPge@Zj1Gjpb?XlxJj$*yhA$NFS1!W(c8`5&y znIgg0Q#OvOcMOogrACT zErg>~Nyr4%eT0tqY#Vu=Q6<)7wzJIJyotZQVepJ;_6ZT#W?(Kv<%+mPjWyH6vdow% zMl7omCj*3>RQdvHU+T^8f&sZ45h&yeaU<5@`bcnhbMi(U3lnS(_7h z09}eR<3L5T)IUlp0RYW~E@_OWJPnMv-Qg;w?Hd^z+|BCGUP~fpk$>N%P&6z`e>pEd z11Rp)ZOGKxrZ;f3TL0u|$7$I8k!F17Ws{ z^8s-&bG|e%NeHb51p!&nP(J#;ncrq!6Ten4E>J;mGL6&SAGH-a6|E!|owT_rwOp6T z^!@Se)4zQtI2Sbf|6d4|Fj}JTUq&#dB=@D_*?3hjgbH8+9+@^i}S!I$eImFOmgiN zPnps@h9|6&%>$sC95o)R-uoEnuNG;o8Ril!ab)f&l!zDfDgXVk#J7-ImRbe{*VHue z?^aR5@U6;NV9Y1XRhCmVoB($ZdMd}gN?~TJR+BqxYIrQ}k%o8*+B2JFWsKuyq>YzMHWYqTy~HKq?#DqM($R!%c!5_7TADOl zQh3D@R>Vtv=O5HI@MD9GHa82qoSBWGF;%vtWf1}GgAaW~iL5Pm_}Wm*eh66;&(m6B zxe70CBsGKz=eM{6PJ0|d-$~yBA&M3-N=`Mg z%W4r|WxC(ruB@><5!DHjQCnajMGl<%9K^IqSR5_&M*!WiOT}<2&_-kX2zGZ3={k_l zij3X``|rmqHTU!2a#h)T3*Wh$G2$U)Vz)Xx?7&w_AtfZ;(%-PT4y_}!0B~)D>$t1H zoJ&k_Ddq%&G`c0+!cU6>zC_*Rbi<)ZEDrva%7wO~JO3zBmA}UeD*ktK32_fcsZEdU zYs&${AVy;ngdxtT3j?1#`9x=J!pwr?q2O?Wcj1XNL3wLP%l!>&lO*%Mxvr!f(VB}- z4_sM(R$}z+F7N=RTjVrcV_YSNJYmJXMPfWO;GxUYE7U`)p2H=8DOjOVBpY72Nr6*- z%aA{tV;>zAAk48At^Fm@i8!OS_5H%-6^uQ*FgjxL{m0ZhqZnVY|L}eOW4;7PEOb$t zbaric+P0Ck!?;n6gj-kkbe7vGzF;x;V^QcKc*~_zn zp3lSZghbzwwC0GEiK~#1VM-U12erDC4V2H5DVhT}U(NITMm$WM{A8iHyNrxOBFrc8 z^e187${sk;d3Ss)pJl#vgs__q^UtyVcCR!^t!noYU?b=Z3YjPo4m{espMtaK(-&Q& z@Q>#=MovTong317L(?A;<8d~ED2s*UKI0@~$~}T#3B7K3_OB6qZ8B58lDsDUy0;?K z&pL^SGcnV=w36oMZsmS0p6R1QTS+;LbFNi6@8Sg(5Ip%=d^5*kwZnffXc7a?gr=U2 z$ZBGfkhi5lC5tP&6x z{rQiulr1qXpFk?tbV!XL1uAmr;#1vfcSsVJWiA&JeDF#Nw@h%8d0f96Ci$^Avn(Bh z?x2veh!s=q5>!92DP`J1#_W9f3b>}onSrK5Wd(SAD*eJ5E}!Y4i75tEy;>fH!8kVF zQ;1VOmO~=kxd1gDP$jB#tI`e6i2G~uf*9Y?gzhOL4K54q_11Hdh>`?1*H4sk(^HKo z_C=m#58;1$k^cxy^1By~i;ukRKmZyzE-Q*z;DhuVr8dHOH< z^_7io2XIz{yuxe_4SkX+*|!bZrpxjPj15P0m>c{`LH}TbQ;trXStsp!X&rqI*;vVn zlfCcZ^n|gkMI7CA*-%QTqOh1PHO^uf2O!3ej8dvSPEpS;|EwnSEff9%n$ghXczH!z z_GjNquFqJneb;;-T)68nTr3ti6eF~Fhh7LKBtlF6+f-W(`l(uc_B`t%AcvH8J!7Bu zPY56+M_99mlKu&=J_#`RjAsO~OAx;oY=O}@PT8eNoI-xP76rR0jOz|oU7hMz8>M!b zhd8xhx44WF!vO{8{aI^VhcOB2etwH~T3|svV+UclT{s&>QsRt(Htv73XP`$dK$ch;=roWIt`i7@?8leXGMBraj0K=zm(J9XEYiNo%RemZ?(cx|UIXVA@&& z>}5}F8g4rT9_0)41)g~7XWi23P5*}*@;_@BfLMT~{_i_1o~BR~`3e<63hNBB=g3HB zq$cMC>SLDg5kdsudtyv=O>&whrt8=$4(=6_EZXYDbd}x#GCIub?R^hFF)poD2jKD$ zNxYYP>P_40D~-I%!@>VPx0<4xary>r-(1U=Fh<- zo3sW6le~Yfh6ZQ*!cF&O*mp>bSqf=~C%y+%%u@~bWF~T2+S0DF5(PzT8vXn41>DI{ zrN6(aOQ7wA(zFsl(&DvQIi=(=(4SH=3{1Y0V01vJ|P{2$FlpzZZo34qDx7Rx$yG0402p|Zl2w>mnnV3 z9AVxBU$}I(=^sP*m$_@w+PGfFisF@uBazP`HZ8pGTr`>G_88aMTPUz>-AU#vM@G|U zL$`az3r2mL%;|u zVV{#b4=kI&m))hPQDZrk3NDWz*7t_Ihn?KH|D?_T$4wwL{nJJH9PW?lsrOR+(Z^Va zSY+*UXPoJ{Siby6qBq-`F4P>L*b>I9>wEP{23*Ujd}V_|*CdE>Q|H1I^VMr8rF`TL z?qJ*;{}xsTLWPKy@CM7s#p{4Utk)MficMgYk*PU?WgN<1pMa^OY=?EO15HS_I?*e$ z0d3d`?62AvV+%MMSTJg^?X+=MQE}uj&JQ!SKZW18??{oa5!Ltph~pd>-k%-xgC;1m z7O7rjU)^RsR&00EHxo!ub9`~;T5n^y?9DJn%PLF`lMDN&&`xe2@o#RG$sZ?*r5-Xo zY!B+&?oM)9D$T9$!tA^!+>-fyue=Q_g}1Q?)Du0WAGz}75CwPTZLjqjgg}m%7bXpM zm71zH;a;ZGDK}BXn0Z-Y`a$+FfY^})Tr<`|(cerMlG#yUL zb8RVu((lQK1*kPLTz)Ea?>BJ&e`R+`1(IPBa8ecL?I(@E3~^PAw(bL`?-%d))CD6E zQQ<{FdjLduf@3d!?!*I6KO{Z8h1LY=L)ni5Blc`xJhxQ4m;@aq1Mv8L2S$SnYtNly zL+75*T{UZa=76?$Uk-E79p!4&yIVhwppFrAM@TPj1m}3iGgx~C>5cJZY2+SOtKr%<0@`LNCpX?lJm`E$H}^+ z(4)NE2!VDRw9+sA#Iy0bH*5SJU6IT@Z9W^@Y z!><hodm4q7-e`0G+xC=E3}H)+oJ;J51ouV=t!oiECwM5!OH}< z393LI=ys6{T|n#rak?R62Th^F3g7O##_yD6mAW53BL7Cbb%H#DnhSh|3Btql7j z`RgTofvi1xWTG*_hyZ#x*7jHp=k26<9R5U=v`+%|{$Fb}ba9*(hWx2P>-cwpvl$P4 ztF(WnNA%_wSq+~I>-`*H?UrQs2W6F|4;g|IY_%#^ti6iwPMsBHhNG{=QA#V_Ahmm+ z*0cJ!5%lsGLbRx~Rzy(*r>MZlX>`o(sx4$%N_!EWhS@cr@Vuz?zOa2}r4&WS-2129 zcdQ8`Re91G(9AFC?}s?QV`i%N9pZZ>$Ss=06s85bZItDW?*5;M~JS+ zc?=Kj&5$%W$3@;qAWg%0Lq$yG$T%99e2!l@ z3RN~<#t-02P2d@eI*NTyV#O}OTQP|E33TJT^V&A9=;XlevH6z_goMbJ-tlu!36EBa zOy98o-f?Ez17<~!?+yK4w;^!;+nTmHy$C?N$NcLbQoS-eP3^#@*J&C&<&v8MO~pAA zuRqyo-_5;@$4o>_7-OVi=h@BMf=RQ}`86!sSw<>9y}M$7Zi&LOeBW=!w`VWg-ZhcU<^(%@GaVq(cujNOtFQ=er_>l2|F;WZbSKTu@V0V6%`>N$8_iGa zH7*Vng*tN*VtU;7BGw_v8Q8B*I{(K_lxE1R$<%wF{H!L@74Ya8!MMA$a*VjWMrHh< zcB}~fW$wHBmc_+oB7!LM;)bjf6I8XaTNi%T_E$vWFuhu#vYmm9G_MLAKT-+rRYwQ> zY5wjF^-Ggix53ZbsR4t@Jpaf%p2iO+zu}3cY6eqzkUO4sbPT(q-Wa+ z@}+YHIC$t$uX8biGFuyd*(7?DJ+E>CRUD7BAksfnS|I(~^O)av7kJ14_3{YL79XN4 z-F?`Q46Jk9CM%lEnx0I0FsCq%CJWTmvIfE<-eB-b?ZuSz-v~6!Uo2bjsXh69%Dv^o zl)9%4biWP$1>NDZ&k-6MrSl|YJ|q*mmQ^Ewo(Z5Sf`B_yGo(Q*Fizm;mnaXEz~4tY z2#6YaKR*U!n3JXV_*;5!e*n+e`>hBHKXW2!b0LJz!+pkD)t^i@#i(fWFu-<-{npaFwWMm>S4frE;gS1s{m@Tg2CfCG&R1=6s5xWVRE2+-39Uj## zTPoozj6ILOpRqo9|Abo0HEyyk-nTrn2-o4bPC0HG+r^~mEpn>R8m$}VY|nBa5)j?} z_KMLm^V^)|C4kCbLH@{&2Vft@-~fyDER~raIQ-or-6vwO&sL26ZTC!8=rQ(_O==}a@TN$ z1rAt}#v)yyE9KM+EB|&`blV)IY;6TYFkmP#lhEOXI^vS^lnx@XAvwqcY@i~v<|$= z)fTK-&5ZNo<6Sht3fiPyZ&)~WeYd{Du@-a~GXze$Jh+xamiM(@$k#W2a@xKXf+MZY z_N~AH8*XM8%m*2I-*kNnp06YJQzeRnxzKx1eHld;b8?TVeMa%S5^OR_>c{CU|E<*j zAdK&uQVr#fL=`j}4jgUji$$PhX2W{xWg)r^m7g*P5$o)zZFdopU%9=zCvclv6Flr^G4@3kVXN)jZNTa+Vqa z0Mr8tckfOSpfb6X@59c?S=B%aT{QQRWpyM8C%MGwkj-ti$!B2GDQHzT#G}iES_=uOAkn@}3bh$UW)3#t zAF(}Hs1HuS{u9)C5__yf552sg;|nFpUEguSZu!&&i4@x`Lu8M*L-?2l7F&{_|$&OK%-9 zq_5tvXcQiW&7%4Dax7Fouosov0uC)uB>O8>99KNmVCjO~JlK%8X?not=tx&s7c~)+v=62RjWz`&C@? zn@%*HFWt*n6+9{diuRDaK;7PW}UK4=^-`K zRQAHW(s=LWLuxx7FIA|pRF2IZCQ&MmCO~-a>ACEAE|@UfQv3BMOJkd0Z1Tf`?HD8o zw`u#9qvT~qEzLKZp1~@dnMfVZ^(jUcBsW~+FJStWEiYv`0m1dgf~=MO(1$c4r6@p< z*cN7rD-)Y$F__;O_Zu;LR?o0|^X?uT2Falvj50E6EY*a6mWi(~)USWfIl#Pml==5p z172DzQ6bHV@6-&^w_@?>0ozGoOoqkMzw@5Ax$H~`Wrw67afq?0tiWYp_CIBYfhe=5 zD}(|o`w7mn)^r}#03kVoZ{6iRC7+v#7q1Mv6=`^cYJq%IepZtKE3VTY9{D_97Dd8` zybE*F`*z~$lp_-tITQ@g9tC^d+Oz6|$@{_f9GfvhT1cEdTEX#-M34$|zNP7C-PuSBPYT44W8$J#wk zlLJa~(57miN$Qn~jSbc1M*^DbN2Iji`M;-K1Z$FfAQ{mKjS1=f*sS3L!UPB1$L${P?tGbam2h(6eAG#SASM zVEF1q*|(QTGmpxOdd6ez7|MBYsbA#C(tSS~+|4Md`6W16c$e|t_>HK+W-kl2d5^KQ zpIRUI25O9_x|dN6VPZr7=ivzEPHe#GQsNQr4d;I0tYbVOL4Yf2Y)*!C*M58NRWO5J z0eUqDe-dktTf_Al$Q&*`^GxsqLd_8ML5)G5_x)JL-VMuY@(Epgvu-F`kJU{kh=_O9 zgJM&lnP&hE;5vVE^O`YYiellg9xRv>%8w5A+dB905;iB$`>O-n-##1%ePq_%57Jf& zG+@Od@BK~e3+Y!G2*O@Ggz}q^(7N45+@&w2Clm{xCk!mtxv5>%7z~k8K*?QCtDpwN z;z7dBuLb93EDwLey-IFxrcpagnUD^f(`h{_z4RcFFe8d~8dTFfJ@7S~i+u{{yn3pp zchms`hT*<4KWPtw=|f8vHmLTYCrR%j;kg5bNIB$$ZpAjK`{Il+zLf=ZpfC*>tBk-f zb5Sp&2KG6!QZ-T_NYfP73fXxZ-^Gx@E*lee9W{f*44EWoR|73u>wC!P1}N_b8mNtq z+jnYrAy+Ki$;n73?n#(G!oGYVb%u;nq>r+Y3XP&_x1rf8ptM$4uwoYd8Ud5UqH%6oDej7OlX`y{j9mMV+WLWwoI(ed%!IZh?|*~I3QC9=c4 zEvVk-d`i3~bit<-FVTq*GszHk{%aUwz$!l=Oz>vmX*Qpza~IhM!4YSKmp3(CF^-yE zjNX|_DD(NF|D@k?UROu%A20+q14L7lP1`NU23>7b$`_6>S1MCZTwEQN$ao=6IF*$> z8{SS{pHoe#luAavhXXUCXYO$*nAA?>u$oE$LweeR4)2FM%)*I9M3s7VzUOKDgB>o7 ze1y&GR2dQK#WVxfVmR#W*Ti$GN($j+(lqa_DDf{#fF(-9eo5~98PwijolGKhAwluJQh@Cd3OW5 zxAf|=R+pSSNbbjP`ylql6XA72peK?AQo{-O%<5UqPp#P~wYRM|W<8ZL#eA7E6-Z9H zK4Symq%#)UO5}U(fDxQzL$PAr&b1X`VW`*tE%^W`xbTw6&O!XUJxGt%=>?~~cA8$6 z*QiN{5fXL`%dE9yxAD9Vfj({mVenvgk~GO)(VWJSIcm^qx|4yy$1s~NR+(@HF_~-S zkNiPi$`afPlwjSH^bT_gwJ*x*<~vZ-6Hn+P6&lIs=UzD=Z0p9`e$YZ|A$o&k?4qiT z+vG-AANS_s*yqb4BNNYw$xl#5fsPp5Dz13_09zt~bV!u`d!&C`Zq(Nj#PrYbZr=NO#=4DJ~#DnX4LGR=g6= zP)z7oo~iKuCxxi?CC%{}dPP4H>1S#NK3i;tFKNNF&y0-%)p3^yQm~h7U};%3(J`h0 zqv5coA&cXery|EnG+x?PfYSutJlUwDaeoEHPsR>t*7gzjcqYNMiu7ik4Ap7 zWs1U&3c!`rtTylsm=hlB3Q=*efq6X`(B|IfaO|cPD)eT%bMOAV72Hu4NCpvjEXo=K z%Q@|y?y~jRpG;uy0Y47Ay%H1gQMz^XTA-(-h$&OF>G*J5k#`=#QfeS zAejual$!el|8oTEA$X|^o7|Xkj+uq4E+pPIzTe)Q9C|Dwd`F_gH(g`J9-_ z8C3U@8gQ$|GpG|!D>;;4*v=z5o*8+(?A5-wtv{MS;ZI3dk}ibfMN>8@ID${me;S@w z#svF#{z9kaCbU!BeEg%W8CwG_RxGtaS|29=z(*LwG|C(DV^xRwwSB$Z`s~yPt_{f( z262uzq`_FLefuEGVE|4wGJn$`9?BN882@vzs{_gR#d2nD)Z|=xes?Y@r0|O20++w8 zL+a}e;H)@0ZXC?Cw$8;reHe>o6zJHXsmz8^Z#susw$H7(8Eh=?x#s%Hx!$=~973R1i4Fc~;wE~2}7 zXIv%)@!yhmE11tKOQ*W)F6nPOxWj&#<5;j%_8M|(M2`(uMCo7CX*03u=VH9w?4VD! z44xi*k0Ne$ArlRUMZ+rlEB7y-*LFM!uM~k~A!>3Hf(h#a%&+siU9@*$o{50dxxViA!ziIm2=CQ zl~_4AK=E-s!5$6(l3(k)fyzXoiA32tPnl?1P`@bc=1`dm}PYR;Xez)?mN0U zSE$qquQ~*nJ)r*xFf`F{1T-4L6HAtH=6I{I8+$xOb(81`RydHBVz}Fb2<*t5iCp>~y-p<6sRK=_3 zEg9cJ*Mhd6r2EzJXjR=^oD=jraQd!4Wsf;V+1AS0RBZa!>D8rX<28I47D`G`wV(%M zrp23OwfrAaCYErH>%6Np!t@cXX0WxqHZ~0`CYP2zTkuwS+uzNC)^{n|VAv0k8hxQ$ zLsGQUmyq?bP~D^VsiNEu>>u>l@3FhpzvN6jVx9X!`~bjNDE z$h)GDqw-t#Jh2^R6%Y$8Jz zKYsPvzL|-tN&{FpMhY({3ULj?-u;;l^efI)LvlfsX7bBV=W2?j4aMl%z|H|v`QZxnvVMeccB2=N-S zq_k`xJW)0&mQ{n0W-71}p8o1)9}@k&8I~Ip1qCRo6OMw?6_4}1^KIind#x_Slvjrd zVy4h49;T=#&jT?X{G(*%JCZ1qb*SK%aF)0fj(aW1pmd3H_6Bz_jP#fOxmO}$V?2PFoBxHK)0~`h7^r+-81;ss=Ex1 zRPG~{VS~}2SSUNxvW~{%V55lXscDMCsi*D`<8GV^Een>*m?rfuL>b_t=`xKCd3`H~ zTMrwVgWv=s{L(pta*-6Xg#DgaJ{0b+mq=Z9MUR9I_yupU3M++wmh|K#D?&DFGzUXe#K`@jkd#%REel4O${KP#dm_Xr z+fvA?^#F?e>;qt#?~Tlg#2?BW(Rn&su=d(QvmD_e?F;x!YQ~Q*O>e2UrqDi3}#b z<&(As;+sillkKs_lG_R(kc_TtCFf&RC0aA7OjkoBkWVF~!h%4B%l&9KLI1yfVbLk4 zXtg7Y0|FTOkz}c&yGvZQUh*3O+?&_kL(;TT-d~2pCWtIKf#J$Yl|#|4(T?Pr)T7KY z)(@^cT^r+p%zQRiaFNkmvy%`mO~OM=c53I3?)d~sY_-vfGvd(<@bJ=+rEZ9I%foPf#qFpIL>ej-5woY=e zLVe2acSdw7404K3#@={2nA>w#o!?jw)43z&HDE3Sh8-WiL{fcbw*_{Qv{wrzG?bRn zv~Vj=kRlANhiaVVQi8LJW*TC${`sl1aYwO%S)S zub~*x_&STXVwY+zY~L6BXh)>d5JglTCE0nVmM{CYve@i6hZHZie2HsBRXV({jcQ^l z4B53kcQb+%Mn;kcXt9Qp!5Wh|AVb~cq`gk08VT(rGpWp+aW6SBKT9D!pSjt8%W zCw*abTo}3e2AzrhQin-|m@{JsESCw)y_O8BNkc?t&s&I5d2g-wT%p|2A)epJ8|XNu zI&AuLb`TDw-jFjaUO~O`^DY8TP8fHiH_!%08(Ipsg<_%ti})E=m_41!P>$YZVOj`9 zZIh>2CF!%SJ1?3ya=)WEOeS-@o%oPs1~~%6&bFYnKo6k|U%EI-7M$;S-7cyU>EjPM{||U6yQgSC8w%DZ%wn{em*}!Z@|}CW1uU-rd#DpR#-3)rgt>8 zZ zaIA)QRnyo$At*u5CvFK#l}nz1nb(wn&NWVm?L7Usf1~G^IFPPNqQPccUivvN9g-ct z-%XrMPaP+xX_8a8J0|>!!?Zdv0eh2JnmaBv!d4kXu4QVoNd|(u0|d@r(;A?`7yc8p zDm?$ZkaKh!g8M_u=yE2&xibMDLRJugs1Bd4cIWrqZVubIm;)f51!~&{A~%nk80d(` zQ~_HY?E3Rc45>w11t$T@T-`Ic@LONAC zTV^yuDi)~c$NAZz%-UGRCa=12B&te716rZ5&xSEwx*+3i zVmeT~`Ojpobu2qtLl(AEJ@k!qdQ#)&=dPu#Qh*;iA}(`aEe+F`kJdGO5?C_k=oUSj6B@CDzWc$pKb5vF4@GkB4xyg z6${NO+=KRa+Db7+Qa^E6mNn2_c}!ah7(Vf1E%`v@orOm;#MWpm*}@fo>^IiRUrhe| z8A-WKHmhQQGFRha*38zgl_0y`Mk_K*)Rh`e=IJN*tzWUECd?BP^Y2Gu6N7tmxTh7U zX|}ENH_m&LL-+cjDjq4-zVKIJa$}7pSIVA5-R$^#{?SR{?p^w!HWU>xa zyX=1BTT*yP^2opGL#LOLdYY5?G0(X8FlvIAp^pAt3c&h`$pg$NpTrJT$WxByPU|-( zkpdyWsapGAG)tCPf5x#%0xyJwUFlcor$`X>4P4qKc@=E8S~>YaB!;bS=WO`^k^z=T zP5hJsOY1}gPFpyNBH*>7k6XNF8PqaaaSFi?{HS6_xV{GIwSHhlEPz{YE_1%zDSP6c< zv6f~js_xAIK|tn+GPzj=*1PdrI^Skc8A1DXYNHcrrwTxu(X$o4@plvk1Jn@S--<*a zo#nm1wyKp=Th5goUIsLNWuX31`_i?zjN<`>42#fKWSs0rs~j|1{b7o>iJbCwZ|?U% z;{xU}h6H|_>ehe_(+1?L?9$mw+!@zgWr!Bn$RdO6<2YidZ~XIDLKssuegQmPZ&kaK zhLmD1im-Cabzy-dF>*rcKAm@~*oUX_DwX@F@fhusEhJgIa^V1S>I8xhi!~EsrXtF* zz1bO)dJHpH^4;kRWv{g79isHtY>yax6$T05t)?X}AfG2sbIK(tNeVj8k4=&qt~&I) z^E-|BX+4+Oi*Dh7{@@J;BrPLHgfn)35?(gq3noq;VAn6g*u$SW2B!MJ%qo zQ~K5tN+*_S1%&(w9U&-lzn4Y+Y`znJ8CNE+&RI zEC!MOVnJW*9`<;r>r6elc;P(=#p70pHl*4zW^r`-md^YTMdDb*;?>h>{(rjw1jEfX z(!|GJfkHZ1PJwIIq65&-YR;;CnYKs>cLbz$uV%Zc^s039Q5Xq4`> zX2?8Zg91P%;hVgat*{T52p;-N6m-)zy^R!mJzg0Kde!U(w`oQAtb4V>GsjNFjXRYg zu!C4vOn9ZWqIIt^mub#^JQM5k?a@B}iyhfew$dY?Va^atxqicYlbg!~guLi@U0ce@ zj58c~&nfntL;_o#zfAIC>u;Q0INBoHQ+ zpkS{Wrf?k+T{R6FB5tuE3mp1L%rxkFv$aOB8OnihE5PIvV# z5E;In6tHiDv*<%PF(TVg@6jXBy`O*jC)dWXhG1%lR((V%`mo#f*=khEEG659Q} zn+vq;*jS9=s{Pq*Vb4B1^pzw}l!qN`uK~1e%E+BYbZPH?$Og1mAtDxok6~s+q{ob& z2L<|hL=vl7$fmhbJf{b|IW>4pW&>KDj(a2y5S4bpoALXw3*Ng zO{m`Z@O=U4`lO;b>51Eag`)l?j}v)LyIqNIw|vf}AVmsKl?LT6&yL?;K>ky!l%~%) zndq|(0()Z}HUqvM!}f4rXhFR$-z%u{wbU#O%pRH4K65;)>^rXxGeT2v(k4^D7%2KV zP~10ltsS7m>t%oY33>zluUF-&`b;cqPJ+4DtF&s*@}NIpwYoNNJp`aXNJJ7(LI+k3NKl0WZ#PJWEQ&^X#7TY~ z&ntl-MYj_tsc&0UsNcnZm*8?V2!4h73(N6#MPFg@cYvv4XIHhmgZa-yiSr9`9jmW7 zc!wA5YYIWnFaO)~v}faVt{c0}&1Ij5wrUQ8{vAdP#9mXD*~vEw6WJYYQ&eeWvdY1R z#|zrYi-f~2Q13tF)M6h3&+Z=&EBJS=AG24(XSX6B#DB|$Io4n7Z^j$pR-P_C_l@Gk zdCj(8Sc^S*$mnE+kZAW=NmGUw$^+iOSFf)Q&RSnt4s=iM&SeAk-fuSqq5Q|;-(Qh#rUuzg z{Q%zMM63trF&6iKN*48YYk!*@{|vOwf_XRd^PJc)P}2IYx_xJ!4xFA(kTCv7B)y(Z zlRdO1tPdO1y7>{7mln4LhO8hWKPyHC&kbb<%*kUsajx5LMl40>NSu{$G^P%k zS75wh-0SW-VsufUluav0FZ)4xIMkt5SZe+YSACq#3R-pesy&`cks8;ZyqOKMf}rK| z^C2b>v3gR|@VlRI6r%n6i;Hz6=5|U!X9hR{e4<&jrdeCb8R!snTt`go2c2}dg68rF zDc7&Quc|v6fPab48_hC)@3b26$`tqK#=dna`OWMY)q?(ew}$xc`$2}wgWc??U~Y;- zYOjaxbU9(Mk1B1zNZ|1g=CqT}JHLnDsU1URJKiq{M0tK&8KeHFPVKHe(=#v3vq#kE zU5dwC1cBuw!dj%7qiRn{Mpr*YnNE)6onF!o9uuIzl>V=mB_H|-#dl3F*U{_&*3kJC z&YNlG=U4Pupg(fMZCQjvpu3g4TO{3%(d~u|i zvX?0fhxK^kW0h-5ln+5KDwewM5@u$r2*btp3U#(|+YmO!hnyU*(cjIwB`}ar0PI{f z+{hT!B*^a=w*Eh~(en$l%LFtS<$n+rM4`TeCx@%d>iVqu<6nA%ls(klLZ;*XBr64ZHkc0N&$X zJs@$%s}%j~?P*?85+dHq@$vLGd!pH>oyMiwzlT1@cg$8RTTA!c8C?PiX>Z8C4yg{Os@6HV(gcQ?tjMAFfz0*DsKJpFm5@`FE@mVmM!}dxJn=D-z_vX z5CekSbL~E*h+b(C?DP!dE!ebxdx_NRoy)_dw)P9e=~GO5 z(`Fsm4gd}lMEE|9Q}xO71o=qQV`PbkGLsb3&8i+9rboXKSoUqk8DO54WDfCe{f>lQ zUdwkf>5b?)U49N|Wj!O=JSt89BMII+V@ljSB0ape64@;CS*>bjYsw8nqe@DgIxDuy zkdo~!gc(8TlSS0=9?>jz?B=_?@KdjPbIx69rR<7K_WOnmh_^m!N5y`=dcDH5oi0~I zsO$yIATAK&4=z^FkqmtbWJCPGV_fRf?^s2-(!(<&LYdr6;{E3p75=cJJjrUItGqcEdJ@cDMoMQQ_G_+-qDhX?T_9-M3vmI=rCbN@w`KSyoRTAgdR^n zAV9ev&S{|ytfvvi(g>0;X?ll<(8^*Xw_d>HOmK(?bm=#+^-cOsZy4t0_+k}xOv@f* zy0dXCeL=DjEgB`;q>InHk3;;Vxc@=0cSmf0aKcKy{08#}-qV^=CU6 z)r;X?St81?t(h`qAAW~a@U*1ZzEQ3FlbP55Xf8xx4#Hu(W$y_kSbt8Em%Yh!bVk;z z#aU(#nzh30wRnwL4N{E(UFzuj8PS~omxQo#WgGJTh1RXc7U zS#A`!Sh|I90!x3e)L5|`*A6=XD4;c#O96{EeX-q38_~2smnSoGtK4$LiWD~npxg#i zqnDGIb+nnDEsoHlFybo^%5IrZ!phf14Ll3&c# zDq6X<3Os1ALcfBnT6Oh0cg-LDRu&1=xY~qvuLr%9;hslqTZCl3{QeEPAoo15lUy!u zAlnDi;Nkdi)*5OBjv}uQ4m6)1(yzeTE$Ep|k0sTU@F*~{A8!5^=&9_mj zKoEUi+Dbn=KTFxk|1GjP-&U9Y_%XUgve;T!bkup{w+*qwFE%_&qvV0r-t&e| zh6pj7zO0bBmQDZ#U%z+7&U#L3B<8|$i^mt6uq~$~u-vei+_Qef)A$aBjiaF3=9qb} z4_VCK!cftav(uP0m z{apE#aGc_aK zbJn0HL?BI}LiY*D%4X8ivgqoC)k%WVHPhkH`Rq5JE2hIKL~Clg*SG zNzC=HbM7XL^vY&#hqUwM<#~jzl9F$U#a_W(yWij8g)H>_2CDH`1jQo&tuw zLw)A$kr=h3z3N(UJMc;)$|E0qL~PPwfl>Uo&P2kwp#(W%8srdB(xQimICsK$HnYwj z$b?gp!h&#jyy)^oXkQ{u;!*}g?%W%Bo^(7E=JuxJ>&tfhw)murkR9I+`V({rJQ;h2 zhQ%Qql5JVh-?>Uy%g>NDYlGx?0tQU%+26B=f85r9;HnefOKzN6%3vl7 zF<~zz1=x^2^K-F%e*dmdO8E0s>&sw`Mib*K{o)4TR>lW%NL`y1dyP-+_ry)je`RB5 zA~Vd+QJem}ZWvCQ&n4|r%j%P{KN=2nICNo#vK-6wrCfGq6cmGrrNvtY_Cg;X3$ucdxg7Q5%H&va^B>7dw zlXaK!5`a=UA;~;m*~>l&7uXt zz!y@z-GYrWGqM((3P6X@7N!*brlE@piubcrN(6TGM+D!Voxu`yKk2#ZKDz ztud)YwAhFGTh@=X5A2-o=LJ49cKHvFmCjJYeexZ=pymtVMo8HlR`{hQQsqVMPH%7iR zGM0rDN+&05Sr~uT4O;`_FJ-C;=Gbwp2{V~i5i+;#)o2@63h38ugFG&^Y!jE^l6+bG z=~FgPM!xVjJuZs9;I)X0>*;4LGLi-mfwjfEl%bx;z~CWXW^HK>03QW&4e zcwt&_uRvf|>s1i%IG5%~miDpdFICfmn4;J+uCjZs`~Kncz+Z*Em2cA#5!kRBXZ(3= zYbB2D4S|ExSr~U#SjfDLd4U&ir$V~V!*=|p_nxTr&`TpLcHHxcC~$S0!Zh-?MuE4& z=WVZraWo*vj?GykI1weOPLCzPG1Dq86vBlwt_WZ#rLEUi*zXe*zTfd#)-YQ`qwsrn zgEv`40KAIH zdlK6POv`NCQc7(rcmlYS1U7vpF%Gidk8)$~_WdJjOiIInf0mqv53(4IbJVhr%`n)U z=#1o@il2rJux3N5LQEHgd{nvRL>SJMS)tApzkPgPOy4UJPo(Fl!in-0*PDx~U(Rgsut7PD=^2?& zIiK!}ddeFty&%ZT%d68-`0_rv`t`j23o61hD+i;&L3DR`)9A>;((v$}cg(RLjy0}i zYRDqxcavulp-;8Udop5UAp(}>hKdYT49wwx4WGqDbyteJ;(^1vACp-g-d$Cc4T2tW zW|1(dC`2k%Gb1bX(|vG_jijY|kk%(#kU|5ihx$B%@d=-3<+w=EuGC7Zv)zzx zJ$pI2;dG1pgDhj1y*UqOQ5XNUghVNm6{ELmNq-$USK$(B4d$jqF1R69dnTz@catm6 z_UTPLp1|Y!#BgoaFQR^AahWMI&L!QAUxhrYvQ8Q!SA}#v^V%h^uQWPk|H{GWxr`RI zV5F>(3;Iy^^}~k`3Ts{fgS-fByP=$kC_#!4)BAsT;tod26XHY7Bw0YTL*)DO$RFRg z@LtP_MKa|K(^vFZ#d3V9L=V7pO9VFX_X9$tOSkg*!)cE)VhQ1&$bI7NwhMfVd5!IK zv_g!Sy1x%YG`Q|tIsKbkbWAColo2)M7BdzOFjj?+HI8c5D${t`CvoQJq#B>$+W$71 zTbwH~psX>|maBa*^(qg)bQ4Z2Wm!CfpwJHA!lOklOtV7>41!!;LD3p3Wov_4U+}k>O!dwo=Q28urh5M3kE> zBW$ap{DMsIt-PFs3Cc{4vm<5?>nTqAiOKUmfq-9^-%=)$y+{BpR(@8-dbRl_b;bCw zcd}}EQL2i1ceyECj3#uA>^7cz46+&!hAd;U;1r-ecE$kVcu)5?|GaULruyN)I$^Jp zd(H*&oJ8B(h}|c9govgtjQWjiOLlG*b)*6@&;gnHk&Y&a*&$Z8QvD82JUP1o>Hff! z04UGk(<{}>y0YER7IL%{5bJEEimWbjRTkY*HdcPP|9-fO{H~e9Ul`B628Ylq)W8b; zA~W!Ol%Nw{*Pmay984(7KTiX~M#?QQs?JGTHUux>E5r~hv{-!9~6Qc;mGT|pKBwmGb1Am?J}`Y{{ZP}=MN&(zI9pDk zMO(-)%&@PY^N%PFgcSVmz3daNdPg~^O7V%2p>!vcoRZ%~W2n)zSvukbLIcfY8nnZC_FKx@>n4(hEy=*~s>NTy2ZoC|}J|plekxkdvCem~T-B zaGHGitRC~EKI5N=xs-C|0dv*nrTJ-KmR39l5vq!E3WGemG*BG#2{TU5aX`M#QMf;1HBkBlsTjsP1 zACT-!vy!B|;uQNh`zzP+e#qZQG^`j61N4y>c)SbPop+ekx8#>n7fk}I#_H1V2Grvg z%kmcT0gj9Itb`sVl+ZpjAs7gFJHku*{T?B=y;RFVYn96 zz)h`BcHV(P)V-)_`L7tlniTjJ`{>{#H@kto@AcZgq_elUxHvlUo*m~5VP5(rvO_6p zBTIqcBhJX)SK>|d{!egIWZy~#Bb80faR1Vv9esYE)bumAnE{3t-fL7KEUL?#aIlY1 zq~=&upg6fILvsUIlksiYDaYZ#ri$HIL7I9&B}%&4T@`-M|LyNDpFfv^aP)$k4^YOC znHuT-)+AdWi1a@rDEUe5(*7!(bsHpXRo_~Lp(}9hA&w3qEk$8;L2A{6iZJhETfriABII&QF!<{ymJIMrxuX{H zYG7+^kRdK#QTq_r+xaC`^EUdinS$WXc!ntv_(SGjdWDFLYvG+a8Yg$w6b3=PLsM9z z@Z_G{%_4rvv=)Hl!xgB_QIDnL*yDTPFTcz1%vW@BR0DIm5-vnw@g90XRK0#VnW}8Y4@Zp48HDOvf z+)mrvACG0J)^%QCh@Sfvve9o0M95+w&~x=IwjAe!bug)i11?d(=FYewg( zolri&#!17}`NY@?%HIF5J8K_H?m|GCYK#dzlZ4l4;`xUtk(v(l-f^N}V#1#LKu&VPr)u zMX_QGsl0L+dA)_cMy}cHgDLyJ$&zmbpk*_I2_VGBZa;>9B?ftrF#u>}@s35;rnWLX z66N^fDj{RZAulB`{pgvBt<57RRFDvuRq2}$fJ^^$k9w&f3xc2KZ^Ve z9u=kiMd5EusoiVJ1%=@V0tN)PO}WGdWEY*BMPQI>Tg%P?Qhv7FJA}$Tw@=MFpvR@N zsq%V6q3{-E4xC~1SJT-31+zzF{Gal}0R>S2Adx|j=%7jS9d0Tb_VBxiBp22ho>oMd zDk+?k?o<6%zdj9(2Iw4}6Kuzx$R@MZ%m&CW1O+`a;3pd2$|G!;`l`SfO7b6PyhUPO zyaJNtM{^1^ssfy6m{6}+jb9VCE_*0M!dYf?2;YF#dG-gtr$cE)pQOXw!QUCy6n5L?-$tHS^bAyxpGpn{Ez{3`@mImtq zE81uam-N()ik!s;ApXmuQ;UylQkKMVOJATqyx6;XETQ2BV1I>tEJ5J$F0N@{cVsn5 zO%xi&BCMRi#Iicx3sKnn7jUs-GD@2uN zQJs6@{^b+9n>1j6nYS(_If!yjXl9@KxcxS*U2twH_zKs3B}AcSe|edHN?hY~=Aip^wpgv>x-z2GNXnO4htJDmsZ;n~R41ssI z0z+$Sly^$xt1i-O`-e+fuUiJUeqhW(;^1Jr}2j8z6+1Gk$Y1^uKS(DzrH;|2X8p0wj@a|Ys(O_@6yV&>Hk z$%1cY15BsI=|_*uSE{h?OhDGNkxf1)ZtTX`hkr?laHRG0x104sC61)9nV7V21!8!> zq#%WiY#=ouL<3J!ZOgr?2ap)?0YK3|$OuTjk=%5||JPTal{qeUU?=aVf1Jlojg7EsI^OOvw}U^OeAc_l84>=;Gh5JsW7|KbzRqWY z00d`(diTnFSit?)J4$%}7727HwDBW&f5_gBe}(Y> q4uA3ffB)zICuaT++y7b1aC=|KXNbYbk6ILP`)R7_saC4kMf@MDusmA; diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index 79e213ee39..4ccb810703 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -315,8 +315,6 @@ public: } -#include "opencv2/objdetect/linemod.hpp" -#include "opencv2/objdetect/erfilter.hpp" #include "opencv2/objdetect/detection_based_tracker.hpp" #endif diff --git a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp deleted file mode 100644 index d7e07d80d8..0000000000 --- a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp +++ /dev/null @@ -1,266 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OBJDETECT_ERFILTER_HPP__ -#define __OPENCV_OBJDETECT_ERFILTER_HPP__ - -#include "opencv2/core.hpp" -#include -#include -#include - -namespace cv -{ - -/*! - Extremal Region Stat structure - - The ERStat structure represents a class-specific Extremal Region (ER). - - An ER is a 4-connected set of pixels with all its grey-level values smaller than the values - in its outer boundary. A class-specific ER is selected (using a classifier) from all the ER's - in the component tree of the image. -*/ -struct CV_EXPORTS ERStat -{ -public: - //! Constructor - explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); - //! Destructor - ~ERStat() { } - - //! seed point and the threshold (max grey-level value) - int pixel; - int level; - - //! incrementally computable features - int area; - int perimeter; - int euler; //!< euler number - Rect rect; - double raw_moments[2]; //!< order 1 raw moments to derive the centroid - double central_moments[3]; //!< order 2 central moments to construct the covariance matrix - std::deque *crossings;//!< horizontal crossings - float med_crossings; //!< median of the crossings at three different height levels - - //! 2nd stage features - float hole_area_ratio; - float convex_hull_ratio; - float num_inflexion_points; - - // TODO Other features can be added (average color, standard deviation, and such) - - - // TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ? - std::vector *pixels; - - //! probability that the ER belongs to the class we are looking for - double probability; - - //! pointers preserving the tree structure of the component tree - ERStat* parent; - ERStat* child; - ERStat* next; - ERStat* prev; - - //! wenever the regions is a local maxima of the probability - bool local_maxima; - ERStat* max_probability_ancestor; - ERStat* min_probability_ancestor; -}; - -/*! - Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithms - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 - - Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier. -*/ -class CV_EXPORTS ERFilter : public Algorithm -{ -public: - - //! callback with the classifier is made a class. By doing it we hide SVM, Boost etc. - class CV_EXPORTS Callback - { - public: - virtual ~Callback() { } - //! The classifier must return probability measure for the region. - virtual double eval(const ERStat& stat) = 0; //const = 0; //TODO why cannot use const = 0 here? - }; - - /*! - the key method. Takes image on input and returns the selected regions in a vector of ERStat - only distinctive ERs which correspond to characters are selected by a sequential classifier - \param image is the input image - \param regions is output for the first stage, input/output for the second one. - */ - virtual void run( InputArray image, std::vector& regions ) = 0; - - - //! set/get methods to set the algorithm properties, - virtual void setCallback(const Ptr& cb) = 0; - virtual void setThresholdDelta(int thresholdDelta) = 0; - virtual void setMinArea(float minArea) = 0; - virtual void setMaxArea(float maxArea) = 0; - virtual void setMinProbability(float minProbability) = 0; - virtual void setMinProbabilityDiff(float minProbabilityDiff) = 0; - virtual void setNonMaxSuppression(bool nonMaxSuppression) = 0; - virtual int getNumRejected() = 0; -}; - - -/*! - Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 - - The component tree of the image is extracted by a threshold increased step by step - from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness, - number of holes, and number of horizontal crossings) are computed for each ER - and used as features for a classifier which estimates the class-conditional - probability P(er|character). The value of P(er|character) is tracked using the inclusion - relation of ER across all thresholds and only the ERs which correspond to local maximum - of the probability P(er|character) are selected (if the local maximum of the - probability is above a global limit pmin and the difference between local maximum and - local minimum is greater than minProbabilityDiff). - - \param cb Callback with the classifier. - default classifier can be implicitly load with function loadClassifierNM1() - from file in samples/cpp/trained_classifierNM1.xml - \param thresholdDelta Threshold step in subsequent thresholds when extracting the component tree - \param minArea The minimum area (% of image size) allowed for retreived ER's - \param minArea The maximum area (% of image size) allowed for retreived ER's - \param minProbability The minimum probability P(er|character) allowed for retreived ER's - \param nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities - \param minProbability The minimum probability difference between local maxima and local minima ERs -*/ -CV_EXPORTS Ptr createERFilterNM1(const Ptr& cb, - int thresholdDelta = 1, float minArea = 0.00025, - float maxArea = 0.13, float minProbability = 0.4, - bool nonMaxSuppression = true, - float minProbabilityDiff = 0.1); - -/*! - Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 - - In the second stage, the ERs that passed the first stage are classified into character - and non-character classes using more informative but also more computationally expensive - features. The classifier uses all the features calculated in the first stage and the following - additional features: hole area ratio, convex hull ratio, and number of outer inflexion points. - - \param cb Callback with the classifier - default classifier can be implicitly load with function loadClassifierNM2() - from file in samples/cpp/trained_classifierNM2.xml - \param minProbability The minimum probability P(er|character) allowed for retreived ER's -*/ -CV_EXPORTS Ptr createERFilterNM2(const Ptr& cb, - float minProbability = 0.3); - - -/*! - Allow to implicitly load the default classifier when creating an ERFilter object. - The function takes as parameter the XML or YAML file with the classifier model - (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback. -*/ - -CV_EXPORTS Ptr loadClassifierNM1(const std::string& filename); - -/*! - Allow to implicitly load the default classifier when creating an ERFilter object. - The function takes as parameter the XML or YAML file with the classifier model - (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback. -*/ - -CV_EXPORTS Ptr loadClassifierNM2(const std::string& filename); - - -// computeNMChannels operation modes -enum { ERFILTER_NM_RGBLGrad = 0, - ERFILTER_NM_IHSGrad = 1 - }; - -/*! - Compute the different channels to be processed independently in the N&M algorithm - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 - - In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient - magnitude channels (Grad) are used in order to obtain high localization recall. - This implementation also provides an alternative combination of red (R), green (G), blue (B), - lightness (L), and gradient magnitude (Grad). - - \param _src Source image. Must be RGB CV_8UC3. - \param _channels Output vector where computed channels are stored. - \param _mode Mode of operation. Currently the only available options are - ERFILTER_NM_RGBLGrad (by default) and ERFILTER_NM_IHSGrad. - -*/ -CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad); - - -/*! - Find groups of Extremal Regions that are organized as text blocks. This function implements - the grouping algorithm described in: - Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013. - Notice that this implementation constrains the results to horizontally-aligned text and - latin script (since ERFilter classifiers are trained only for latin script detection). - - The algorithm combines two different clustering techniques in a single parameter-free procedure - to detect groups of regions organized as text. The maximally meaningful groups are fist detected - in several feature spaces, where each feature space is a combination of proximity information - (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.), - thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to - combine all these hypotheses to get the final estimate. Each of the resulting groups are finally - validated using a classifier in order to assest if they form a valid horizontally-aligned text block. - - \param src Vector of sinle channel images CV_8UC1 from wich the regions were extracted. - \param regions Vector of ER's retreived from the ERFilter algorithm from each channel - \param filename The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml) - \param minProbability The minimum probability for accepting a group - \param groups The output of the algorithm are stored in this parameter as list of rectangles. -*/ -CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector > ®ions, - const std::string& filename, float minProbablity, - std::vector &groups); - -} -#endif // _OPENCV_ERFILTER_HPP_ diff --git a/modules/objdetect/include/opencv2/objdetect/linemod.hpp b/modules/objdetect/include/opencv2/objdetect/linemod.hpp deleted file mode 100644 index 46d869926f..0000000000 --- a/modules/objdetect/include/opencv2/objdetect/linemod.hpp +++ /dev/null @@ -1,455 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OBJDETECT_LINEMOD_HPP__ -#define __OPENCV_OBJDETECT_LINEMOD_HPP__ - -#include "opencv2/core.hpp" -#include - -/****************************************************************************************\ -* LINE-MOD * -\****************************************************************************************/ - -namespace cv { -namespace linemod { - -/// @todo Convert doxy comments to rst - -/** - * \brief Discriminant feature described by its location and label. - */ -struct CV_EXPORTS Feature -{ - int x; ///< x offset - int y; ///< y offset - int label; ///< Quantization - - Feature() : x(0), y(0), label(0) {} - Feature(int x, int y, int label); - - void read(const FileNode& fn); - void write(FileStorage& fs) const; -}; - -inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {} - -struct CV_EXPORTS Template -{ - int width; - int height; - int pyramid_level; - std::vector features; - - void read(const FileNode& fn); - void write(FileStorage& fs) const; -}; - -/** - * \brief Represents a modality operating over an image pyramid. - */ -class QuantizedPyramid -{ -public: - // Virtual destructor - virtual ~QuantizedPyramid() {} - - /** - * \brief Compute quantized image at current pyramid level for online detection. - * - * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set, - * representing its classification. - */ - virtual void quantize(Mat& dst) const =0; - - /** - * \brief Extract most discriminant features at current pyramid level to form a new template. - * - * \param[out] templ The new template. - */ - virtual bool extractTemplate(Template& templ) const =0; - - /** - * \brief Go to the next pyramid level. - * - * \todo Allow pyramid scale factor other than 2 - */ - virtual void pyrDown() =0; - -protected: - /// Candidate feature with a score - struct Candidate - { - Candidate(int x, int y, int label, float score); - - /// Sort candidates with high score to the front - bool operator<(const Candidate& rhs) const - { - return score > rhs.score; - } - - Feature f; - float score; - }; - - /** - * \brief Choose candidate features so that they are not bunched together. - * - * \param[in] candidates Candidate features sorted by score. - * \param[out] features Destination vector of selected features. - * \param[in] num_features Number of candidates to select. - * \param[in] distance Hint for desired distance between features. - */ - static void selectScatteredFeatures(const std::vector& candidates, - std::vector& features, - size_t num_features, float distance); -}; - -inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {} - -/** - * \brief Interface for modalities that plug into the LINE template matching representation. - * - * \todo Max response, to allow optimization of summing (255/MAX) features as uint8 - */ -class CV_EXPORTS Modality -{ -public: - // Virtual destructor - virtual ~Modality() {} - - /** - * \brief Form a quantized image pyramid from a source image. - * - * \param[in] src The source image. Type depends on the modality. - * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero - * in quantized image and cannot be extracted as features. - */ - Ptr process(const Mat& src, - const Mat& mask = Mat()) const - { - return processImpl(src, mask); - } - - virtual String name() const =0; - - virtual void read(const FileNode& fn) =0; - virtual void write(FileStorage& fs) const =0; - - /** - * \brief Create modality by name. - * - * The following modality types are supported: - * - "ColorGradient" - * - "DepthNormal" - */ - static Ptr create(const String& modality_type); - - /** - * \brief Load a modality from file. - */ - static Ptr create(const FileNode& fn); - -protected: - // Indirection is because process() has a default parameter. - virtual Ptr processImpl(const Mat& src, - const Mat& mask) const =0; -}; - -/** - * \brief Modality that computes quantized gradient orientations from a color image. - */ -class CV_EXPORTS ColorGradient : public Modality -{ -public: - /** - * \brief Default constructor. Uses reasonable default parameter values. - */ - ColorGradient(); - - /** - * \brief Constructor. - * - * \param weak_threshold When quantizing, discard gradients with magnitude less than this. - * \param num_features How many features a template must contain. - * \param strong_threshold Consider as candidate features only gradients whose norms are - * larger than this. - */ - ColorGradient(float weak_threshold, size_t num_features, float strong_threshold); - - virtual String name() const; - - virtual void read(const FileNode& fn); - virtual void write(FileStorage& fs) const; - - float weak_threshold; - size_t num_features; - float strong_threshold; - -protected: - virtual Ptr processImpl(const Mat& src, - const Mat& mask) const; -}; - -/** - * \brief Modality that computes quantized surface normals from a dense depth map. - */ -class CV_EXPORTS DepthNormal : public Modality -{ -public: - /** - * \brief Default constructor. Uses reasonable default parameter values. - */ - DepthNormal(); - - /** - * \brief Constructor. - * - * \param distance_threshold Ignore pixels beyond this distance. - * \param difference_threshold When computing normals, ignore contributions of pixels whose - * depth difference with the central pixel is above this threshold. - * \param num_features How many features a template must contain. - * \param extract_threshold Consider as candidate feature only if there are no differing - * orientations within a distance of extract_threshold. - */ - DepthNormal(int distance_threshold, int difference_threshold, size_t num_features, - int extract_threshold); - - virtual String name() const; - - virtual void read(const FileNode& fn); - virtual void write(FileStorage& fs) const; - - int distance_threshold; - int difference_threshold; - size_t num_features; - int extract_threshold; - -protected: - virtual Ptr processImpl(const Mat& src, - const Mat& mask) const; -}; - -/** - * \brief Debug function to colormap a quantized image for viewing. - */ -void colormap(const Mat& quantized, Mat& dst); - -/** - * \brief Represents a successful template match. - */ -struct CV_EXPORTS Match -{ - Match() - { - } - - Match(int x, int y, float similarity, const String& class_id, int template_id); - - /// Sort matches with high similarity to the front - bool operator<(const Match& rhs) const - { - // Secondarily sort on template_id for the sake of duplicate removal - if (similarity != rhs.similarity) - return similarity > rhs.similarity; - else - return template_id < rhs.template_id; - } - - bool operator==(const Match& rhs) const - { - return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id; - } - - int x; - int y; - float similarity; - String class_id; - int template_id; -}; - -inline -Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _template_id) - : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id) -{} - -/** - * \brief Object detector using the LINE template matching algorithm with any set of - * modalities. - */ -class CV_EXPORTS Detector -{ -public: - /** - * \brief Empty constructor, initialize with read(). - */ - Detector(); - - /** - * \brief Constructor. - * - * \param modalities Modalities to use (color gradients, depth normals, ...). - * \param T_pyramid Value of the sampling step T at each pyramid level. The - * number of pyramid levels is T_pyramid.size(). - */ - Detector(const std::vector< Ptr >& modalities, const std::vector& T_pyramid); - - /** - * \brief Detect objects by template matching. - * - * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid. - * - * \param sources Source images, one for each modality. - * \param threshold Similarity threshold, a percentage between 0 and 100. - * \param[out] matches Template matches, sorted by similarity score. - * \param class_ids If non-empty, only search for the desired object classes. - * \param[out] quantized_images Optionally return vector of quantized images. - * \param masks The masks for consideration during matching. The masks should be CV_8UC1 - * where 255 represents a valid pixel. If non-empty, the vector must be - * the same size as sources. Each element must be - * empty or the same size as its corresponding source. - */ - void match(const std::vector& sources, float threshold, std::vector& matches, - const std::vector& class_ids = std::vector(), - OutputArrayOfArrays quantized_images = noArray(), - const std::vector& masks = std::vector()) const; - - /** - * \brief Add new object template. - * - * \param sources Source images, one for each modality. - * \param class_id Object class ID. - * \param object_mask Mask separating object from background. - * \param[out] bounding_box Optionally return bounding box of the extracted features. - * - * \return Template ID, or -1 if failed to extract a valid template. - */ - int addTemplate(const std::vector& sources, const String& class_id, - const Mat& object_mask, Rect* bounding_box = NULL); - - /** - * \brief Add a new object template computed by external means. - */ - int addSyntheticTemplate(const std::vector