diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index ecea028c79..1fb901d257 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -60,4 +60,3 @@ foreach(sample_filename ${all_samples}) ocv_target_link_libraries(${tgt} opencv_cudacodec) endif() endforeach() -include("performance/CMakeLists.txt" OPTIONAL) diff --git a/samples/gpu/performance/CMakeLists.txt b/samples/gpu/performance/CMakeLists.txt deleted file mode 100644 index 641ea6e1b1..0000000000 --- a/samples/gpu/performance/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -set(the_target "example_gpu_performance") - -file(GLOB sources "performance/*.cpp") -file(GLOB headers "performance/*.h") - -if(HAVE_opencv_xfeatures2d) - ocv_include_modules_recurse(opencv_xfeatures2d) -endif() - -if(HAVE_opencv_bgsegm) - ocv_include_modules_recurse(opencv_bgsegm) -endif() - -add_executable(${the_target} ${sources} ${headers}) -ocv_target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) - -if(HAVE_opencv_xfeatures2d) - ocv_target_link_libraries(${the_target} opencv_xfeatures2d) -endif() - -if(HAVE_opencv_bgsegm) - ocv_target_link_libraries(${the_target} opencv_bgsegm) -endif() - -set_target_properties(${the_target} PROPERTIES - OUTPUT_NAME "performance_gpu" - PROJECT_LABEL "(EXAMPLE_CUDA) performance") - -if(ENABLE_SOLUTION_FOLDERS) - set_target_properties(${the_target} PROPERTIES FOLDER "samples//gpu") -endif() - -if(WIN32) - install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/gpu" COMPONENT samples) -endif() - -ocv_install_example_src("gpu/performance" performance/*.cpp performance/*.h) diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp deleted file mode 100644 index cef979954c..0000000000 --- a/samples/gpu/performance/performance.cpp +++ /dev/null @@ -1,227 +0,0 @@ -#include -#include -#include -#include "performance.h" -#include "opencv2/core/cuda.hpp" - -using namespace std; -using namespace cv; -using namespace cv::cuda; - -void TestSystem::run() -{ - if (is_list_mode_) - { - for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) - cout << (*it)->name() << endl; - - return; - } - - // Run test initializers - for (vector::iterator it = inits_.begin(); it != inits_.end(); ++it) - { - if ((*it)->name().find(test_filter_, 0) != string::npos) - (*it)->run(); - } - - printHeading(); - - // Run tests - for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) - { - try - { - if ((*it)->name().find(test_filter_, 0) != string::npos) - { - cout << endl << (*it)->name() << ":\n"; - (*it)->run(); - finishCurrentSubtest(); - } - } - catch (const Exception&) - { - // Message is printed via callback - resetCurrentSubtest(); - } - catch (const runtime_error& e) - { - printError(e.what()); - resetCurrentSubtest(); - } - } - - printSummary(); -} - - -void TestSystem::finishCurrentSubtest() -{ - if (cur_subtest_is_empty_) - // There is no need to print subtest statistics - return; - - double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; - double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; - - double speedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); - speedup_total_ += speedup; - - printMetrics(cpu_time, gpu_time, speedup); - - num_subtests_called_++; - resetCurrentSubtest(); -} - - -double TestSystem::meanTime(const vector &samples) -{ - double sum = accumulate(samples.begin(), samples.end(), 0.); - if (samples.size() > 1) - return (sum - samples[0]) / (samples.size() - 1); - return sum; -} - - -void TestSystem::printHeading() -{ - cout << endl; - cout << setiosflags(ios_base::left); - cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" - << setw(14) << "SPEEDUP" - << "DESCRIPTION\n"; - cout << resetiosflags(ios_base::left); -} - - -void TestSystem::printSummary() -{ - cout << setiosflags(ios_base::fixed); - cout << "\naverage GPU speedup: x" - << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) - << endl; - cout << resetiosflags(ios_base::fixed); -} - - -void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup) -{ - cout << TAB << setiosflags(ios_base::left); - stringstream stream; - - stream << cpu_time; - cout << setw(10) << stream.str(); - - stream.str(""); - stream << gpu_time; - cout << setw(10) << stream.str(); - - stream.str(""); - stream << "x" << setprecision(3) << speedup; - cout << setw(14) << stream.str(); - - cout << cur_subtest_description_.str(); - cout << resetiosflags(ios_base::left) << endl; -} - - -void TestSystem::printError(const std::string& msg) -{ - cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl; -} - - -void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high) -{ - mat.create(rows, cols, type); - RNG rng(0); - rng.fill(mat, RNG::UNIFORM, low, high); -} - - -string abspath(const string& relpath) -{ - return TestSystem::instance().workingDir() + relpath; -} - - -static int cvErrorCallback(int /*status*/, const char* /*func_name*/, - const char* err_msg, const char* /*file_name*/, - int /*line*/, void* /*userdata*/) -{ - TestSystem::instance().printError(err_msg); - return 0; -} - - -int main(int argc, const char* argv[]) -{ - int num_devices = getCudaEnabledDeviceCount(); - if (num_devices == 0) - { - cerr << "No GPU found or the library was compiled without CUDA support"; - return -1; - } - - redirectError(cvErrorCallback); - - const char* keys = - "{ h help | | print help message }" - "{ f filter | | filter for test }" - "{ w workdir | | set working directory }" - "{ l list | | show all tests }" - "{ d device | 0 | device id }" - "{ i iters | 10 | iteration count }"; - - CommandLineParser cmd(argc, argv, keys); - - if (cmd.has("help") || !cmd.check()) - { - cmd.printMessage(); - cmd.printErrors(); - return 0; - } - - - int device = cmd.get("device"); - if (device < 0 || device >= num_devices) - { - cerr << "Invalid device ID" << endl; - return -1; - } - DeviceInfo dev_info(device); - if (!dev_info.isCompatible()) - { - cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl; - return -1; - } - setDevice(device); - printShortCudaDeviceInfo(device); - - string filter = cmd.get("filter"); - string workdir = cmd.get("workdir"); - bool list = cmd.has("list"); - int iters = cmd.get("iters"); - - if (!filter.empty()) - TestSystem::instance().setTestFilter(filter); - - if (!workdir.empty()) - { - if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') - workdir += '/'; - - TestSystem::instance().setWorkingDir(workdir); - } - - if (list) - TestSystem::instance().setListMode(true); - - TestSystem::instance().setNumIters(iters); - - cout << "\nNote: the timings for GPU don't include data transfer" << endl; - - TestSystem::instance().run(); - - return 0; -} diff --git a/samples/gpu/performance/performance.h b/samples/gpu/performance/performance.h deleted file mode 100644 index 98889f3422..0000000000 --- a/samples/gpu/performance/performance.h +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef OPENCV_CUDA_SAMPLE_PERFORMANCE_H_ -#define OPENCV_CUDA_SAMPLE_PERFORMANCE_H_ - -#include -#include -#include -#include -#include -#include - -#define TAB " " - -class Runnable -{ -public: - explicit Runnable(const std::string& nameStr): name_(nameStr) {} - virtual ~Runnable() {} - - const std::string& name() const { return name_; } - - virtual void run() = 0; - -private: - std::string name_; -}; - - -class TestSystem -{ -public: - static TestSystem& instance() - { - static TestSystem me; - return me; - } - - void setWorkingDir(const std::string& val) { working_dir_ = val; } - const std::string& workingDir() const { return working_dir_; } - - void setTestFilter(const std::string& val) { test_filter_ = val; } - const std::string& testFilter() const { return test_filter_; } - - void setNumIters(int num_iters) { num_iters_ = num_iters; } - - void addInit(Runnable* init) { inits_.push_back(init); } - void addTest(Runnable* test) { tests_.push_back(test); } - void run(); - - // It's public because OpenCV callback uses it - void printError(const std::string& msg); - - std::stringstream& startNewSubtest() - { - finishCurrentSubtest(); - return cur_subtest_description_; - } - - bool stop() const { return cur_iter_idx_ >= num_iters_; } - - void cpuOn() { cpu_started_ = cv::getTickCount(); } - void cpuOff() - { - int64 delta = cv::getTickCount() - cpu_started_; - cpu_times_.push_back(delta); - ++cur_iter_idx_; - } - void cpuComplete() - { - cpu_elapsed_ += meanTime(cpu_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - void gpuOn() { gpu_started_ = cv::getTickCount(); } - void gpuOff() - { - int64 delta = cv::getTickCount() - gpu_started_; - gpu_times_.push_back(delta); - ++cur_iter_idx_; - } - void gpuComplete() - { - gpu_elapsed_ += meanTime(gpu_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - bool isListMode() const { return is_list_mode_; } - void setListMode(bool value) { is_list_mode_ = value; } - -private: - TestSystem(): - cur_subtest_is_empty_(true), cpu_elapsed_(0), - gpu_elapsed_(0), speedup_total_(0.0), - num_subtests_called_(0), is_list_mode_(false), - num_iters_(10), cur_iter_idx_(0) - { - cpu_times_.reserve(num_iters_); - gpu_times_.reserve(num_iters_); - } - - void finishCurrentSubtest(); - void resetCurrentSubtest() - { - cpu_elapsed_ = 0; - gpu_elapsed_ = 0; - cur_subtest_description_.str(""); - cur_subtest_is_empty_ = true; - cur_iter_idx_ = 0; - cpu_times_.clear(); - gpu_times_.clear(); - } - - double meanTime(const std::vector &samples); - - void printHeading(); - void printSummary(); - void printMetrics(double cpu_time, double gpu_time, double speedup); - - std::string working_dir_; - std::string test_filter_; - - std::vector inits_; - std::vector tests_; - - std::stringstream cur_subtest_description_; - bool cur_subtest_is_empty_; - - int64 cpu_started_; - int64 gpu_started_; - double cpu_elapsed_; - double gpu_elapsed_; - - double speedup_total_; - int num_subtests_called_; - - bool is_list_mode_; - - int num_iters_; - int cur_iter_idx_; - std::vector cpu_times_; - std::vector gpu_times_; -}; - - -#define GLOBAL_INIT(name) \ - struct name##_init: Runnable { \ - name##_init(): Runnable(#name) { \ - TestSystem::instance().addInit(this); \ - } \ - void run(); \ - } name##_init_instance; \ - void name##_init::run() - - -#define TEST(name) \ - struct name##_test: Runnable { \ - name##_test(): Runnable(#name) { \ - TestSystem::instance().addTest(this); \ - } \ - void run(); \ - } name##_test_instance; \ - void name##_test::run() - -#define SUBTEST TestSystem::instance().startNewSubtest() - -#define CPU_ON \ - while (!TestSystem::instance().stop()) { \ - TestSystem::instance().cpuOn() -#define CPU_OFF \ - TestSystem::instance().cpuOff(); \ - } TestSystem::instance().cpuComplete() - -#define CUDA_ON \ - while (!TestSystem::instance().stop()) { \ - TestSystem::instance().gpuOn() -#define CUDA_OFF \ - TestSystem::instance().gpuOff(); \ - } TestSystem::instance().gpuComplete() - -// Generates a matrix -void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low, - cv::Scalar high); - -// Returns abs path taking into account test system working dir -std::string abspath(const std::string& relpath); - -#endif // OPENCV_CUDA_SAMPLE_PERFORMANCE_H_ diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp deleted file mode 100644 index aad40bf09f..0000000000 --- a/samples/gpu/performance/tests.cpp +++ /dev/null @@ -1,1300 +0,0 @@ -#include -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/calib3d.hpp" -#include "opencv2/video.hpp" -#include "opencv2/cudalegacy.hpp" -#include "opencv2/cudaimgproc.hpp" -#include "opencv2/cudaarithm.hpp" -#include "opencv2/cudawarping.hpp" -#include "opencv2/cudafeatures2d.hpp" -#include "opencv2/cudafilters.hpp" -#include "opencv2/cudaoptflow.hpp" -#include "opencv2/cudabgsegm.hpp" - -#include "performance.h" - -#include "opencv2/opencv_modules.hpp" - -#ifdef HAVE_OPENCV_XFEATURES2D -#include "opencv2/xfeatures2d/cuda.hpp" -#include "opencv2/xfeatures2d/nonfree.hpp" -#endif - -#ifdef HAVE_OPENCV_BGSEGM -#include "opencv2/bgsegm.hpp" -#endif - -using namespace std; -using namespace cv; - - -TEST(matchTemplate) -{ - Mat src, templ, dst; - gen(src, 3000, 3000, CV_32F, 0, 1); - - cuda::GpuMat d_src(src), d_templ, d_dst; - - Ptr alg = cuda::createTemplateMatching(src.type(), TM_CCORR); - - for (int templ_size = 5; templ_size < 200; templ_size *= 5) - { - SUBTEST << src.cols << 'x' << src.rows << ", 32FC1" << ", templ " << templ_size << 'x' << templ_size << ", CCORR"; - - gen(templ, templ_size, templ_size, CV_32F, 0, 1); - matchTemplate(src, templ, dst, TM_CCORR); - - CPU_ON; - matchTemplate(src, templ, dst, TM_CCORR); - CPU_OFF; - - d_templ.upload(templ); - alg->match(d_src, d_templ, d_dst); - - CUDA_ON; - alg->match(d_src, d_templ, d_dst); - CUDA_OFF; - } -} - - -TEST(minMaxLoc) -{ - Mat src; - cuda::GpuMat d_src; - - double min_val, max_val; - Point min_loc, max_loc; - - for (int size = 2000; size <= 8000; size *= 2) - { - SUBTEST << size << 'x' << size << ", 32F"; - - gen(src, size, size, CV_32F, 0, 1); - - CPU_ON; - minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); - CPU_OFF; - - d_src.upload(src); - - CUDA_ON; - cuda::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - CUDA_OFF; - } -} - - -TEST(remap) -{ - Mat src, dst, xmap, ymap; - cuda::GpuMat d_src, d_dst, d_xmap, d_ymap; - - int interpolation = INTER_LINEAR; - int borderMode = BORDER_REPLICATE; - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << ", 8UC4, INTER_LINEAR, BORDER_REPLICATE"; - - gen(src, size, size, CV_8UC4, 0, 256); - - xmap.create(size, size, CV_32F); - ymap.create(size, size, CV_32F); - for (int i = 0; i < size; ++i) - { - float* xmap_row = xmap.ptr(i); - float* ymap_row = ymap.ptr(i); - for (int j = 0; j < size; ++j) - { - xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; - ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f; - } - } - - remap(src, dst, xmap, ymap, interpolation, borderMode); - - CPU_ON; - remap(src, dst, xmap, ymap, interpolation, borderMode); - CPU_OFF; - - d_src.upload(src); - d_xmap.upload(xmap); - d_ymap.upload(ymap); - - cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - - CUDA_ON; - cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - CUDA_OFF; - } -} - - -TEST(dft) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << ", 32FC2, complex-to-complex"; - - gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1)); - - dft(src, dst); - - CPU_ON; - dft(src, dst); - CPU_OFF; - - d_src.upload(src); - - cuda::dft(d_src, d_dst, Size(size, size)); - - CUDA_ON; - cuda::dft(d_src, d_dst, Size(size, size)); - CUDA_OFF; - } -} - - -TEST(cornerHarris) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << ", 32FC1, BORDER_REFLECT101"; - - gen(src, size, size, CV_32F, 0, 1); - - cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101); - - CPU_ON; - cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101); - CPU_OFF; - - d_src.upload(src); - - Ptr harris = cuda::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101); - - harris->compute(d_src, d_dst); - - CUDA_ON; - harris->compute(d_src, d_dst); - CUDA_OFF; - } -} - - -TEST(integral) -{ - Mat src, sum; - cuda::GpuMat d_src, d_sum; - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << ", 8UC1"; - - gen(src, size, size, CV_8U, 0, 256); - - integral(src, sum); - - CPU_ON; - integral(src, sum); - CPU_OFF; - - d_src.upload(src); - - cuda::integral(d_src, d_sum); - - CUDA_ON; - cuda::integral(d_src, d_sum); - CUDA_OFF; - } -} - - -TEST(norm) -{ - Mat src; - cuda::GpuMat d_src, d_buf; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 32FC4, NORM_INF"; - - gen(src, size, size, CV_32FC4, Scalar::all(0), Scalar::all(1)); - - norm(src, NORM_INF); - - CPU_ON; - norm(src, NORM_INF); - CPU_OFF; - - d_src.upload(src); - - cuda::norm(d_src, NORM_INF, d_buf); - - CUDA_ON; - cuda::norm(d_src, NORM_INF, d_buf); - CUDA_OFF; - } -} - - -TEST(meanShift) -{ - int sp = 10, sr = 10; - - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 400; size <= 800; size *= 2) - { - SUBTEST << size << 'x' << size << ", 8UC3 vs 8UC4"; - - gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); - - pyrMeanShiftFiltering(src, dst, sp, sr); - - CPU_ON; - pyrMeanShiftFiltering(src, dst, sp, sr); - CPU_OFF; - - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - - d_src.upload(src); - - cuda::meanShiftFiltering(d_src, d_dst, sp, sr); - - CUDA_ON; - cuda::meanShiftFiltering(d_src, d_dst, sp, sr); - CUDA_OFF; - } -} - -#ifdef HAVE_OPENCV_XFEATURES2D - -TEST(SURF) -{ - Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE); - if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg"); - - Ptr surf = xfeatures2d::SURF::create(); - vector keypoints; - Mat descriptors; - - surf->detectAndCompute(src, Mat(), keypoints, descriptors); - - CPU_ON; - surf->detectAndCompute(src, Mat(), keypoints, descriptors); - CPU_OFF; - - cuda::SURF_CUDA d_surf; - cuda::GpuMat d_src(src); - cuda::GpuMat d_keypoints; - cuda::GpuMat d_descriptors; - - d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); - - CUDA_ON; - d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); - CUDA_OFF; -} - -#endif - - -TEST(FAST) -{ - Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE); - if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg"); - - vector keypoints; - - FAST(src, keypoints, 20); - - CPU_ON; - FAST(src, keypoints, 20); - CPU_OFF; - - cv::Ptr d_FAST = cv::cuda::FastFeatureDetector::create(20); - cuda::GpuMat d_src(src); - cuda::GpuMat d_keypoints; - - d_FAST->detectAsync(d_src, d_keypoints); - - CUDA_ON; - d_FAST->detectAsync(d_src, d_keypoints); - CUDA_OFF; -} - - -TEST(ORB) -{ - Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE); - if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg"); - - Ptr orb = ORB::create(4000); - - vector keypoints; - Mat descriptors; - - orb->detectAndCompute(src, Mat(), keypoints, descriptors); - - CPU_ON; - orb->detectAndCompute(src, Mat(), keypoints, descriptors); - CPU_OFF; - - Ptr d_orb = cuda::ORB::create(); - cuda::GpuMat d_src(src); - cuda::GpuMat d_keypoints; - cuda::GpuMat d_descriptors; - - d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); - - CUDA_ON; - d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); - CUDA_OFF; -} - - -TEST(BruteForceMatcher) -{ - // Init CPU matcher - - int desc_len = 64; - - BFMatcher matcher(NORM_L2); - - Mat query; - gen(query, 3000, desc_len, CV_32F, 0, 1); - - Mat train; - gen(train, 3000, desc_len, CV_32F, 0, 1); - - // Init CUDA matcher - - Ptr d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2); - - cuda::GpuMat d_query(query); - cuda::GpuMat d_train(train); - - // Output - vector< vector > matches(2); - cuda::GpuMat d_matches; - - SUBTEST << "match"; - - matcher.match(query, train, matches[0]); - - CPU_ON; - matcher.match(query, train, matches[0]); - CPU_OFF; - - d_matcher->matchAsync(d_query, d_train, d_matches); - - CUDA_ON; - d_matcher->matchAsync(d_query, d_train, d_matches); - CUDA_OFF; - - SUBTEST << "knnMatch"; - - matcher.knnMatch(query, train, matches, 2); - - CPU_ON; - matcher.knnMatch(query, train, matches, 2); - CPU_OFF; - - d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2); - - CUDA_ON; - d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2); - CUDA_OFF; - - SUBTEST << "radiusMatch"; - - float max_distance = 2.0f; - - matcher.radiusMatch(query, train, matches, max_distance); - - CPU_ON; - matcher.radiusMatch(query, train, matches, max_distance); - CPU_OFF; - - d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance); - - CUDA_ON; - d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance); - CUDA_OFF; -} - - -TEST(magnitude) -{ - Mat x, y, mag; - cuda::GpuMat d_x, d_y, d_mag; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 32FC1"; - - gen(x, size, size, CV_32F, 0, 1); - gen(y, size, size, CV_32F, 0, 1); - - magnitude(x, y, mag); - - CPU_ON; - magnitude(x, y, mag); - CPU_OFF; - - d_x.upload(x); - d_y.upload(y); - - cuda::magnitude(d_x, d_y, d_mag); - - CUDA_ON; - cuda::magnitude(d_x, d_y, d_mag); - CUDA_OFF; - } -} - - -TEST(add) -{ - Mat src1, src2, dst; - cuda::GpuMat d_src1, d_src2, d_dst; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 32FC1"; - - gen(src1, size, size, CV_32F, 0, 1); - gen(src2, size, size, CV_32F, 0, 1); - - add(src1, src2, dst); - - CPU_ON; - add(src1, src2, dst); - CPU_OFF; - - d_src1.upload(src1); - d_src2.upload(src2); - - cuda::add(d_src1, d_src2, d_dst); - - CUDA_ON; - cuda::add(d_src1, d_src2, d_dst); - CUDA_OFF; - } -} - - -TEST(log) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 32F"; - - gen(src, size, size, CV_32F, 1, 10); - - log(src, dst); - - CPU_ON; - log(src, dst); - CPU_OFF; - - d_src.upload(src); - - cuda::log(d_src, d_dst); - - CUDA_ON; - cuda::log(d_src, d_dst); - CUDA_OFF; - } -} - - -TEST(mulSpectrums) -{ - Mat src1, src2, dst; - cuda::GpuMat d_src1, d_src2, d_dst; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size; - - gen(src1, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1)); - gen(src2, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1)); - - mulSpectrums(src1, src2, dst, 0, true); - - CPU_ON; - mulSpectrums(src1, src2, dst, 0, true); - CPU_OFF; - - d_src1.upload(src1); - d_src2.upload(src2); - - cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true); - - CUDA_ON; - cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true); - CUDA_OFF; - } -} - - -TEST(resize) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 1000; size <= 3000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 8UC4, up"; - - gen(src, size, size, CV_8UC4, 0, 256); - - resize(src, dst, Size(), 2.0, 2.0); - - CPU_ON; - resize(src, dst, Size(), 2.0, 2.0); - CPU_OFF; - - d_src.upload(src); - - cuda::resize(d_src, d_dst, Size(), 2.0, 2.0); - - CUDA_ON; - cuda::resize(d_src, d_dst, Size(), 2.0, 2.0); - CUDA_OFF; - } - - for (int size = 1000; size <= 3000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 8UC4, down"; - - gen(src, size, size, CV_8UC4, 0, 256); - - resize(src, dst, Size(), 0.5, 0.5); - - CPU_ON; - resize(src, dst, Size(), 0.5, 0.5); - CPU_OFF; - - d_src.upload(src); - - cuda::resize(d_src, d_dst, Size(), 0.5, 0.5); - - CUDA_ON; - cuda::resize(d_src, d_dst, Size(), 0.5, 0.5); - CUDA_OFF; - } -} - - -TEST(cvtColor) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - gen(src, 4000, 4000, CV_8UC1, 0, 255); - d_src.upload(src); - - SUBTEST << "4000x4000, 8UC1, COLOR_GRAY2BGRA"; - - cvtColor(src, dst, COLOR_GRAY2BGRA, 4); - - CPU_ON; - cvtColor(src, dst, COLOR_GRAY2BGRA, 4); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); - - SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2YCrCb"; - - cvtColor(src, dst, COLOR_BGR2YCrCb); - - CPU_ON; - cvtColor(src, dst, COLOR_BGR2YCrCb); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); - - SUBTEST << "4000x4000, 8UC4, COLOR_YCrCb2BGR"; - - cvtColor(src, dst, COLOR_YCrCb2BGR, 4); - - CPU_ON; - cvtColor(src, dst, COLOR_YCrCb2BGR, 4); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); - - SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2XYZ"; - - cvtColor(src, dst, COLOR_BGR2XYZ); - - CPU_ON; - cvtColor(src, dst, COLOR_BGR2XYZ); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); - - SUBTEST << "4000x4000, 8UC4, COLOR_XYZ2BGR"; - - cvtColor(src, dst, COLOR_XYZ2BGR, 4); - - CPU_ON; - cvtColor(src, dst, COLOR_XYZ2BGR, 4); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); - - SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2HSV"; - - cvtColor(src, dst, COLOR_BGR2HSV); - - CPU_ON; - cvtColor(src, dst, COLOR_BGR2HSV); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); - - SUBTEST << "4000x4000, 8UC4, COLOR_HSV2BGR"; - - cvtColor(src, dst, COLOR_HSV2BGR, 4); - - CPU_ON; - cvtColor(src, dst, COLOR_HSV2BGR, 4); - CPU_OFF; - - cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4); - - CUDA_ON; - cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4); - CUDA_OFF; - - cv::swap(src, dst); - d_src.swap(d_dst); -} - - -TEST(erode) -{ - Mat src, dst, ker; - cuda::GpuMat d_src, d_buf, d_dst; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size; - - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - ker = getStructuringElement(MORPH_RECT, Size(3, 3)); - - erode(src, dst, ker); - - CPU_ON; - erode(src, dst, ker); - CPU_OFF; - - d_src.upload(src); - - Ptr erode = cuda::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker); - - erode->apply(d_src, d_dst); - - CUDA_ON; - erode->apply(d_src, d_dst); - CUDA_OFF; - } -} - -TEST(threshold) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 8UC1, THRESH_BINARY"; - - gen(src, size, size, CV_8U, 0, 100); - - threshold(src, dst, 50.0, 0.0, THRESH_BINARY); - - CPU_ON; - threshold(src, dst, 50.0, 0.0, THRESH_BINARY); - CPU_OFF; - - d_src.upload(src); - - cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - - CUDA_ON; - cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - CUDA_OFF; - } - - for (int size = 2000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 32FC1, THRESH_TRUNC [NPP]"; - - gen(src, size, size, CV_32FC1, 0, 100); - - threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); - - CPU_ON; - threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); - CPU_OFF; - - d_src.upload(src); - - cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - - CUDA_ON; - cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - CUDA_OFF; - } -} - -TEST(pow) -{ - Mat src, dst; - cuda::GpuMat d_src, d_dst; - - for (int size = 1000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 32F"; - - gen(src, size, size, CV_32F, 0, 100); - - pow(src, -2.0, dst); - - CPU_ON; - pow(src, -2.0, dst); - CPU_OFF; - - d_src.upload(src); - - cuda::pow(d_src, -2.0, d_dst); - - CUDA_ON; - cuda::pow(d_src, -2.0, d_dst); - CUDA_OFF; - } -} - - -TEST(projectPoints) -{ - Mat src; - vector dst; - cuda::GpuMat d_src, d_dst; - - Mat rvec; gen(rvec, 1, 3, CV_32F, 0, 1); - Mat tvec; gen(tvec, 1, 3, CV_32F, 0, 1); - Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0, 1); - camera_mat.at(0, 1) = 0.f; - camera_mat.at(1, 0) = 0.f; - camera_mat.at(2, 0) = 0.f; - camera_mat.at(2, 1) = 0.f; - - for (int size = (int)1e6, count = 0; size >= 1e5 && count < 5; size = int(size / 1.4), count++) - { - SUBTEST << size; - - gen(src, 1, size, CV_32FC3, Scalar::all(0), Scalar::all(10)); - - projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst); - - CPU_ON; - projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst); - CPU_OFF; - - d_src.upload(src); - - cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst); - - CUDA_ON; - cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst); - CUDA_OFF; - } -} - - -static void InitSolvePnpRansac() -{ - Mat object; gen(object, 1, 4, CV_32FC3, Scalar::all(0), Scalar::all(100)); - Mat image; gen(image, 1, 4, CV_32FC2, Scalar::all(0), Scalar::all(100)); - Mat rvec, tvec; - cuda::solvePnPRansac(object, image, Mat::eye(3, 3, CV_32F), Mat(), rvec, tvec); -} - - -TEST(solvePnPRansac) -{ - InitSolvePnpRansac(); - - for (int num_points = 5000; num_points <= 300000; num_points = int(num_points * 3.76)) - { - SUBTEST << num_points; - - Mat object; gen(object, 1, num_points, CV_32FC3, Scalar::all(10), Scalar::all(100)); - Mat image; gen(image, 1, num_points, CV_32FC2, Scalar::all(10), Scalar::all(100)); - Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0.5, 1); - camera_mat.at(0, 1) = 0.f; - camera_mat.at(1, 0) = 0.f; - camera_mat.at(2, 0) = 0.f; - camera_mat.at(2, 1) = 0.f; - - Mat rvec, tvec; - const int num_iters = 200; - const float max_dist = 2.0f; - vector inliers_cpu, inliers_gpu; - - CPU_ON; - solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters, - max_dist, int(num_points * 0.05), inliers_cpu); - CPU_OFF; - - CUDA_ON; - cuda::solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters, - max_dist, int(num_points * 0.05), &inliers_gpu); - CUDA_OFF; - } -} - -TEST(GaussianBlur) -{ - for (int size = 1000; size <= 4000; size += 1000) - { - SUBTEST << size << 'x' << size << ", 8UC4"; - - Mat src, dst; - - gen(src, size, size, CV_8UC4, 0, 256); - - GaussianBlur(src, dst, Size(3, 3), 1); - - CPU_ON; - GaussianBlur(src, dst, Size(3, 3), 1); - CPU_OFF; - - cuda::GpuMat d_src(src); - cuda::GpuMat d_dst(src.size(), src.type()); - cuda::GpuMat d_buf; - - cv::Ptr gauss = cv::cuda::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1); - - gauss->apply(d_src, d_dst); - - CUDA_ON; - gauss->apply(d_src, d_dst); - CUDA_OFF; - } -} - -TEST(filter2D) -{ - for (int size = 512; size <= 2048; size *= 2) - { - Mat src; - gen(src, size, size, CV_8UC4, 0, 256); - - for (int ksize = 3; ksize <= 16; ksize += 2) - { - SUBTEST << "ksize = " << ksize << ", " << size << 'x' << size << ", 8UC4"; - - Mat kernel; - gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); - - Mat dst; - cv::filter2D(src, dst, -1, kernel); - - CPU_ON; - cv::filter2D(src, dst, -1, kernel); - CPU_OFF; - - cuda::GpuMat d_src(src); - cuda::GpuMat d_dst; - - Ptr filter2D = cuda::createLinearFilter(d_src.type(), -1, kernel); - filter2D->apply(d_src, d_dst); - - CUDA_ON; - filter2D->apply(d_src, d_dst); - CUDA_OFF; - } - } -} - -TEST(pyrDown) -{ - for (int size = 4000; size >= 1000; size -= 1000) - { - SUBTEST << size << 'x' << size << ", 8UC4"; - - Mat src, dst; - gen(src, size, size, CV_8UC4, 0, 256); - - pyrDown(src, dst); - - CPU_ON; - pyrDown(src, dst); - CPU_OFF; - - cuda::GpuMat d_src(src); - cuda::GpuMat d_dst; - - cuda::pyrDown(d_src, d_dst); - - CUDA_ON; - cuda::pyrDown(d_src, d_dst); - CUDA_OFF; - } -} - -TEST(pyrUp) -{ - for (int size = 2000; size >= 1000; size -= 1000) - { - SUBTEST << size << 'x' << size << ", 8UC4"; - - Mat src, dst; - - gen(src, size, size, CV_8UC4, 0, 256); - - pyrUp(src, dst); - - CPU_ON; - pyrUp(src, dst); - CPU_OFF; - - cuda::GpuMat d_src(src); - cuda::GpuMat d_dst; - - cuda::pyrUp(d_src, d_dst); - - CUDA_ON; - cuda::pyrUp(d_src, d_dst); - CUDA_OFF; - } -} - - -TEST(equalizeHist) -{ - for (int size = 1000; size < 4000; size += 1000) - { - SUBTEST << size << 'x' << size; - - Mat src, dst; - - gen(src, size, size, CV_8UC1, 0, 256); - - equalizeHist(src, dst); - - CPU_ON; - equalizeHist(src, dst); - CPU_OFF; - - cuda::GpuMat d_src(src); - cuda::GpuMat d_dst; - - cuda::equalizeHist(d_src, d_dst); - - CUDA_ON; - cuda::equalizeHist(d_src, d_dst); - CUDA_OFF; - } -} - - -TEST(Canny) -{ - Mat img = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE); - - if (img.empty()) throw runtime_error("can't open ../data/aloeL.jpg"); - - Mat edges(img.size(), CV_8UC1); - - CPU_ON; - Canny(img, edges, 50.0, 100.0); - CPU_OFF; - - cuda::GpuMat d_img(img); - cuda::GpuMat d_edges; - - Ptr canny = cuda::createCannyEdgeDetector(50.0, 100.0); - - canny->detect(d_img, d_edges); - - CUDA_ON; - canny->detect(d_img, d_edges); - CUDA_OFF; -} - - -TEST(reduce) -{ - for (int size = 1000; size < 4000; size += 1000) - { - Mat src; - gen(src, size, size, CV_32F, 0, 255); - - Mat dst0; - Mat dst1; - - cuda::GpuMat d_src(src); - cuda::GpuMat d_dst0; - cuda::GpuMat d_dst1; - - SUBTEST << size << 'x' << size << ", dim = 0"; - - reduce(src, dst0, 0, REDUCE_MIN); - - CPU_ON; - reduce(src, dst0, 0, REDUCE_MIN); - CPU_OFF; - - cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN); - - CUDA_ON; - cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN); - CUDA_OFF; - - SUBTEST << size << 'x' << size << ", dim = 1"; - - reduce(src, dst1, 1, REDUCE_MIN); - - CPU_ON; - reduce(src, dst1, 1, REDUCE_MIN); - CPU_OFF; - - cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN); - - CUDA_ON; - cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN); - CUDA_OFF; - } -} - - -TEST(gemm) -{ - Mat src1, src2, src3, dst; - cuda::GpuMat d_src1, d_src2, d_src3, d_dst; - - for (int size = 512; size <= 1024; size *= 2) - { - SUBTEST << size << 'x' << size; - - gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - - gemm(src1, src2, 1.0, src3, 1.0, dst); - - CPU_ON; - gemm(src1, src2, 1.0, src3, 1.0, dst); - CPU_OFF; - - d_src1.upload(src1); - d_src2.upload(src2); - d_src3.upload(src3); - - cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - - CUDA_ON; - cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - CUDA_OFF; - } -} - -TEST(GoodFeaturesToTrack) -{ - Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE); - if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg"); - - vector pts; - - goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0); - - CPU_ON; - goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0); - CPU_OFF; - - Ptr detector = cuda::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0); - - cuda::GpuMat d_src(src); - cuda::GpuMat d_pts; - - detector->detect(d_src, d_pts); - - CUDA_ON; - detector->detect(d_src, d_pts); - CUDA_OFF; -} - -#ifdef HAVE_OPENCV_BGSEGM - -TEST(MOG) -{ - const std::string inputFile = abspath("../data/vtest.avi"); - - cv::VideoCapture cap(inputFile); - if (!cap.isOpened()) throw runtime_error("can't open ../data/vtest.avi"); - - cv::Mat frame; - cap >> frame; - - cv::Ptr mog = cv::bgsegm::createBackgroundSubtractorMOG(); - cv::Mat foreground; - - mog->apply(frame, foreground, 0.01); - - while (!TestSystem::instance().stop()) - { - cap >> frame; - - TestSystem::instance().cpuOn(); - - mog->apply(frame, foreground, 0.01); - - TestSystem::instance().cpuOff(); - } - TestSystem::instance().cpuComplete(); - - cap.open(inputFile); - - cap >> frame; - - cv::cuda::GpuMat d_frame(frame); - cv::Ptr d_mog = cv::cuda::createBackgroundSubtractorMOG(); - cv::cuda::GpuMat d_foreground; - - d_mog->apply(d_frame, d_foreground, 0.01); - - while (!TestSystem::instance().stop()) - { - cap >> frame; - d_frame.upload(frame); - - TestSystem::instance().gpuOn(); - - d_mog->apply(d_frame, d_foreground, 0.01); - - TestSystem::instance().gpuOff(); - } - TestSystem::instance().gpuComplete(); -} - -#endif - -TEST(MOG2) -{ - const std::string inputFile = abspath("../data/768x576.avi"); - - cv::VideoCapture cap(inputFile); - if (!cap.isOpened()) throw runtime_error("can't open ../data/768x576.avi"); - - cv::Mat frame; - cap >> frame; - - cv::Ptr mog2 = cv::createBackgroundSubtractorMOG2(); - cv::Mat foreground; - cv::Mat background; - - mog2->apply(frame, foreground); - mog2->getBackgroundImage(background); - - while (!TestSystem::instance().stop()) - { - cap >> frame; - - TestSystem::instance().cpuOn(); - - mog2->apply(frame, foreground); - mog2->getBackgroundImage(background); - - TestSystem::instance().cpuOff(); - } - TestSystem::instance().cpuComplete(); - - cap.open(inputFile); - - cap >> frame; - - cv::Ptr d_mog2 = cv::cuda::createBackgroundSubtractorMOG2(); - cv::cuda::GpuMat d_frame(frame); - cv::cuda::GpuMat d_foreground; - cv::cuda::GpuMat d_background; - - d_mog2->apply(d_frame, d_foreground); - d_mog2->getBackgroundImage(d_background); - - while (!TestSystem::instance().stop()) - { - cap >> frame; - d_frame.upload(frame); - - TestSystem::instance().gpuOn(); - - d_mog2->apply(d_frame, d_foreground); - d_mog2->getBackgroundImage(d_background); - - TestSystem::instance().gpuOff(); - } - TestSystem::instance().gpuComplete(); -}