From 60b806f9b858b0ffc0e65b18b93fdb635f42c2c8 Mon Sep 17 00:00:00 2001 From: Pierre Chatelier Date: Fri, 9 Jun 2023 12:37:20 +0200 Subject: [PATCH] Merge pull request #22947 from chacha21:hasNonZero Added cv::hasNonZero() #22947 `cv::hasNonZero()` is semantically equivalent to (`cv::countNonZero()>0`) but stops parsing the image when a non-zero value is found, for a performance gain - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake This pull request might be refused, but I submit it to know if further work is needed or if I just stop working on it. The idea is only a performance gain vs `countNonZero()>0` at the cost of more code. Reasons why it might be refused : - this is just more code - the execution time is "unfair"/"unpredictable" since it depends on the position of the first non-zero value - the user must be aware that default search is from first row/col to last row/col and has no way to customize that, even if his use case lets him know where a non zero could be found - the PR in its current state is using, for the ocl implementation, a mere `countNonZero()>0` ; there is not much sense in trying to break early the ocl kernel call when non-zero is encountered. So the ocl implementation does not bring any improvement. - there is no IPP function that can help (`countNonZero()` is based in `ippCountInRange`) - the PR in its current state might be slower than a call to `countNonZero()>0` in some cases (see "challenges" below) Reasons why it might be accepted : - the performance gain is huge on average, if we consider that "on average" means "non zero in the middle of the image" - the "missing" IPP implementation is replaced by an "Open-CV universal intrinsics" implementation - the PR in its current state is almost always faster than a call to `countNonZero()>0`, is only slightly slower in the worst cases, and not even for all matrices **Challenges** The worst case is either an all-zero matrix, or a non-zero at the very last position. In such a case, the `hasNonZero()` implementation will parse the whole matrix like `countNonZero()` would do. But we expect the performance to be the same in this case. And `ippCountInRange` is hard to beat ! There is also the case of very small matrices (<=32x32...) in 8b, where the SIMD can be hard to feed. For all cases but the worse, my custom `hasNonZero()` performs better than `ippCountInRange()` For the worst case, my custom `hasNonZero()` performs better than `ippCountInRange()` *except for large matrices of type CV_32S or CV_64F* (but surprisingly, not CV_32F). The difference is small, but it exists (and I don't understand why). For very small CV_8U matrices `ippCountInRange()` seems unbeatable. Here is the code that I use to check timings ``` //test cv::hasNonZero() vs (cv::countNonZero()>0) for different matrices sizes, types, strides... { cv::setRNGSeed(1234); const std::vector sizes = {{32, 32}, {64, 64}, {128, 128}, {320, 240}, {512, 512}, {640, 480}, {1024, 768}, {2048, 2048}, {1031, 1000}}; const std::vector types = {CV_8U, CV_16U, CV_32S, CV_32F, CV_64F}; const size_t iterations = 1000; for(const cv::Size& size : sizes) { for(const int type : types) { for(int c = 0 ; c<2 ; ++c) { const bool continuous = !c; for(int i = 0 ; i<4 ; ++i) { cv::Mat m = continuous ? cv::Mat::zeros(size, type) : cv::Mat(cv::Mat::zeros(cv::Size(2*size.width, size.height), type), cv::Rect(cv::Point(0, 0), size)); const bool nz = (i <= 2); const unsigned int nzOffsetRange = 10; const unsigned int nzOffset = cv::randu()%nzOffsetRange; const cv::Point pos = (i == 0) ? cv::Point(nzOffset, 0) : (i == 1) ? cv::Point(size.width/2-nzOffsetRange/2+nzOffset, size.height/2) : (i == 2) ? cv::Point(size.width-1-nzOffset, size.height-1) : cv::Point(0, 0); std::cout << "============================================================" << std::endl; std::cout << "size:" << size << " type:" << type << " continuous = " << (continuous ? "true" : "false") << " iterations:" << iterations << " nz=" << (nz ? "true" : "false"); std::cout << " pos=" << ((i == 0) ? "begin" : (i == 1) ? "middle" : (i == 2) ? "end" : "none"); std::cout << std::endl; cv::Mat mask = cv::Mat::zeros(size, CV_8UC1); mask.at(pos) = 0xFF; m.setTo(cv::Scalar::all(0)); m.setTo(cv::Scalar::all(nz ? 1 : 0), mask); std::vector results; std::vector timings; { bool res = false; auto ref = cv::getTickCount(); for(size_t k = 0 ; k0); auto now = cv::getTickCount(); const bool error = (res != nz); if (error) printf("!!ERROR!!\r\n"); results.push_back(res); timings.push_back(1000.*(now-ref)/cv::getTickFrequency()); } const size_t bestTimingIndex = (std::min_element(timings.begin(), timings.end())-timings.begin()); if ((bestTimingIndex != 0) || (std::find_if_not(results.begin(), results.end(), [&](bool r) {return (r == nz);}) != results.end())) { std::cout << "cv::hasNonZero\t\t=>" << results[0] << ((results[0] != nz) ? " ERROR" : "") << " perf:" << timings[0] << "ms => " << (iterations/timings[0]*1000) << " im/s" << ((bestTimingIndex == 0) ? " * " : "") << std::endl; std::cout << "cv::countNonZero\t=>" << results[1] << ((results[1] != nz) ? " ERROR" : "") << " perf:" << timings[1] << "ms => " << (iterations/timings[1]*1000) << " im/s" << ((bestTimingIndex == 1) ? " * " : "") << std::endl; } } } } } } ``` Here is a report of this benchmark (it only reports timings when `cv::countNonZero()` is faster) My CPU is an Intel Core I7 4790 @ 3.60Ghz ``` ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=true pos=middle cv::hasNonZero =>1 perf:0.353764ms => 2.82674e+06 im/s cv::countNonZero =>1 perf:0.282044ms => 3.54555e+06 im/s * ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:0.610478ms => 1.63806e+06 im/s cv::countNonZero =>1 perf:0.283182ms => 3.5313e+06 im/s * ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:0.630115ms => 1.58701e+06 im/s cv::countNonZero =>0 perf:0.282044ms => 3.54555e+06 im/s * ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:0.607347ms => 1.64651e+06 im/s cv::countNonZero =>1 perf:0.467037ms => 2.14116e+06 im/s * ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:0.618162ms => 1.6177e+06 im/s cv::countNonZero =>0 perf:0.468175ms => 2.13595e+06 im/s * ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:895.381ms => 1116.84 im/s cv::countNonZero =>1 perf:882.569ms => 1133.06 im/s * ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:899.53ms => 1111.69 im/s cv::countNonZero =>0 perf:870.894ms => 1148.24 im/s * ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:2018.92ms => 495.313 im/s cv::countNonZero =>1 perf:1966.37ms => 508.552 im/s * ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:2005.87ms => 498.537 im/s cv::countNonZero =>0 perf:1992.78ms => 501.812 im/s * ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=false pos=none done ``` --- modules/core/CMakeLists.txt | 1 + modules/core/include/opencv2/core.hpp | 8 + modules/core/perf/opencl/perf_arithm.cpp | 24 ++ modules/core/perf/perf_stat.cpp | 16 + modules/core/src/has_non_zero.dispatch.cpp | 107 +++++++ modules/core/src/has_non_zero.simd.hpp | 327 +++++++++++++++++++++ modules/core/test/test_hasnonzero.cpp | 201 +++++++++++++ 7 files changed, 684 insertions(+) create mode 100644 modules/core/src/has_non_zero.dispatch.cpp create mode 100644 modules/core/src/has_non_zero.simd.hpp create mode 100644 modules/core/test/test_hasnonzero.cpp diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 517b0f31a5..1b3f574275 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -6,6 +6,7 @@ ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3) ocv_add_dispatched_file(convert SSE2 AVX2 VSX3) ocv_add_dispatched_file(convert_scale SSE2 AVX2) ocv_add_dispatched_file(count_non_zero SSE2 AVX2) +ocv_add_dispatched_file(has_non_zero SSE2 AVX2) ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD) ocv_add_dispatched_file(mean SSE2 AVX2) ocv_add_dispatched_file(merge SSE2 AVX2) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 9b94c72a43..d9a21701f2 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -572,6 +572,14 @@ independently for each channel. */ CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src); +/** @brief Checks for the presence of at least one non-zero array element. + +The function returns whether there are non-zero elements in src +@param src single-channel array. +@sa mean, meanStdDev, norm, minMaxLoc, calcCovarMatrix +*/ +CV_EXPORTS_W bool hasNonZero( InputArray src ); + /** @brief Counts non-zero array elements. The function returns the number of non-zero elements in src : diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index 526bc4e874..8d1e7a6288 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -460,6 +460,30 @@ OCL_PERF_TEST_P(CountNonZeroFixture, CountNonZero, SANITY_CHECK(result); } +///////////// countNonZero //////////////////////// + +typedef Size_MatType HasNonZeroFixture; + +OCL_PERF_TEST_P(HasNonZeroFixture, HasNonZero, + ::testing::Combine(OCL_TEST_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_32FC1))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + UMat src(srcSize, type); + /*bool result = false;*/ + randu(src, 0, 10); + declare.in(src); + + OCL_TEST_CYCLE() /*result =*/ cv::hasNonZero(src); + + SANITY_CHECK_NOTHING(); +} + ///////////// Phase //////////////////////// typedef Size_MatType PhaseFixture; diff --git a/modules/core/perf/perf_stat.cpp b/modules/core/perf/perf_stat.cpp index 15ca2e6559..025700c989 100644 --- a/modules/core/perf/perf_stat.cpp +++ b/modules/core/perf/perf_stat.cpp @@ -101,4 +101,20 @@ PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPIC SANITY_CHECK(cnt); } +PERF_TEST_P(Size_MatType, hasNonZero, testing::Combine( testing::Values( TYPICAL_MAT_SIZES ), testing::Values( CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1 ) )) +{ + Size sz = get<0>(GetParam()); + int matType = get<1>(GetParam()); + + Mat src(sz, matType); + /*bool hnz = false;*/ + + declare.in(src, WARMUP_RNG); + + int runs = (sz.width <= 640) ? 8 : 1; + TEST_CYCLE_MULTIRUN(runs) /*hnz =*/ hasNonZero(src); + + SANITY_CHECK_NOTHING(); +} + } // namespace diff --git a/modules/core/src/has_non_zero.dispatch.cpp b/modules/core/src/has_non_zero.dispatch.cpp new file mode 100644 index 0000000000..6de78ec7a3 --- /dev/null +++ b/modules/core/src/has_non_zero.dispatch.cpp @@ -0,0 +1,107 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + + +#include "precomp.hpp" +#include "opencl_kernels_core.hpp" +#include "stat.hpp" + +#include "has_non_zero.simd.hpp" +#include "has_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv { + +static HasNonZeroFunc getHasNonZeroTab(int depth) +{ + CV_INSTRUMENT_REGION(); + CV_CPU_DISPATCH(getHasNonZeroTab, (depth), + CV_CPU_DISPATCH_MODES_ALL); +} + +#ifdef HAVE_OPENCL +static bool ocl_hasNonZero( InputArray _src, bool & res ) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if (depth == CV_64F && !doubleSupport) + return false; + + int dbsize = ocl::Device::getDefault().maxComputeUnits(); + size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); + + int wgs2_aligned = 1; + while (wgs2_aligned < (int)wgs) + wgs2_aligned <<= 1; + wgs2_aligned >>= 1; + + ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, + format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO" + " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), + ocl::typeToStr(depth), (int)wgs, kercn, + wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", + _src.isContinuous() ? " -D HAVE_SRC_CONT" : "")); + if (k.empty()) + return false; + + UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), + dbsize, ocl::KernelArg::PtrWriteOnly(db)); + + size_t globalsize = dbsize * wgs; + if (k.run(1, &globalsize, &wgs, true)) + return res = (saturate_cast(cv::sum(db.getMat(ACCESS_READ))[0])>0), true; + return false; +} +#endif + +bool hasNonZero(InputArray _src) +{ + CV_INSTRUMENT_REGION(); + + int type = _src.type(), cn = CV_MAT_CN(type); + CV_Assert( cn == 1 ); + + bool res = false; + +#ifdef HAVE_OPENCL + CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, + ocl_hasNonZero(_src, res), + res) +#endif + + Mat src = _src.getMat(); + + HasNonZeroFunc func = getHasNonZeroTab(src.depth()); + CV_Assert( func != 0 ); + + if (src.dims == 2)//fast path to avoid creating planes of single rows + { + if (src.isContinuous()) + res |= func(src.ptr(0), src.total()); + else + for(int row = 0, rowsCount = src.rows ; !res && (row(row), src.cols); + } + else//if (src.dims != 2) + { + const Mat* arrays[] = {&src, nullptr}; + Mat planes[1]; + NAryMatIterator itNAry(arrays, planes, 1); + for(size_t p = 0 ; !res && (p(0), plane.total()); + else + for(int row = 0, rowsCount = plane.rows ; !res && (row(row), plane.cols); + } + } + + return res; +} + +} // namespace diff --git a/modules/core/src/has_non_zero.simd.hpp b/modules/core/src/has_non_zero.simd.hpp new file mode 100644 index 0000000000..6ea8bcd7d2 --- /dev/null +++ b/modules/core/src/has_non_zero.simd.hpp @@ -0,0 +1,327 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" + +namespace cv { + +typedef bool (*HasNonZeroFunc)(const uchar*, size_t); + + +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +HasNonZeroFunc getHasNonZeroTab(int depth); + + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +template +inline bool hasNonZero_(const T* src, size_t len ) +{ + bool res = false; + if (len > 0) + { + size_t i=0; + #if CV_ENABLE_UNROLLED + for(; !res && (i+4 <= len); i += 4 ) + res |= ((src[i] | src[i+1] | src[i+2] | src[i+3]) != 0); + #endif + for( ; !res && (i < len); i++ ) + res |= (src[i] != 0); + } + return res; +} + +template<> +inline bool hasNonZero_(const float* src, size_t len ) +{ + bool res = false; + if (len > 0) + { + size_t i=0; + if (sizeof(float) == sizeof(unsigned int)) + { + #if CV_ENABLE_UNROLLED + typedef unsigned int float_as_uint_t; + const float_as_uint_t* src_as_ui = reinterpret_cast(src); + for(; !res && (i+4 <= len); i += 4 ) + { + const float_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]); + res |= ((gathered<<1) != 0);//remove what would be the sign bit + } + #endif + } + for( ; !res && (i < len); i++ ) + res |= (src[i] != 0); + } + return res; +} + +template<> +inline bool hasNonZero_(const double* src, size_t len ) +{ + bool res = false; + if (len > 0) + { + size_t i=0; + if (sizeof(double) == sizeof(uint64_t)) + { + #if CV_ENABLE_UNROLLED + typedef uint64_t double_as_uint_t; + const double_as_uint_t* src_as_ui = reinterpret_cast(src); + for(; !res && (i+4 <= len); i += 4 ) + { + const double_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]); + res |= ((gathered<<1) != 0);//remove what would be the sign bit + } + #endif + } + for( ; !res && (i < len); i++ ) + res |= (src[i] != 0); + } + return res; +} + +static bool hasNonZero8u( const uchar* src, size_t len ) +{ + bool res = false; + const uchar* srcEnd = src+len; +#if CV_SIMD + typedef v_uint8 v_type; + const v_type v_zero = vx_setzero_u8(); + constexpr const int unrollCount = 2; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const uchar* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + res = v_check_any(((v0 | v1) != v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero16u( const ushort* src, size_t len ) +{ + bool res = false; + const ushort* srcEnd = src+len; +#if CV_SIMD + typedef v_uint16 v_type; + const v_type v_zero = vx_setzero_u16(); + constexpr const int unrollCount = 4; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const ushort* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + res = v_check_any(((v0 | v2) != v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero32s( const int* src, size_t len ) +{ + bool res = false; + const int* srcEnd = src+len; +#if CV_SIMD + typedef v_int32 v_type; + const v_type v_zero = vx_setzero_s32(); + constexpr const int unrollCount = 8; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const int* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v_type v4 = vx_load(src); + src += v_type::nlanes; + v_type v5 = vx_load(src); + src += v_type::nlanes; + v_type v6 = vx_load(src); + src += v_type::nlanes; + v_type v7 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + v4 |= v5; + v6 |= v7; + + v0 |= v2; + v4 |= v6; + res = v_check_any(((v0 | v4) != v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero32f( const float* src, size_t len ) +{ + bool res = false; + const float* srcEnd = src+len; +#if CV_SIMD + typedef v_float32 v_type; + const v_type v_zero = vx_setzero_f32(); + constexpr const int unrollCount = 8; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const float* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v_type v4 = vx_load(src); + src += v_type::nlanes; + v_type v5 = vx_load(src); + src += v_type::nlanes; + v_type v6 = vx_load(src); + src += v_type::nlanes; + v_type v7 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + v4 |= v5; + v6 |= v7; + + v0 |= v2; + v4 |= v6; + //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ + res = !v_check_all(((v0 | v4) == v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero64f( const double* src, size_t len ) +{ + bool res = false; + const double* srcEnd = src+len; +#if CV_SIMD_64F + typedef v_float64 v_type; + const v_type v_zero = vx_setzero_f64(); + constexpr const int unrollCount = 16; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const double* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v_type v4 = vx_load(src); + src += v_type::nlanes; + v_type v5 = vx_load(src); + src += v_type::nlanes; + v_type v6 = vx_load(src); + src += v_type::nlanes; + v_type v7 = vx_load(src); + src += v_type::nlanes; + v_type v8 = vx_load(src); + src += v_type::nlanes; + v_type v9 = vx_load(src); + src += v_type::nlanes; + v_type v10 = vx_load(src); + src += v_type::nlanes; + v_type v11 = vx_load(src); + src += v_type::nlanes; + v_type v12 = vx_load(src); + src += v_type::nlanes; + v_type v13 = vx_load(src); + src += v_type::nlanes; + v_type v14 = vx_load(src); + src += v_type::nlanes; + v_type v15 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + v4 |= v5; + v6 |= v7; + v8 |= v9; + v10 |= v11; + v12 |= v13; + v14 |= v15; + + v0 |= v2; + v4 |= v6; + v8 |= v10; + v12 |= v14; + + v0 |= v4; + v8 |= v12; + //res = v_check_any(((v0 | v8) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ + res = !v_check_all(((v0 | v8) == v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +HasNonZeroFunc getHasNonZeroTab(int depth) +{ + static HasNonZeroFunc hasNonZeroTab[] = + { + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u), + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u), + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32s), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32f), + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero64f), 0 + }; + + return hasNonZeroTab[depth]; +} + +#endif + +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/core/test/test_hasnonzero.cpp b/modules/core/test/test_hasnonzero.cpp new file mode 100644 index 0000000000..9834117ddf --- /dev/null +++ b/modules/core/test/test_hasnonzero.cpp @@ -0,0 +1,201 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +typedef testing::TestWithParam > HasNonZeroAllZeros; + +TEST_P(HasNonZeroAllZeros, hasNonZeroAllZeros) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + Mat m = Mat::zeros(size, type); + EXPECT_FALSE(hasNonZero(m)); +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroAllZeros, + testing::Combine( + testing::Values(CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), + testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) + ) +); + +typedef testing::TestWithParam > HasNonZeroNegZeros; + +TEST_P(HasNonZeroNegZeros, hasNonZeroNegZeros) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + Mat m = Mat(size, type); + m.setTo(Scalar::all(-0.)); + EXPECT_FALSE(hasNonZero(m)); +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroNegZeros, + testing::Combine( + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) + ) +); + +typedef testing::TestWithParam > HasNonZeroLimitValues; + +TEST_P(HasNonZeroLimitValues, hasNonZeroLimitValues) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + Mat m = Mat(size, type); + + m.setTo(Scalar::all(std::numeric_limits::infinity())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo(Scalar::all(-std::numeric_limits::infinity())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo(Scalar::all(std::numeric_limits::quiet_NaN())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits::epsilon()) : Scalar::all(std::numeric_limits::epsilon())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits::min()) : Scalar::all(std::numeric_limits::min())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits::denorm_min()) : Scalar::all(std::numeric_limits::denorm_min())); + EXPECT_TRUE(hasNonZero(m)); +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroLimitValues, + testing::Combine( + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) + ) +); + +typedef testing::TestWithParam > HasNonZeroRandom; + +TEST_P(HasNonZeroRandom, hasNonZeroRandom) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + RNG& rng = theRNG(); + + const size_t N = std::min(100, size.area()); + for(size_t i = 0 ; i > HasNonZeroNd; + +TEST_P(HasNonZeroNd, hasNonZeroNd) +{ + const int type = get<0>(GetParam()); + const int ndims = get<1>(GetParam()); + const bool continuous = get<2>(GetParam()); + + RNG& rng = theRNG(); + + const size_t N = 10; + for(size_t i = 0 ; i steps(ndims); + std::vector sizes(ndims); + size_t totalBytes = 1; + for(int dim = 0 ; dim(length))*CV_ELEM_SIZE(type); + sizes[dim] = (isFirstDim || continuous) ? length : rng.uniform(1, length); + totalBytes *= steps[dim]*static_cast(sizes[dim]); + } + + std::vector buffer(totalBytes); + void* data = buffer.data(); + + Mat m = Mat(ndims, sizes.data(), type, data, steps.data()); + + std::vector nzRange(ndims); + for(int dim = 0 ; dim0), hasNonZero(m)); + } +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroNd, + testing::Combine( + testing::Values(CV_8UC1), + testing::Values(2, 3), + testing::Values(true, false) + ) +); + +}} // namespace