Merge pull request #21645 from chacha21:applyColorMap_8UC1_optimized

Optimize cv::applyColorMap() for simple case

* Optimize cv::applyColorMap() for simple case

PR for 21640
For regular cv::Mat CV_8UC1 src, applying the colormap is simpler than calling the cv::LUT() mechanism.

* add support for src as CV_8UC3

src as CV_8UC3 is handled with a BGR2GRAY conversion, the same optimized code being used afterwards

* code style

rely on cv::Mat.ptr() to index data

* Move new implementation to ColorMap::operator()

Changes as suggested by reviewer

* style

improvements suggested by reviewer

* typo

* tune parallel work

* better usage of parallel_for_

use nstripes parameter of parallel_for_
assume _lut is continuous to allow faster pixel indexing
optimize src/dst access by contiguous rows of pixels
do not locally copy the LUT any more; it is no longer relevant with the new optimizations
This commit is contained in:
Pierre Chatelier 2022-03-01 17:55:00 +01:00 committed by GitHub
parent a332509e02
commit ebb6915e58
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -734,12 +734,57 @@ namespace colormap
Mat src = _src.getMat();
if(src.type() != CV_8UC1 && src.type() != CV_8UC3)
CV_Error(Error::StsBadArg, "cv::ColorMap only supports source images of type CV_8UC1 or CV_8UC3");
// Turn into a BGR matrix into its grayscale representation.
if(src.type() == CV_8UC3)
cvtColor(src.clone(), src, COLOR_BGR2GRAY);
cvtColor(src.clone(), src, COLOR_GRAY2BGR);
// Apply the ColorMap.
LUT(src, _lut, _dst);
CV_CheckEQ(src.dims, 2, "Not supported");
CV_Assert(_lut.isContinuous());
const int lut_type = _lut.type();
CV_CheckType(lut_type, (lut_type == CV_8UC1) || (lut_type == CV_8UC3),
"Only CV_8UC1 and CV_8UC3 LUT are supported");
Mat srcGray;
if (src.channels() == 1)
srcGray = src;
else
cv::cvtColor(src, srcGray, cv::COLOR_BGR2GRAY);//BGR because of historical cv::LUT() usage
_dst.create(src.size(), lut_type);
Mat dstMat = _dst.getMat();
//we do not use cv::LUT() which requires src.channels() == dst.channels()
const int rows = srcGray.rows;
const int cols = srcGray.cols;
const int minimalPixelsPerPacket = 1<<12;
const int rowsPerPacket = std::max(1, minimalPixelsPerPacket/cols);
const int rowsPacketsCount = (rows+rowsPerPacket-1)/rowsPerPacket;
const Range all(0, rows);
if (lut_type == CV_8UC1) {
typedef unsigned char lut_pixel_t;
const lut_pixel_t* srcLUT = _lut.ptr<lut_pixel_t>(0);
auto body = [&, cols](const Range& range) -> void {
for(int row = range.start ; row<range.end ; ++row) {
const unsigned char* srcRow = srcGray.ptr<unsigned char>(row);
lut_pixel_t* dstRow = dstMat.ptr<lut_pixel_t>(row);
for(int col = 0 ; col<cols ; ++col)
*dstRow++ = srcLUT[*srcRow++];
}
};
parallel_for_(all, body, rowsPacketsCount);
}
else if (lut_type == CV_8UC3) {
typedef Vec3b lut_pixel_t;
const lut_pixel_t* srcLUT = _lut.ptr<lut_pixel_t>(0);
auto body = [&, cols](const Range& range) -> void {
for(int row = range.start ; row<range.end ; ++row) {
const unsigned char* srcRow = srcGray.ptr<unsigned char>(row);
lut_pixel_t* dstRow = dstMat.ptr<lut_pixel_t>(row);
for(int col = 0 ; col<cols ; ++col)
*dstRow++ = srcLUT[*srcRow++];
}
};
parallel_for_(all, body, rowsPacketsCount);
}
}
Mat ColorMap::linear_colormap(InputArray X,
@ -798,7 +843,6 @@ namespace colormap
if (userColor.type() != CV_8UC1 && userColor.type() != CV_8UC3)
CV_Error(Error::StsAssert, "cv::LUT only supports tables CV_8UC1 or CV_8UC3.");
colormap::UserColorMap cm(userColor.getMat());
cm(src, dst);
}