mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
Merge pull request #8367 from khnaba:cuda-calchist-with-mask
Implement cv::cuda::calcHist with mask support (#8367) * Implement cuda::calcHist with mask * Fix documentation build warning * Have their own step sizes for src and mask. Fix review comment.
This commit is contained in:
parent
a83a1cafa7
commit
27cf6e549e
@ -201,6 +201,15 @@ CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int
|
||||
*/
|
||||
CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Calculates histogram for one channel 8-bit image confined in given mask.
|
||||
|
||||
@param src Source image with CV_8UC1 type.
|
||||
@param hist Destination histogram with one row, 256 columns, and the CV_32SC1 type.
|
||||
@param mask A mask image same size as src and of type CV_8UC1.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
CV_EXPORTS void calcHist(InputArray src, InputArray mask, OutputArray hist, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Equalizes the histogram of a grayscale image.
|
||||
|
||||
@param src Source image with CV_8UC1 type.
|
||||
|
@ -105,6 +105,72 @@ namespace hist
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t srcStep, const uchar* mask, size_t maskStep, int* hist)
|
||||
{
|
||||
__shared__ int shist[256];
|
||||
|
||||
const int y = blockIdx.x * blockDim.y + threadIdx.y;
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
|
||||
shist[tid] = 0;
|
||||
__syncthreads();
|
||||
|
||||
if (y < rows)
|
||||
{
|
||||
const unsigned int* rowPtr = (const unsigned int*) (src + y * srcStep);
|
||||
const unsigned int* maskRowPtr = (const unsigned int*) (mask + y * maskStep);
|
||||
|
||||
const int cols_4 = cols / 4;
|
||||
for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
|
||||
{
|
||||
unsigned int data = rowPtr[x];
|
||||
unsigned int m = maskRowPtr[x];
|
||||
|
||||
if ((m >> 0) & 0xFFU)
|
||||
Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
|
||||
|
||||
if ((m >> 8) & 0xFFU)
|
||||
Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
|
||||
|
||||
if ((m >> 16) & 0xFFU)
|
||||
Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
|
||||
|
||||
if ((m >> 24) & 0xFFU)
|
||||
Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
|
||||
}
|
||||
|
||||
if (cols % 4 != 0 && threadIdx.x == 0)
|
||||
{
|
||||
for (int x = cols_4 * 4; x < cols; ++x)
|
||||
{
|
||||
unsigned int data = ((const uchar*)rowPtr)[x];
|
||||
unsigned int m = ((const uchar*)maskRowPtr)[x];
|
||||
|
||||
if (m)
|
||||
Emulation::smem::atomicAdd(&shist[data], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
const int histVal = shist[tid];
|
||||
if (histVal > 0)
|
||||
::atomicAdd(hist + tid, histVal);
|
||||
}
|
||||
|
||||
void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.rows, block.y));
|
||||
|
||||
histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, mask.data, mask.step, hist);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
@ -69,20 +69,32 @@ void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, Stream&) { throw_no
|
||||
namespace hist
|
||||
{
|
||||
void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
|
||||
void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream);
|
||||
}
|
||||
|
||||
void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
|
||||
{
|
||||
calcHist(_src, cv::cuda::GpuMat(), _hist, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
|
||||
CV_Assert( mask.empty() || mask.size() == src.size() );
|
||||
|
||||
_hist.create(1, 256, CV_32SC1);
|
||||
GpuMat hist = _hist.getGpuMat();
|
||||
|
||||
hist.setTo(Scalar::all(0), stream);
|
||||
|
||||
hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
|
||||
if (mask.empty())
|
||||
hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
|
||||
else
|
||||
hist::histogram256(src, mask, hist.ptr<int>(), StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
@ -136,6 +136,49 @@ INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHist, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES));
|
||||
|
||||
PARAM_TEST_CASE(CalcHistWithMask, cv::cuda::DeviceInfo, cv::Size)
|
||||
{
|
||||
cv::cuda::DeviceInfo devInfo;
|
||||
|
||||
cv::Size size;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GET_PARAM(0);
|
||||
size = GET_PARAM(1);
|
||||
|
||||
cv::cuda::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
CUDA_TEST_P(CalcHistWithMask, Accuracy)
|
||||
{
|
||||
cv::Mat src = randomMat(size, CV_8UC1);
|
||||
cv::Mat mask = randomMat(size, CV_8UC1);
|
||||
cv::Mat(mask, cv::Rect(0, 0, size.width / 2, size.height / 2)).setTo(0);
|
||||
|
||||
cv::cuda::GpuMat hist;
|
||||
cv::cuda::calcHist(loadMat(src), loadMat(mask), hist);
|
||||
|
||||
cv::Mat hist_gold;
|
||||
|
||||
const int hbins = 256;
|
||||
const float hranges[] = {0.0f, 256.0f};
|
||||
const int histSize[] = {hbins};
|
||||
const float* ranges[] = {hranges};
|
||||
const int channels[] = {0};
|
||||
|
||||
cv::calcHist(&src, 1, channels, mask, hist_gold, 1, histSize, ranges);
|
||||
hist_gold = hist_gold.reshape(1, 1);
|
||||
hist_gold.convertTo(hist_gold, CV_32S);
|
||||
|
||||
EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHistWithMask, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES));
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// EqualizeHist
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user