Merge pull request #7047 from tomoaki0705:brushUpFp16Cuda

This commit is contained in:
Vadim Pisarevsky 2016-08-08 14:43:57 +00:00
commit 39ebca5dbf
3 changed files with 15 additions and 8 deletions

View File

@ -855,7 +855,14 @@ private:
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
CV_EXPORTS void convertFp16Cuda(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
/** @brief Converts an array to half-precision floating-point numbers.
@param _src input array.
@param _dst output array.
@param stream Stream for the asynchronous version.
@sa cv::convertFp16
*/
CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
//! @} cudacore_init

View File

@ -594,7 +594,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub
funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream);
}
void cv::cuda::convertFp16Cuda(InputArray _src, OutputArray _dst, Stream& stream)
void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src = _src.getGpuMat();
int ddepth = 0;

View File

@ -101,11 +101,11 @@ public:
GpuMat g_dst;
// Fp32 -> Fp16
convertFp16Cuda(g_src, g_dst);
convertFp16Cuda(g_dst.clone(), g_dst);
cuda::convertFp16(g_src, g_dst);
cv::convertFp16(src, dst);
// Fp16 -> Fp32
convertFp16(src, dst);
convertFp16(dst, ref);
cuda::convertFp16(g_dst.clone(), g_dst);
cv::convertFp16(dst, ref);
g_dst.download(dst);
EXPECT_MAT_NEAR(dst, ref, 0.0);
@ -127,8 +127,8 @@ public:
GpuMat g_dst;
// Fp32 -> Fp16
convertFp16Cuda(g_src, g_dst);
convertFp16(src, ref);
cuda::convertFp16(g_src, g_dst);
cv::convertFp16(src, ref);
g_dst.download(dst);
EXPECT_MAT_NEAR(dst, ref, 0.0);