Merge pull request #7047 from tomoaki0705:brushUpFp16Cuda

2025-06-15 22:20:58 +08:00 · 2016-08-08 14:43:57 +00:00 · 2016-08-08 14:43:57 +00:00 · 39ebca5dbf
commit 39ebca5dbf
parent ad974de165 2db2d137ce
3 changed files with 15 additions and 8 deletions
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -855,7 +855,14 @@ private:
 CV_EXPORTS void printCudaDeviceInfo(int device);
 CV_EXPORTS void printShortCudaDeviceInfo(int device);

-CV_EXPORTS void convertFp16Cuda(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
+/** @brief Converts an array to or from half-precision floating point (FP32 -> FP16 or FP16 -> FP32).
+
+@param _src input array.
+@param _dst output array.
+@param stream Stream for the asynchronous version.
+@sa cv::convertFp16
+*/
+CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());

 //! @} cudacore_init

--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -594,7 +594,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub
    funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream);
 }

-void cv::cuda::convertFp16Cuda(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
 {
    GpuMat src = _src.getGpuMat();
    int ddepth = 0;
--- a/modules/cudev/test/test_cvt.cu
+++ b/modules/cudev/test/test_cvt.cu
@@ -101,11 +101,11 @@ public:
        GpuMat g_dst;

        // Fp32 -> Fp16
-        convertFp16Cuda(g_src, g_dst);
-        convertFp16Cuda(g_dst.clone(), g_dst);
+        cuda::convertFp16(g_src, g_dst);
+        cv::convertFp16(src, dst);
        // Fp16 -> Fp32
-        convertFp16(src, dst);
-        convertFp16(dst, ref);
+        cuda::convertFp16(g_dst.clone(), g_dst);
+        cv::convertFp16(dst, ref);

        g_dst.download(dst);
        EXPECT_MAT_NEAR(dst, ref, 0.0);
@@ -127,8 +127,8 @@ public:
        GpuMat g_dst;

        // Fp32 -> Fp16
-        convertFp16Cuda(g_src, g_dst);
-        convertFp16(src, ref);
+        cuda::convertFp16(g_src, g_dst);
+        cv::convertFp16(src, ref);

        g_dst.download(dst);
        EXPECT_MAT_NEAR(dst, ref, 0.0);