OpenCL: core support for FP16, more channel orders

* Support cl_image conversion for CL_HALF_FLOAT (float16) * Support cl_image conversion for additional channel orders: CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG, CL_RA * Comment on why cl_image conversion is unsupported for CL_RGB * Predict optimal vector width for float16 * ocl::kernelToStr: support float16 * ocl::Device::halfFPConfig: drop artificial requirement for OpenCL version >= 1.2. Even OpenCL 1.0 supports the underlying config property, CL_DEVICE_HALF_FP_CONFIG. * dumpOpenCLInformation: provide info on OpenCL half-float support and preferred half-float vector width * randu: support default range [-1.0, 1.0] for float16 * TestBase::warmup: support float16
2025-06-07 09:25:45 +08:00 · 2021-06-21 00:46:32 -03:00 · 2021-06-21 00:46:32 -03:00 · 6a3d925a47
commit 6a3d925a47
parent 735a79ae83
4 changed files with 44 additions and 12 deletions
--- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp
+++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp
@ -144,6 +144,10 @@ static void dumpOpenCLInformation()
        DUMP_MESSAGE_STDOUT("    Double support = " << doubleSupportStr);
        DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0);

+        const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Half support = " << halfSupportStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0);
+
        const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
        DUMP_MESSAGE_STDOUT("    Host unified memory = " << isUnifiedMemoryStr);
        DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
@ -191,6 +195,9 @@ static void dumpOpenCLInformation()

        DUMP_MESSAGE_STDOUT("    Preferred vector width double = " << device.preferredVectorWidthDouble());
        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width half = " << device.preferredVectorWidthHalf());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf());
    }
    catch (...)
    {
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -1566,6 +1566,7 @@ struct Device::Impl
        version_ = getStrProp(CL_DEVICE_VERSION);
        extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
        doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
+        halfFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG);
        hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
        maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
        maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
@ -1678,6 +1679,7 @@ struct Device::Impl
    String version_;
    std::string extensions_;
    int doubleFPConfig_;
+    int halfFPConfig_;
    bool hostUnifiedMemory_;
    int maxComputeUnits_;
    size_t maxWorkGroupSize_;
@ -1827,11 +1829,7 @@ int Device::singleFPConfig() const
 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }

 int Device::halfFPConfig() const
-#ifdef CL_VERSION_1_2
-{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
-#else
-{ CV_REQUIRE_OPENCL_1_2_ERROR; }
-#endif
+{ return p ? p->halfFPConfig_ : 0; }

 bool Device::endianLittle() const
 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
@ -6668,6 +6666,10 @@ void convertFromImage(void* cl_mem_image, UMat& dst)
        depth = CV_32F;
        break;

+    case CL_HALF_FLOAT:
+        depth = CV_16F;
+        break;
+
    default:
        CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
    }
@ -6676,9 +6678,23 @@ void convertFromImage(void* cl_mem_image, UMat& dst)
    switch (fmt.image_channel_order)
    {
    case CL_R:
+    case CL_A:
+    case CL_INTENSITY:
+    case CL_LUMINANCE:
        type = CV_MAKE_TYPE(depth, 1);
        break;

+    case CL_RG:
+    case CL_RA:
+        type = CV_MAKE_TYPE(depth, 2);
+        break;
+
+    // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
+    // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
+    /*case CL_RGB:
+        type = CV_MAKE_TYPE(depth, 3);
+        break;*/
+
    case CL_RGBA:
    case CL_BGRA:
    case CL_ARGB:
@ -7068,6 +7084,13 @@ static std::string kerToStr(const Mat & k)
            stream << "DIG(" << data[i] << "f)";
        stream << "DIG(" << data[width] << "f)";
    }
+    else if (depth == CV_16F)
+    {
+        stream.setf(std::ios_base::showpoint);
+        for (int i = 0; i < width; ++i)
+            stream << "DIG(" << (float)data[i] << "h)";
+        stream << "DIG(" << (float)data[width] << "h)";
+    }
    else
    {
        for (int i = 0; i < width; ++i)
@ -7091,7 +7114,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)

    typedef std::string (* func_t)(const Mat &);
    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
-                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
+                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<float16_t> };
    const func_t func = funcs[ddepth];
    CV_Assert(func != 0);

@ -7130,14 +7153,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
    int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
        d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
        d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
-        d.preferredVectorWidthDouble(), -1 };
+        d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() };

    // if the device says don't use vectors
    if (vectorWidths[0] == 1)
    {
        // it's heuristic
        vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
-        vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
+        vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2;
        vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
    }

@ -7225,10 +7248,12 @@ struct Image2D::Impl
    {
        cl_image_format format;
        static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
-                                       CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
+                                       CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT };
        static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
                                                CL_SNORM_INT16, -1, -1, -1, -1 };
-        static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
+        // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
+        // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
+        static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA };

        int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
        int channelOrder = channelOrders[cn];
--- a/modules/ts/src/ocl_perf.cpp
+++ b/modules/ts/src/ocl_perf.cpp
@ -70,7 +70,7 @@ void randu(InputOutputArray dst)
        cv::randu(dst, -128, 128);
    else if (dst.depth() == CV_16U)
        cv::randu(dst, 0, 1024);
-    else if (dst.depth() == CV_32F || dst.depth() == CV_64F)
+    else if (dst.depth() == CV_32F || dst.depth() == CV_64F || dst.depth() == CV_16F)
        cv::randu(dst, -1.0, 1.0);
    else if (dst.depth() == CV_16S || dst.depth() == CV_32S)
        cv::randu(dst, -4096, 4096);
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@ -1297,7 +1297,7 @@ void TestBase::warmup(cv::InputOutputArray a, WarmUpType wtype)
                cv::randu(a, -128, 128);
            else if (depth == CV_16U)
                cv::randu(a, 0, 1024);
-            else if (depth == CV_32F || depth == CV_64F)
+            else if (depth == CV_32F || depth == CV_64F || depth == CV_16F)
                cv::randu(a, -1.0, 1.0);
            else if (depth == CV_16S || depth == CV_32S)
                cv::randu(a, -4096, 4096);