dnn(DataLayer): fix CPU/OpenCL code paths for FP16 handling

This commit is contained in:
Alexander Alekhin 2021-11-28 04:29:54 +00:00
parent 58dc397930
commit 58b06222ff

View File

@ -597,28 +597,25 @@ struct DataLayer : public Layer
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// FIXIT: add wrapper without exception suppression
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (outputs_arr.depth() == CV_16S) bool isFP16 = outputs_arr.depth() == CV_16S;
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> outputs, internals; std::vector<Mat> outputs, internals;
outputs_arr.getMatVector(outputs); outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals); internals_arr.getMatVector(internals);
// Supported modes:
// | Input type | Output type |
// | fp32 | fp32 |
// | uint8 | fp32 |
for (int i = 0; i < inputsData.size(); ++i) for (int i = 0; i < inputsData.size(); ++i)
{ {
double scale = scaleFactors[i]; double scale = scaleFactors[i];
Scalar& mean = means[i]; Scalar& mean = means[i];
CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
if (isFP16)
CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
else
CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
bool singleMean = true; bool singleMean = true;
@ -628,35 +625,50 @@ struct DataLayer : public Layer
} }
if (singleMean) if (singleMean)
{
if (isFP16)
{
Mat input_f32;
inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale);
convertFp16(input_f32, outputs[i]);
}
else
{ {
inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
} }
}
else else
{ {
for (int n = 0; n < inputsData[i].size[0]; ++n) for (int n = 0; n < inputsData[i].size[0]; ++n)
{
for (int c = 0; c < inputsData[i].size[1]; ++c) for (int c = 0; c < inputsData[i].size[1]; ++c)
{ {
Mat inp = getPlane(inputsData[i], n, c); Mat inp = getPlane(inputsData[i], n, c);
Mat out = getPlane(outputs[i], n, c); Mat out = getPlane(outputs[i], n, c);
if (isFP16)
{
Mat input_f32;
inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale);
convertFp16(input_f32, out);
}
else
{
inp.convertTo(out, CV_32F, scale, -mean[c] * scale); inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
} }
} }
} }
} }
}
}
#ifdef HAVE_OPENCL
    // OpenCL path of DataLayer forwarding: normalizes each input blob
    // (out = (in - mean) * scale) into the corresponding output UMat.
    //
    // Supported modes:
    //   | Input type | Output type |
    //   | fp32       | fp32        |
    //   | fp32       | fp16        |
    //   | uint8      | fp32        |
    //
    // Returns true on success (required by the CV_OCL_RUN dispatch macro).
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        // FP16 outputs are stored as CV_16S bit patterns (OpenCV convention
        // for half-precision storage); detect once and branch per output.
        bool isFP16 = outputs_.depth() == CV_16S;

        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            // Per-channel mean only makes sense for <= 4 channels (Scalar size).
            CV_Assert(mean == Scalar() || inputData.size[1] <= 4);
            if (isFP16)
                CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
            else
                CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            // If all channel means are equal we can normalize the whole blob
            // with one convertTo() instead of per-plane processing.
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputData.size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                if (isFP16)
                {
                    // convertTo cannot target fp16 directly here: go through an
                    // fp32 intermediate, then pack with convertFp16.
                    UMat input_i;
                    inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale);
                    convertFp16(input_i, outputs[i]);
                }
                else
                {
                    inputData.convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
            }
            else
            {
                // Per-channel means: process each (n, c) plane separately so the
                // channel-specific mean[c] can be applied.
                for (int n = 0; n < inputData.size[0]; ++n)
                {
                    for (int c = 0; c < inputData.size[1]; ++c)
                    {
                        Mat inp = getPlane(inputData, n, c);

                        std::vector<cv::Range> plane(4, Range::all());
                        plane[0] = Range(n, n + 1);
                        plane[1] = Range(c, c + 1);
                        // View into the output blob matching this input plane.
                        UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                        if (isFP16)
                        {
                            UMat input_i;
                            inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale);
                            convertFp16(input_i, out);
                        }
                        else
                        {
                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                    }
                }
            }
        }
        return true;
    }
#endif