Merge pull request #24892 from opencv-pushbot:gitee/alalek/dnn_avoid_16s_usage

DNN: avoid CV_16S usage for FP16 #24892

**Merge after**: #24918

TODO:
- [x] measure performance changes
- [x] optimize convertTo for OpenCL: #24918

12700K iGPU:

|Name of Test|0 (baseline, ms)|1 (this patch, ms)|1 vs 0 (x-factor, >1 = faster)|
|---|:-:|:-:|:-:|
|AlexNet::DNNTestNetwork::OCV/OCL_FP16|7.441|7.480|0.99|
|CRNN::DNNTestNetwork::OCV/OCL_FP16|10.776|10.736|1.00|
|DenseNet_121::DNNTestNetwork::OCV/OCL_FP16|52.762|52.833|1.00|
|EAST_text_detection::DNNTestNetwork::OCV/OCL_FP16|60.694|60.721|1.00|
|EfficientNet::DNNTestNetwork::OCV/OCL_FP16|33.373|33.173|1.01|
|FastNeuralStyle_eccv16::DNNTestNetwork::OCV/OCL_FP16|81.840|81.724|1.00|
|GoogLeNet::DNNTestNetwork::OCV/OCL_FP16|20.965|20.927|1.00|
|Inception_5h::DNNTestNetwork::OCV/OCL_FP16|22.204|22.173|1.00|
|Inception_v2_SSD_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|47.115|47.460|0.99|
|MPHand::DNNTestNetwork::OCV/OCL_FP16|6.760|6.670|1.01|
|MPPalm::DNNTestNetwork::OCV/OCL_FP16|10.188|10.171|1.00|
|MPPose::DNNTestNetwork::OCV/OCL_FP16|12.510|12.561|1.00|
|MobileNet_SSD_Caffe::DNNTestNetwork::OCV/OCL_FP16|17.290|17.072|1.01|
|MobileNet_SSD_v1_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|19.473|19.306|1.01|
|MobileNet_SSD_v2_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|22.874|23.404|0.98|
|OpenFace::DNNTestNetwork::OCV/OCL_FP16|9.568|9.517|1.01|
|OpenPose_pose_mpi_faster_4_stages::DNNTestNetwork::OCV/OCL_FP16|539.899|539.845|1.00|
|PPHumanSeg::DNNTestNetwork::OCV/OCL_FP16|18.015|18.769|0.96|
|PPOCRv3::DNNTestNetwork::OCV/OCL_FP16|63.122|63.540|0.99|
|ResNet_50::DNNTestNetwork::OCV/OCL_FP16|34.947|34.925|1.00|
|SFace::DNNTestNetwork::OCV/OCL_FP16|10.249|10.206|1.00|
|SSD::DNNTestNetwork::OCV/OCL_FP16|213.068|213.108|1.00|
|SqueezeNet_v1_1::DNNTestNetwork::OCV/OCL_FP16|4.867|4.878|1.00|
|VIT_B_32::DNNTestNetwork::OCV/OCL_FP16|200.563|190.788|1.05|
|VitTrack::DNNTestNetwork::OCV/OCL_FP16|7.528|7.173|1.05|
|YOLOX::DNNTestNetwork::OCV/OCL_FP16|132.858|132.701|1.00|
|YOLOv3::DNNTestNetwork::OCV/OCL_FP16|209.559|208.809|1.00|
|YOLOv4::DNNTestNetwork::OCV/OCL_FP16|221.357|220.924|1.00|
|YOLOv4_tiny::DNNTestNetwork::OCV/OCL_FP16|24.446|24.382|1.00|
|YOLOv5::DNNTestNetwork::OCV/OCL_FP16|43.922|44.080|1.00|
|YOLOv8::DNNTestNetwork::OCV/OCL_FP16|64.159|63.842|1.00|
|YuNet::DNNTestNetwork::OCV/OCL_FP16|10.177|10.231|0.99|
|opencv_face_detector::DNNTestNetwork::OCV/OCL_FP16|15.121|15.445|0.98|

Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
This commit is contained in:
Alexander Alekhin 2024-01-26 16:34:17 +03:00 committed by GitHub
parent 37156a4719
commit efc9837df1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
56 changed files with 160 additions and 172 deletions

View File

@ -279,8 +279,8 @@ public:
// Half precision floats.
CV_Assert(raw_data.size() / 2 == (int)dstBlob.total());
Mat halfs((int)shape.size(), &shape[0], CV_16SC1, (void*)raw_data.c_str());
convertFp16(halfs, dstBlob);
Mat halfs((int)shape.size(), &shape[0], CV_16FC1, (void*)raw_data.c_str());
halfs.convertTo(dstBlob, CV_32F);
}
else if (pbBlob.raw_data_type() == caffe::FLOAT)
{

View File

@ -44,8 +44,8 @@ void shrinkCaffeModel(const String& src, const String& dst, const std::vector<St
CV_Assert(blob->data_size() != 0); // float32 array.
Mat floats(1, blob->data_size(), CV_32FC1, (void*)blob->data().data());
Mat halfs(1, blob->data_size(), CV_16SC1);
convertFp16(floats, halfs); // Convert to float16.
Mat halfs(1, blob->data_size(), CV_16FC1);
floats.convertTo(halfs, CV_16F); // Convert to float16.
blob->clear_data(); // Clear float32 data.

View File

@ -502,7 +502,7 @@ void InfEngineNgraphNet::init(Target targetId)
size_t total = ngraph::shape_size(constant->get_shape());
Mat floats(1, total, CV_32F, (void*)floatsData);
Mat halfs;
cv::convertFp16(floats, halfs);
floats.convertTo(halfs, CV_16F);
auto new_const = std::make_shared<ngraph::op::Constant>(ngraph::element::f16, constant->get_shape(), halfs.data);
new_const->set_friendly_name(constant->get_friendly_name());

View File

@ -135,10 +135,10 @@ public:
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
if (inputs_.depth() == CV_16S)
if (inputs_.depth() == CV_16F)
{
UMat inputFp32;
convertFp16(inputs[0], inputFp32);
inputs[0].convertTo(inputFp32, CV_32F);
inputs[0] = inputFp32; // replace
}
@ -264,10 +264,7 @@ public:
UMat outputFp32;
inputs[0].convertTo(outputFp32, CV_32F, scales[0], -(scales[0]*zeropoints[0]));
if (outputs_.depth() == CV_16S)
convertFp16(outputFp32, outputs[0]);
else
outputFp32.copyTo(outputs[0]);
outputFp32.convertTo(outputs[0], outputs_.depth());
return true;
}
#endif

View File

@ -176,7 +176,7 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16F)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
@ -192,7 +192,7 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
inputs.resize(orig_inputs.size());
for (size_t i = 0; i < orig_inputs.size(); i++)
convertFp16(orig_inputs[i], inputs[i]);
orig_inputs[i].convertTo(inputs[i], CV_32F);
outputs.resize(orig_outputs.size());
for (size_t i = 0; i < orig_outputs.size(); i++)
@ -205,7 +205,7 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
forward(inputs, outputs, internals);
for (size_t i = 0; i < outputs.size(); i++)
convertFp16(outputs[i], orig_outputs[i]);
outputs[i].convertTo(orig_outputs[i], CV_16F);
// sync results back
outputs_arr.assign(orig_outputs);

View File

@ -146,7 +146,7 @@ struct DataLayer : public Layer
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
bool isFP16 = outputs_arr.depth() == CV_16S;
bool isFP16 = outputs_arr.depth() == CV_16F;
std::vector<Mat> outputs, internals;
outputs_arr.getMatVector(outputs);
@ -159,7 +159,7 @@ struct DataLayer : public Layer
CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
if (isFP16)
CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
CV_CheckTypeEQ(outputs[i].type(), CV_16FC1, "");
else
CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
@ -175,7 +175,7 @@ struct DataLayer : public Layer
{
Mat input_f32;
inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale);
convertFp16(input_f32, outputs[i]);
input_f32.convertTo(outputs[i], CV_16F);
}
else
{
@ -194,7 +194,7 @@ struct DataLayer : public Layer
{
Mat input_f32;
inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale);
convertFp16(input_f32, out);
input_f32.convertTo(out, CV_16F);
}
else
{
@ -209,7 +209,7 @@ struct DataLayer : public Layer
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
bool isFP16 = outputs_.depth() == CV_16S;
bool isFP16 = outputs_.depth() == CV_16F;
std::vector<UMat> outputs;
outputs_.getUMatVector(outputs);
@ -223,7 +223,7 @@ struct DataLayer : public Layer
CV_Assert(mean == Scalar() || inputData.size[1] <= 4);
if (isFP16)
CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
CV_CheckTypeEQ(outputs[i].type(), CV_16FC1, "");
else
CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
@ -239,7 +239,7 @@ struct DataLayer : public Layer
{
UMat input_i;
inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale);
convertFp16(input_i, outputs[i]);
input_i.convertTo(outputs[i], CV_16F);
}
else
{
@ -263,7 +263,7 @@ struct DataLayer : public Layer
{
UMat input_i;
inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale);
convertFp16(input_i, out);
input_i.convertTo(out, CV_16F);
}
else
{

View File

@ -106,7 +106,7 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -192,7 +192,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inputs_.depth() == CV_16S);
bool use_half = (inputs_.depth() == CV_16F);
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
@ -266,7 +266,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -165,14 +165,14 @@ public:
for( i = 0; i < ninputs; i++ )
{
Mat& inp = inputs[i];
CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S || inp.type() == CV_8S) &&
CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16F || inp.type() == CV_8S) &&
inp.dims == 4 && inp.size[0] == output.size[0] &&
inp.size[2] == output.size[2] &&
inp.size[3] == output.size[3] );
nchannels += inp.size[1];
}
CV_Assert( nchannels == output.size[1] );
CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S || output.type() == CV_8S) );
CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16F || output.type() == CV_8S) );
cc.chptrs.resize(nchannels*batchsz);
@ -223,7 +223,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);

View File

@ -62,12 +62,12 @@ public:
{
std::vector<UMat> outputs;
outs.getUMatVector(outputs);
if (outs.depth() == CV_16S) {
if (outs.depth() == CV_16F) {
auto blob = blobs[0];
if (blob.type() != CV_32F) {
blob.convertTo(blob, CV_32F);
}
convertFp16(blob, outputs[0]);
blob.convertTo(outputs[0], CV_16F);
}
else
blobs[0].convertTo(outputs[0], outputs[0].type());

View File

@ -140,7 +140,7 @@ public:
}
const Mat &input = inputs[0];
CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S));
CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16F));
for (size_t i = 0; i < outputs.size(); i++)
{
CV_Assert(inputs[i].type() == input.type());
@ -1023,7 +1023,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
@ -1037,6 +1037,7 @@ public:
umat_blobs.resize(n);
for (size_t i = 0; i < n; i++)
{
CV_Assert(!use_half); // TODO: not implemented
inputs[i + 1].copyTo(umat_blobs[i]);
}
inputs.resize(1);
@ -1049,7 +1050,7 @@ public:
for (size_t i = 0; i < n; i++)
{
if (use_half)
convertFp16(blobs[i], umat_blobs[i]);
blobs[i].convertTo(umat_blobs[i], CV_16F);
else
blobs[i].copyTo(umat_blobs[i]);
}
@ -1130,7 +1131,7 @@ public:
if (fusedWeights)
{
if (use_half)
convertFp16(weightsMat, umat_blobs[0]);
weightsMat.convertTo(umat_blobs[0], CV_16F);
else
weightsMat.copyTo(umat_blobs[0]);
fusedWeights = false;
@ -1140,7 +1141,7 @@ public:
if ( umat_blobs.size() < 2 )
umat_blobs.resize(2);
if (use_half)
convertFp16(Mat(biasvec, true), umat_blobs[1]);
Mat(biasvec, true).convertTo(umat_blobs[1], CV_16F);
else
Mat(biasvec, true).copyTo(umat_blobs[1]);
convolutionOp->setBias(true);
@ -1203,7 +1204,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
@ -1883,7 +1884,7 @@ public:
std::vector<UMat> outputs;
std::vector<UMat> internals;
if (inputs_.depth() == CV_16S)
if (inputs_.depth() == CV_16F)
return false;
inputs_.getUMatVector(inputs);
@ -1990,7 +1991,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr));
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -55,7 +55,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -37,7 +37,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -337,7 +337,7 @@ public:
std::vector<UMat> outputs;
outs.getUMatVector(outputs);
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
if (use_half)
{
std::vector<UMat> orig_inputs;
@ -345,7 +345,7 @@ public:
inputs.resize(orig_inputs.size());
for (size_t i = 0; i < orig_inputs.size(); i++)
convertFp16(orig_inputs[i], inputs[i]);
orig_inputs[i].convertTo(inputs[i], CV_32F);
}
else
{
@ -410,7 +410,7 @@ public:
if (use_half)
{
UMat half_umat;
convertFp16(umat, half_umat);
umat.convertTo(half_umat, CV_16F);
outs.assign(std::vector<UMat>(1, half_umat));
}
@ -428,7 +428,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
}
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -454,7 +454,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -243,7 +243,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
func.applyOCL(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -590,7 +590,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
if ((inputs_.depth() == CV_16F && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
return false;
if (hasVecInput)
@ -610,7 +610,7 @@ public:
size_t localsize[] = { 128 };
size_t globalsize[] = { (size_t)channels / 4 * localsize[0] };
String opts;
if (inputs_.depth() == CV_16S)
if (inputs_.depth() == CV_16F)
opts = " -DDtype=half -DDtype4=half4 -DDtype8=half8";
else
opts = " -DDtype=float -DDtype4=float4 -DDtype8=float8";
@ -636,7 +636,7 @@ public:
}
else
{
if (inputs_.depth() == CV_16S)
if (inputs_.depth() == CV_16F)
return false;
float coeff1 = coeffs.empty() ? 1.f : coeffs[0];
@ -689,7 +689,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -105,7 +105,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -357,7 +357,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
@ -385,9 +385,9 @@ public:
if (use_half)
{
convertFp16(A, A_fp32);
convertFp16(B, B_fp32);
convertFp16(C, C_fp32);
A.convertTo(A_fp32, CV_32F);
B.convertTo(B_fp32, CV_32F);
C.convertTo(C_fp32, CV_32F);
}
else
{
@ -398,9 +398,9 @@ public:
cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32);
if (use_half)
{
convertFp16(A_fp32, A);
convertFp16(B_fp32, B);
convertFp16(C_fp32, C);
A_fp32.convertTo(A, CV_16F);
B_fp32.convertTo(B, CV_16F);
C_fp32.convertTo(C, CV_16F);
}
}
return true;
@ -431,7 +431,7 @@ public:
for (int i = 0; i < umat_blobs.size(); i++)
{
if (!umat_blobs[i].empty())
convertFp16(umat_blobs[i], half_blobs[i]);
umat_blobs[i].convertTo(half_blobs[i], CV_16F);
}
}
@ -472,8 +472,8 @@ public:
if (use_half)
{
convertFp16(srcMat, srcMat_fp32);
convertFp16(dstMat, dstMat_fp32);
srcMat.convertTo(srcMat_fp32, CV_32F);
dstMat.convertTo(dstMat_fp32, CV_32F);
}
else
{
@ -491,8 +491,8 @@ public:
}
if (use_half)
{
convertFp16(srcMat_fp32, srcMat);
convertFp16(dstMat_fp32, dstMat);
srcMat_fp32.convertTo(srcMat, CV_16F);
dstMat_fp32.convertTo(dstMat, CV_16F);
}
}
@ -508,7 +508,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !isMatMul,
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -70,7 +70,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -57,12 +57,12 @@ public:
const Mat& inp = inputs[0];
int indicesType = inputs[1].type();
CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16SC1, "");
CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16FC1, "");
Mat indices32S;
if (indicesType == CV_16S/*FP16*/)
if (indicesType == CV_16F/*FP16*/)
{
Mat indicesF32;
convertFp16(inputs[1], indicesF32);
inputs[1].convertTo(indicesF32, CV_32F);
indicesF32.convertTo(indices32S, CV_32S);
}
else

View File

@ -172,7 +172,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -60,7 +60,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S) {
if (inputs_arr.depth() == CV_16F) {
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
@ -95,7 +95,7 @@ public:
float inv_norm_size = 1.f / norm_size;
// no fp16 support
if (input.depth() == CV_16S) {
if (input.depth() == CV_16F) {
return false;
}

View File

@ -73,7 +73,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
@ -107,7 +107,7 @@ public:
float inv_norm_size = 1.f / norm_size;
// no fp16 support
if (input.depth() == CV_16S) {
if (input.depth() == CV_16F) {
return false;
}

View File

@ -99,7 +99,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
@ -140,7 +140,7 @@ public:
const auto &bias = inputs.size() == 3 ? inputs[2] : UMat::zeros(norm_size, 1, CV_32F);
// no fp16 support
if (input.depth() == CV_16S) {
if (input.depth() == CV_16F) {
return false;
}

View File

@ -121,7 +121,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
@ -166,7 +166,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -119,7 +119,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
@ -154,7 +154,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inputs_arr.depth() == CV_16S);
bool use_half = (inputs_arr.depth() == CV_16F);
inputs_arr.getUMatVector(inputs);
outputs_arr.getUMatVector(outputs);
@ -192,9 +192,9 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
}
if (use_half) {
convertFp16(A, A_fp32);
convertFp16(B, B_fp32);
convertFp16(C, C_fp32);
A.convertTo(A_fp32, CV_32F);
B.convertTo(B_fp32, CV_32F);
C.convertTo(C_fp32, CV_32F);
} else {
A_fp32 = A;
B_fp32 = B;
@ -203,9 +203,9 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
cv::gemm(A_fp32, B_fp32, 1.f, noArray(), 0.f, C_fp32);
if (use_half) {
convertFp16(A_fp32, A);
convertFp16(B_fp32, B);
convertFp16(C_fp32, C);
A_fp32.convertTo(A, CV_16F);
B_fp32.convertTo(B, CV_16F);
C_fp32.convertTo(C, CV_16F);
}
}
return true;

View File

@ -75,7 +75,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -149,7 +149,7 @@ public:
UMat& bnorm_bias = umat_shift;
const unsigned LOCAL_SIZE = 128;
bool use_half = (inputs[0].depth() == CV_16S);
bool use_half = (inputs[0].depth() == CV_16F);
String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u", use_half ? "half" : "float",
use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4",
LOCAL_SIZE
@ -164,7 +164,7 @@ public:
CV_Assert(newRows != 0);
MatShape s = shape(newRows, inpMat.total() / newRows);
UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16S : CV_32F);
UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16F : CV_32F);
UMat tmpMat = UMat(s[0], s[1], CV_32F);
float alpha = 1.0f / s[1];
@ -226,7 +226,7 @@ public:
if (normVariance && (row_size % 4 == 0) && (plane_size % 4 == 0))
return fast_forward_ocl(inputs, outputs);
if (inputs[0].depth() == CV_16S)
if (inputs[0].depth() == CV_16F)
return false;
String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
@ -309,7 +309,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -638,7 +638,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -112,7 +112,7 @@ public:
std::vector<UMat> outputs;
std::vector<UMat> internals;
if (inputs_.depth() == CV_16S)
if (inputs_.depth() == CV_16F)
return false;
inputs_.getUMatVector(inputs);
@ -193,7 +193,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -129,17 +129,7 @@ public:
if (paddingType == "constant")
{
if (inputs_arr.depth() == CV_16S)
{
std::vector<float> paddingValue_fp32(1, paddingValue);
std::vector<int16_t> paddingValue_fp16(1);
cv::convertFp16(paddingValue_fp32, paddingValue_fp16);
outputs[0].setTo(paddingValue_fp16[0]);
}
else if (inputs_arr.depth() == CV_8S)
outputs[0].setTo(saturate_cast<int8_t>(paddingValue));
else
outputs[0].setTo(paddingValue);
outputs[0].setTo(paddingValue);
inputs[0].copyTo(outputs[0](dstRanges));
}
else if (paddingType == "reflect" || paddingType == "edge")

View File

@ -319,7 +319,7 @@ public:
mnew_stride.copyTo(unew_stride);
}
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
String opts = format("-DDtype=%s", use_half ? "half" : "float");
for (size_t i = 0; i < inputs.size(); i++)
{
@ -350,7 +350,7 @@ public:
inputs_arr.depth() != CV_8S,
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -293,7 +293,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
@ -353,7 +353,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
}
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -346,7 +346,7 @@ public:
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
@ -431,7 +431,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -186,7 +186,7 @@ public:
std::vector<UMat> outputs;
std::vector<UMat> internals;
if (inputs_.depth() == CV_16S)
if (inputs_.depth() == CV_16F)
return false;
inputs_.getUMatVector(inputs);
@ -269,7 +269,7 @@ public:
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -390,7 +390,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
@ -906,7 +906,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
@ -1066,7 +1066,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -456,7 +456,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -161,7 +161,7 @@ public:
std::vector<UMat> outputs;
// TODO: implement a logistic activation to classification scores.
if (useLogistic || inps.depth() == CV_16S)
if (useLogistic || inps.depth() == CV_16F)
return false;
inps.getUMatVector(inputs);
@ -232,7 +232,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -184,7 +184,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -115,7 +115,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -107,7 +107,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -74,7 +74,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S) {
if (inputs_arr.depth() == CV_16F) {
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}

View File

@ -68,7 +68,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S) {
if (inputs_arr.depth() == CV_16F) {
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}

View File

@ -107,7 +107,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -621,7 +621,7 @@ public:
{
std::vector<int> inpIdx(dimsNum, 0);
std::vector<int> outIdx(dimsNum, 0);
if (inpMat.type() == CV_16S)
if (inpMat.type() == CV_16F)
getSliceRecursive<int16_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
else if (inpMat.type() == CV_8S)
getSliceRecursive<int8_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);

View File

@ -132,7 +132,7 @@ public:
std::vector<UMat> outputs;
std::vector<UMat> internals;
bool use_half = (inputs_.depth() == CV_16S);
bool use_half = (inputs_.depth() == CV_16F);
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
internals_.getUMatVector(internals);
@ -217,7 +217,7 @@ public:
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;

View File

@ -514,7 +514,7 @@ void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes)
CV_Assert(layerShapesIt != layersShapes.end());
if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F)
ld.dtype = CV_16S;
ld.dtype = CV_16F;
std::vector<LayerPin> pinsForInternalBlobs;
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
@ -572,7 +572,7 @@ void Net::Impl::allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
preferableTarget == DNN_TARGET_OPENCL_FP16 &&
layers[0].dtype == CV_32F)
{
layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16F);
}
inputShapes.push_back(shape(inp));
}
@ -656,8 +656,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
{
UMat& u = umat_outputBlobs[i];
Mat m;
if (u.depth() == CV_16S) // FP16
convertFp16(u, m);
if (u.depth() == CV_16F) // FP16
u.convertTo(m, CV_32F);
else
m = u.getMat(ACCESS_READ);
if (!checkRange(m))
@ -679,8 +679,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
{
UMat& u = umat_inputBlobs[i];
Mat m;
if (u.depth() == CV_16S) // FP16
convertFp16(u, m);
if (u.depth() == CV_16F) // FP16
u.convertTo(m, CV_32F);
else
m = u.getMat(ACCESS_READ);
std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
@ -690,8 +690,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
{
UMat& u = umat_outputBlobs[i];
Mat m;
if (u.depth() == CV_16S) // FP16
convertFp16(u, m);
if (u.depth() == CV_16F) // FP16
u.convertTo(m, CV_32F);
else
m = u.getMat(ACCESS_READ);
std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
@ -701,8 +701,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
{
UMat& u = umat_internalBlobs[i];
Mat m;
if (u.depth() == CV_16S) // FP16
convertFp16(u, m);
if (u.depth() == CV_16F) // FP16
u.convertTo(m, CV_32F);
else
m = u.getMat(ACCESS_READ);
std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
@ -981,12 +981,12 @@ void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputNam
ld.outputBlobsWrappers[i]->copyToHost();
}
}
if (ld.outputBlobs[0].depth() == CV_16S)
if (ld.outputBlobs[0].depth() == CV_16F)
{
std::vector<Mat>& outputvec = *(std::vector<Mat>*)outputBlobs.getObj();
outputvec.resize(ld.outputBlobs.size());
for (int i = 0; i < outputvec.size(); i++)
convertFp16(ld.outputBlobs[i], outputvec[i]);
ld.outputBlobs[i].convertTo(outputvec[i], CV_32F);
}
else
{
@ -1009,7 +1009,7 @@ void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputNam
std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
outputvec.resize(out_vec.size());
for (int i = 0; i < out_vec.size(); i++)
convertFp16(out_vec[i], outputvec[i]);
out_vec[i].convertTo(outputvec[i], CV_32F);
}
}
else
@ -1275,7 +1275,7 @@ void Net::Impl::updateLayersShapes()
preferableTarget == DNN_TARGET_OPENCL_FP16 &&
inputLayerData.dtype == CV_32F)
{
inp.create(inp.dims, inp.size, CV_16S);
inp.create(inp.dims, inp.size, CV_16F);
}
inputShapes.push_back(shape(inp));
}
@ -1344,10 +1344,10 @@ Mat Net::Impl::getBlob(const LayerPin& pin) const
ld.outputBlobsWrappers[pin.oid]->copyToHost();
}
if (ld.outputBlobs[pin.oid].depth() == CV_16S)
if (ld.outputBlobs[pin.oid].depth() == CV_16F)
{
Mat output_blob;
convertFp16(ld.outputBlobs[pin.oid], output_blob);
ld.outputBlobs[pin.oid].convertTo(output_blob, CV_32F);
return output_blob;
}
else

View File

@ -156,7 +156,7 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
CHECK_EQ(gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 || gemm_type == GEMM_TYPE_FAST_IMAGE_32_2 ||
gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE, true) << "Invalid fast image gemm type." << std::endl;
bool halfPrecisionMode = (A.depth() == CV_16S);
bool halfPrecisionMode = (A.depth() == CV_16F);
if (is_image_a)
{
@ -439,7 +439,7 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
CHECK_EQ(gemm_type == GEMM_TYPE_FAST_BUFFER, true)
<< "Invalid fast buffer gemm type." << std::endl;
bool halfPrecisionMode = (A.depth() == CV_16S);
bool halfPrecisionMode = (A.depth() == CV_16F);
size_t sub_group_size = 8;
bool is_small_batch = (M == 2 || M == 4 || M == 8);
@ -544,7 +544,7 @@ bool ocl4dnnGEMMCommon(const CBLAS_TRANSPOSE TransB,
const UMat B_image, UMat C,
const size_t max_image_size)
{
bool halfPrecisionMode = (A.depth() == CV_16S);
bool halfPrecisionMode = (A.depth() == CV_16F);
gemm_type_t gemm_type = halfPrecisionMode ? GEMM_TYPE_FAST_BUFFER : GEMM_TYPE_FAST_IMAGE_32_1;
if (gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 ||
@ -594,7 +594,7 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
const int32_t offy)
{
bool ret = false;
bool use_half = (A.depth() == CV_16S);
bool use_half = (A.depth() == CV_16F);
String opts;
if (use_half)
opts = format("-DDtype=%s -DDtype4=%s -Dconvert_Dtype=convert_%s", "half", "half4", "half");
@ -665,7 +665,7 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
const UMat X, const int32_t offX, UMat Y,
const int32_t offY)
{
bool use_half = (X.depth() == CV_16S);
bool use_half = (X.depth() == CV_16F);
String opts;
if (use_half)
opts = "-DDtype=half -DDtype4=half4 -Dconvert_Dtype=convert_half";

View File

@ -582,10 +582,10 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
}
if (use_half_ && !bias.empty())
CV_CheckTypeEQ(bias.type(), CV_16SC1, "");
CV_CheckTypeEQ(bias.type(), CV_16FC1, "");
if (use_half_)
CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
CV_CheckTypeEQ(weight.type(), CV_16FC1, "");
prepareKernel(bottom, top, weight, bias, numImages);
if (bestKernelConfig.empty())
@ -740,7 +740,7 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
if (swizzled_weights_umat.empty())
swizzled_weights_umat.create(1, (int)alignSize(num_output_, 16) * channels_ *
kernel_h_ * (int)alignSize(kernel_w_, 2),
(use_half_) ? CV_16SC1 : CV_32FC1);
(use_half_) ? CV_16FC1 : CV_32FC1);
if (!interleave) {
int32_t channels = channels_ / group_;
@ -777,8 +777,8 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
UMat weight_tmp; // FP32 in half mode, TODO implement FP16 repack
if (use_half_)
{
CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
convertFp16(weight, weight_tmp);
CV_CheckTypeEQ(weight.type(), CV_16FC1, "");
weight.convertTo(weight_tmp, CV_32F);
weightMat = weight_tmp.getMat(ACCESS_READ);
swizzledWeightMat.create(shape(swizzled_weights_umat), CV_32F);
}
@ -817,7 +817,7 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
weightMat.release();
if (use_half_)
convertFp16(swizzledWeightMat, swizzled_weights_umat);
swizzledWeightMat.convertTo(swizzled_weights_umat, CV_16F);
}
return true;
@ -1140,7 +1140,7 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
//int32_t sz[4] = {numImages, num_output_, output_h_, output_w_};
CV_CheckEQ(top.total(), (size_t)numImages * num_output_ * output_h_ * output_w_, "");
CV_CheckTypeEQ(top.type(), (use_half_) ? CV_16SC1 : CV_32FC1, "");
CV_CheckTypeEQ(top.type(), (use_half_) ? CV_16FC1 : CV_32FC1, "");
top.setTo(Scalar::all(0));
bool saved_tuned = tuned_;
@ -1154,8 +1154,8 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
Mat mat_top, mat_verify_top;
if (use_half_)
{
convertFp16(top, new_top);
convertFp16(verifyTop, new_verify_top);
top.convertTo(new_top, CV_32F);
verifyTop.convertTo(new_verify_top, CV_32F);
mat_top = new_top.getMat(ACCESS_READ);
mat_verify_top = new_verify_top.getMat(ACCESS_READ);
@ -1827,7 +1827,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
if (loadTunedConfig()) // check external storage
return;
UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1);
UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16FC1 : CV_32FC1);
calculateBenchmark(bottom, benchData, weight, bias, numImages);

View File

@ -102,10 +102,10 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
UMat biasOneMat = UMat::ones(M_, 1, CV_32F);
UMat newbias, tmpTop;
convertFp16(bias, newbias);
convertFp16(top, tmpTop);
bias.convertTo(newbias, CV_32F);
top.convertTo(tmpTop, CV_32F);
cv::gemm(biasOneMat, newbias, 1, tmpTop, 1, tmpTop, 0);
convertFp16(tmpTop, top);
tmpTop.convertTo(top, CV_16F);
} else {
UMat biasOnesMat = UMat::ones(M_, 1, CV_32F);
cv::gemm(biasOnesMat, bias, 1, top, 1, top, 0);

View File

@ -2443,7 +2443,7 @@ void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodePr
case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break;
case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break;
case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break;
case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16F; break;
case opencv_onnx::TensorProto_DataType_INT8:
case opencv_onnx::TensorProto_DataType_INT16:
case opencv_onnx::TensorProto_DataType_INT32:

View File

@ -915,22 +915,22 @@ Mat getTensorContentRef_(const tensorflow::TensorProto& tensor)
}
case tensorflow::DT_HALF:
{
Mat halfs;
if (!content.empty())
{
static const int kHalfSize = 2;
halfs = Mat(1, content.size() / kHalfSize, CV_16UC1, (void*)content.c_str());
Mat halfs(1, content.size() / kHalfSize, CV_16FC1, (void*)content.c_str());
halfs.convertTo(m, CV_32F);
}
else
{
const RepeatedField<int32_t>& field = tensor.half_val();
CV_Assert(!field.empty());
Mat ints(1, field.size(), CV_32SC1, (void*)field.data());
Mat halfs;
ints.convertTo(halfs, CV_16UC1);
Mat halfsSigned(halfs.size(), CV_16FC1, halfs.data);
halfsSigned.convertTo(m, CV_32F);
}
// Reinterpret as a signed shorts just for a convertFp16 call.
Mat halfsSigned(halfs.size(), CV_16SC1, halfs.data);
convertFp16(halfsSigned, m);
break;
}
case tensorflow::DT_QUINT8:

View File

@ -101,7 +101,7 @@ Mat TFLiteImporter::parseTensor(const Tensor& tensor)
dtype = CV_32S;
break;
case TensorType_FLOAT16:
dtype = CV_16S;
dtype = CV_16F;
break;
case TensorType_INT8:
dtype = CV_8S;
@ -227,7 +227,7 @@ void TFLiteImporter::populateNet()
if (!data.empty()) {
// Dequantize a buffer
Mat dataFP32;
convertFp16(data, dataFP32);
data.convertTo(dataFP32, CV_32F);
allTensors[op_outputs->Get(0)] = dataFP32;
continue;
}

View File

@ -84,7 +84,7 @@ enum TorchType
TYPE_FLOAT = CV_32F,
TYPE_BYTE = CV_8U,
TYPE_CHAR = CV_8S,
TYPE_SHORT = CV_16S,
TYPE_SHORT = CV_16F,
TYPE_INT = CV_32S,
TYPE_LONG = CV_32SC2
};
@ -276,7 +276,7 @@ struct TorchImporter
THFile_readByteRaw(file, (uchar*)storageMat.data, size);
break;
case TYPE_SHORT:
storageMat.create(1, size, CV_16S);
storageMat.create(1, size, CV_16F);
THFile_readShortRaw(file, (short*)storageMat.data, size);
break;
case TYPE_INT:

View File

@ -1613,7 +1613,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
if (inputs_arr.depth() == CV_16F)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;