mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 11:10:21 +08:00
Merge pull request #24892 from opencv-pushbot:gitee/alalek/dnn_avoid_16s_usage
DNN: avoid CV_16S usage for FP16 #24892

**Merge after**: #24918

TODO:
- [x] measure performance changes
- [x] optimize convertTo for OpenCL: #24918

12700K iGPU:

|Name of Test|0|1|1 vs 0 (x-factor)|
|---|:-:|:-:|:-:|
|AlexNet::DNNTestNetwork::OCV/OCL_FP16|7.441|7.480|0.99|
|CRNN::DNNTestNetwork::OCV/OCL_FP16|10.776|10.736|1.00|
|DenseNet_121::DNNTestNetwork::OCV/OCL_FP16|52.762|52.833|1.00|
|EAST_text_detection::DNNTestNetwork::OCV/OCL_FP16|60.694|60.721|1.00|
|EfficientNet::DNNTestNetwork::OCV/OCL_FP16|33.373|33.173|1.01|
|FastNeuralStyle_eccv16::DNNTestNetwork::OCV/OCL_FP16|81.840|81.724|1.00|
|GoogLeNet::DNNTestNetwork::OCV/OCL_FP16|20.965|20.927|1.00|
|Inception_5h::DNNTestNetwork::OCV/OCL_FP16|22.204|22.173|1.00|
|Inception_v2_SSD_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|47.115|47.460|0.99|
|MPHand::DNNTestNetwork::OCV/OCL_FP16|6.760|6.670|1.01|
|MPPalm::DNNTestNetwork::OCV/OCL_FP16|10.188|10.171|1.00|
|MPPose::DNNTestNetwork::OCV/OCL_FP16|12.510|12.561|1.00|
|MobileNet_SSD_Caffe::DNNTestNetwork::OCV/OCL_FP16|17.290|17.072|1.01|
|MobileNet_SSD_v1_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|19.473|19.306|1.01|
|MobileNet_SSD_v2_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|22.874|23.404|0.98|
|OpenFace::DNNTestNetwork::OCV/OCL_FP16|9.568|9.517|1.01|
|OpenPose_pose_mpi_faster_4_stages::DNNTestNetwork::OCV/OCL_FP16|539.899|539.845|1.00|
|PPHumanSeg::DNNTestNetwork::OCV/OCL_FP16|18.015|18.769|0.96|
|PPOCRv3::DNNTestNetwork::OCV/OCL_FP16|63.122|63.540|0.99|
|ResNet_50::DNNTestNetwork::OCV/OCL_FP16|34.947|34.925|1.00|
|SFace::DNNTestNetwork::OCV/OCL_FP16|10.249|10.206|1.00|
|SSD::DNNTestNetwork::OCV/OCL_FP16|213.068|213.108|1.00|
|SqueezeNet_v1_1::DNNTestNetwork::OCV/OCL_FP16|4.867|4.878|1.00|
|VIT_B_32::DNNTestNetwork::OCV/OCL_FP16|200.563|190.788|1.05|
|VitTrack::DNNTestNetwork::OCV/OCL_FP16|7.528|7.173|1.05|
|YOLOX::DNNTestNetwork::OCV/OCL_FP16|132.858|132.701|1.00|
|YOLOv3::DNNTestNetwork::OCV/OCL_FP16|209.559|208.809|1.00|
|YOLOv4::DNNTestNetwork::OCV/OCL_FP16|221.357|220.924|1.00|
|YOLOv4_tiny::DNNTestNetwork::OCV/OCL_FP16|24.446|24.382|1.00|
|YOLOv5::DNNTestNetwork::OCV/OCL_FP16|43.922|44.080|1.00|
|YOLOv8::DNNTestNetwork::OCV/OCL_FP16|64.159|63.842|1.00|
|YuNet::DNNTestNetwork::OCV/OCL_FP16|10.177|10.231|0.99|
|opencv_face_detector::DNNTestNetwork::OCV/OCL_FP16|15.121|15.445|0.98|

Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
This commit is contained in:
parent
37156a4719
commit
efc9837df1
@ -279,8 +279,8 @@ public:
|
||||
// Half precision floats.
|
||||
CV_Assert(raw_data.size() / 2 == (int)dstBlob.total());
|
||||
|
||||
Mat halfs((int)shape.size(), &shape[0], CV_16SC1, (void*)raw_data.c_str());
|
||||
convertFp16(halfs, dstBlob);
|
||||
Mat halfs((int)shape.size(), &shape[0], CV_16FC1, (void*)raw_data.c_str());
|
||||
halfs.convertTo(dstBlob, CV_32F);
|
||||
}
|
||||
else if (pbBlob.raw_data_type() == caffe::FLOAT)
|
||||
{
|
||||
|
@ -44,8 +44,8 @@ void shrinkCaffeModel(const String& src, const String& dst, const std::vector<St
|
||||
CV_Assert(blob->data_size() != 0); // float32 array.
|
||||
|
||||
Mat floats(1, blob->data_size(), CV_32FC1, (void*)blob->data().data());
|
||||
Mat halfs(1, blob->data_size(), CV_16SC1);
|
||||
convertFp16(floats, halfs); // Convert to float16.
|
||||
Mat halfs(1, blob->data_size(), CV_16FC1);
|
||||
floats.convertTo(halfs, CV_16F); // Convert to float16.
|
||||
|
||||
blob->clear_data(); // Clear float32 data.
|
||||
|
||||
|
@ -502,7 +502,7 @@ void InfEngineNgraphNet::init(Target targetId)
|
||||
size_t total = ngraph::shape_size(constant->get_shape());
|
||||
Mat floats(1, total, CV_32F, (void*)floatsData);
|
||||
Mat halfs;
|
||||
cv::convertFp16(floats, halfs);
|
||||
floats.convertTo(halfs, CV_16F);
|
||||
|
||||
auto new_const = std::make_shared<ngraph::op::Constant>(ngraph::element::f16, constant->get_shape(), halfs.data);
|
||||
new_const->set_friendly_name(constant->get_friendly_name());
|
||||
|
@ -135,10 +135,10 @@ public:
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
if (inputs_.depth() == CV_16S)
|
||||
if (inputs_.depth() == CV_16F)
|
||||
{
|
||||
UMat inputFp32;
|
||||
convertFp16(inputs[0], inputFp32);
|
||||
inputs[0].convertTo(inputFp32, CV_32F);
|
||||
inputs[0] = inputFp32; // replace
|
||||
}
|
||||
|
||||
@ -264,10 +264,7 @@ public:
|
||||
UMat outputFp32;
|
||||
inputs[0].convertTo(outputFp32, CV_32F, scales[0], -(scales[0]*zeropoints[0]));
|
||||
|
||||
if (outputs_.depth() == CV_16S)
|
||||
convertFp16(outputFp32, outputs[0]);
|
||||
else
|
||||
outputFp32.copyTo(outputs[0]);
|
||||
outputFp32.convertTo(outputs[0], outputs_.depth());
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
@ -176,7 +176,7 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
|
||||
if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
@ -192,7 +192,7 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
|
||||
|
||||
inputs.resize(orig_inputs.size());
|
||||
for (size_t i = 0; i < orig_inputs.size(); i++)
|
||||
convertFp16(orig_inputs[i], inputs[i]);
|
||||
orig_inputs[i].convertTo(inputs[i], CV_32F);
|
||||
|
||||
outputs.resize(orig_outputs.size());
|
||||
for (size_t i = 0; i < orig_outputs.size(); i++)
|
||||
@ -205,7 +205,7 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
|
||||
forward(inputs, outputs, internals);
|
||||
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
convertFp16(outputs[i], orig_outputs[i]);
|
||||
outputs[i].convertTo(orig_outputs[i], CV_16F);
|
||||
|
||||
// sync results back
|
||||
outputs_arr.assign(orig_outputs);
|
||||
|
@ -146,7 +146,7 @@ struct DataLayer : public Layer
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
bool isFP16 = outputs_arr.depth() == CV_16S;
|
||||
bool isFP16 = outputs_arr.depth() == CV_16F;
|
||||
|
||||
std::vector<Mat> outputs, internals;
|
||||
outputs_arr.getMatVector(outputs);
|
||||
@ -159,7 +159,7 @@ struct DataLayer : public Layer
|
||||
|
||||
CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
|
||||
if (isFP16)
|
||||
CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
|
||||
CV_CheckTypeEQ(outputs[i].type(), CV_16FC1, "");
|
||||
else
|
||||
CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
|
||||
|
||||
@ -175,7 +175,7 @@ struct DataLayer : public Layer
|
||||
{
|
||||
Mat input_f32;
|
||||
inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale);
|
||||
convertFp16(input_f32, outputs[i]);
|
||||
input_f32.convertTo(outputs[i], CV_16F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -194,7 +194,7 @@ struct DataLayer : public Layer
|
||||
{
|
||||
Mat input_f32;
|
||||
inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale);
|
||||
convertFp16(input_f32, out);
|
||||
input_f32.convertTo(out, CV_16F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -209,7 +209,7 @@ struct DataLayer : public Layer
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
bool isFP16 = outputs_.depth() == CV_16S;
|
||||
bool isFP16 = outputs_.depth() == CV_16F;
|
||||
|
||||
std::vector<UMat> outputs;
|
||||
outputs_.getUMatVector(outputs);
|
||||
@ -223,7 +223,7 @@ struct DataLayer : public Layer
|
||||
|
||||
CV_Assert(mean == Scalar() || inputData.size[1] <= 4);
|
||||
if (isFP16)
|
||||
CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
|
||||
CV_CheckTypeEQ(outputs[i].type(), CV_16FC1, "");
|
||||
else
|
||||
CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
|
||||
|
||||
@ -239,7 +239,7 @@ struct DataLayer : public Layer
|
||||
{
|
||||
UMat input_i;
|
||||
inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale);
|
||||
convertFp16(input_i, outputs[i]);
|
||||
input_i.convertTo(outputs[i], CV_16F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -263,7 +263,7 @@ struct DataLayer : public Layer
|
||||
{
|
||||
UMat input_i;
|
||||
inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale);
|
||||
convertFp16(input_i, out);
|
||||
input_i.convertTo(out, CV_16F);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -106,7 +106,7 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -192,7 +192,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inputs_.depth() == CV_16S);
|
||||
bool use_half = (inputs_.depth() == CV_16F);
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
@ -266,7 +266,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -165,14 +165,14 @@ public:
|
||||
for( i = 0; i < ninputs; i++ )
|
||||
{
|
||||
Mat& inp = inputs[i];
|
||||
CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S || inp.type() == CV_8S) &&
|
||||
CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16F || inp.type() == CV_8S) &&
|
||||
inp.dims == 4 && inp.size[0] == output.size[0] &&
|
||||
inp.size[2] == output.size[2] &&
|
||||
inp.size[3] == output.size[3] );
|
||||
nchannels += inp.size[1];
|
||||
}
|
||||
CV_Assert( nchannels == output.size[1] );
|
||||
CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S || output.type() == CV_8S) );
|
||||
CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16F || output.type() == CV_8S) );
|
||||
|
||||
cc.chptrs.resize(nchannels*batchsz);
|
||||
|
||||
@ -223,7 +223,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
|
@ -62,12 +62,12 @@ public:
|
||||
{
|
||||
std::vector<UMat> outputs;
|
||||
outs.getUMatVector(outputs);
|
||||
if (outs.depth() == CV_16S) {
|
||||
if (outs.depth() == CV_16F) {
|
||||
auto blob = blobs[0];
|
||||
if (blob.type() != CV_32F) {
|
||||
blob.convertTo(blob, CV_32F);
|
||||
}
|
||||
convertFp16(blob, outputs[0]);
|
||||
blob.convertTo(outputs[0], CV_16F);
|
||||
}
|
||||
else
|
||||
blobs[0].convertTo(outputs[0], outputs[0].type());
|
||||
|
@ -140,7 +140,7 @@ public:
|
||||
}
|
||||
|
||||
const Mat &input = inputs[0];
|
||||
CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S));
|
||||
CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16F));
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
CV_Assert(inputs[i].type() == input.type());
|
||||
@ -1023,7 +1023,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
@ -1037,6 +1037,7 @@ public:
|
||||
umat_blobs.resize(n);
|
||||
for (size_t i = 0; i < n; i++)
|
||||
{
|
||||
CV_Assert(!use_half); // TODO: not implemented
|
||||
inputs[i + 1].copyTo(umat_blobs[i]);
|
||||
}
|
||||
inputs.resize(1);
|
||||
@ -1049,7 +1050,7 @@ public:
|
||||
for (size_t i = 0; i < n; i++)
|
||||
{
|
||||
if (use_half)
|
||||
convertFp16(blobs[i], umat_blobs[i]);
|
||||
blobs[i].convertTo(umat_blobs[i], CV_16F);
|
||||
else
|
||||
blobs[i].copyTo(umat_blobs[i]);
|
||||
}
|
||||
@ -1130,7 +1131,7 @@ public:
|
||||
if (fusedWeights)
|
||||
{
|
||||
if (use_half)
|
||||
convertFp16(weightsMat, umat_blobs[0]);
|
||||
weightsMat.convertTo(umat_blobs[0], CV_16F);
|
||||
else
|
||||
weightsMat.copyTo(umat_blobs[0]);
|
||||
fusedWeights = false;
|
||||
@ -1140,7 +1141,7 @@ public:
|
||||
if ( umat_blobs.size() < 2 )
|
||||
umat_blobs.resize(2);
|
||||
if (use_half)
|
||||
convertFp16(Mat(biasvec, true), umat_blobs[1]);
|
||||
Mat(biasvec, true).convertTo(umat_blobs[1], CV_16F);
|
||||
else
|
||||
Mat(biasvec, true).copyTo(umat_blobs[1]);
|
||||
convolutionOp->setBias(true);
|
||||
@ -1203,7 +1204,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
@ -1883,7 +1884,7 @@ public:
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
if (inputs_.depth() == CV_16S)
|
||||
if (inputs_.depth() == CV_16F)
|
||||
return false;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
@ -1990,7 +1991,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr));
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -37,7 +37,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -337,7 +337,7 @@ public:
|
||||
std::vector<UMat> outputs;
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
if (use_half)
|
||||
{
|
||||
std::vector<UMat> orig_inputs;
|
||||
@ -345,7 +345,7 @@ public:
|
||||
|
||||
inputs.resize(orig_inputs.size());
|
||||
for (size_t i = 0; i < orig_inputs.size(); i++)
|
||||
convertFp16(orig_inputs[i], inputs[i]);
|
||||
orig_inputs[i].convertTo(inputs[i], CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -410,7 +410,7 @@ public:
|
||||
if (use_half)
|
||||
{
|
||||
UMat half_umat;
|
||||
convertFp16(umat, half_umat);
|
||||
umat.convertTo(half_umat, CV_16F);
|
||||
outs.assign(std::vector<UMat>(1, half_umat));
|
||||
}
|
||||
|
||||
@ -428,7 +428,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
}
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -454,7 +454,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -243,7 +243,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
|
||||
func.applyOCL(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -590,7 +590,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
|
||||
if ((inputs_.depth() == CV_16F && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
|
||||
return false;
|
||||
|
||||
if (hasVecInput)
|
||||
@ -610,7 +610,7 @@ public:
|
||||
size_t localsize[] = { 128 };
|
||||
size_t globalsize[] = { (size_t)channels / 4 * localsize[0] };
|
||||
String opts;
|
||||
if (inputs_.depth() == CV_16S)
|
||||
if (inputs_.depth() == CV_16F)
|
||||
opts = " -DDtype=half -DDtype4=half4 -DDtype8=half8";
|
||||
else
|
||||
opts = " -DDtype=float -DDtype4=float4 -DDtype8=float8";
|
||||
@ -636,7 +636,7 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inputs_.depth() == CV_16S)
|
||||
if (inputs_.depth() == CV_16F)
|
||||
return false;
|
||||
|
||||
float coeff1 = coeffs.empty() ? 1.f : coeffs[0];
|
||||
@ -689,7 +689,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -105,7 +105,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -357,7 +357,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
@ -385,9 +385,9 @@ public:
|
||||
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(A, A_fp32);
|
||||
convertFp16(B, B_fp32);
|
||||
convertFp16(C, C_fp32);
|
||||
A.convertTo(A_fp32, CV_32F);
|
||||
B.convertTo(B_fp32, CV_32F);
|
||||
C.convertTo(C_fp32, CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -398,9 +398,9 @@ public:
|
||||
cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32);
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(A_fp32, A);
|
||||
convertFp16(B_fp32, B);
|
||||
convertFp16(C_fp32, C);
|
||||
A_fp32.convertTo(A, CV_16F);
|
||||
B_fp32.convertTo(B, CV_16F);
|
||||
C_fp32.convertTo(C, CV_16F);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@ -431,7 +431,7 @@ public:
|
||||
for (int i = 0; i < umat_blobs.size(); i++)
|
||||
{
|
||||
if (!umat_blobs[i].empty())
|
||||
convertFp16(umat_blobs[i], half_blobs[i]);
|
||||
umat_blobs[i].convertTo(half_blobs[i], CV_16F);
|
||||
}
|
||||
}
|
||||
|
||||
@ -472,8 +472,8 @@ public:
|
||||
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(srcMat, srcMat_fp32);
|
||||
convertFp16(dstMat, dstMat_fp32);
|
||||
srcMat.convertTo(srcMat_fp32, CV_32F);
|
||||
dstMat.convertTo(dstMat_fp32, CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -491,8 +491,8 @@ public:
|
||||
}
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(srcMat_fp32, srcMat);
|
||||
convertFp16(dstMat_fp32, dstMat);
|
||||
srcMat_fp32.convertTo(srcMat, CV_16F);
|
||||
dstMat_fp32.convertTo(dstMat, CV_16F);
|
||||
}
|
||||
}
|
||||
|
||||
@ -508,7 +508,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !isMatMul,
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -70,7 +70,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -57,12 +57,12 @@ public:
|
||||
const Mat& inp = inputs[0];
|
||||
|
||||
int indicesType = inputs[1].type();
|
||||
CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16SC1, "");
|
||||
CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16FC1, "");
|
||||
Mat indices32S;
|
||||
if (indicesType == CV_16S/*FP16*/)
|
||||
if (indicesType == CV_16F/*FP16*/)
|
||||
{
|
||||
Mat indicesF32;
|
||||
convertFp16(inputs[1], indicesF32);
|
||||
inputs[1].convertTo(indicesF32, CV_32F);
|
||||
indicesF32.convertTo(indices32S, CV_32S);
|
||||
}
|
||||
else
|
||||
|
@ -172,7 +172,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S) {
|
||||
if (inputs_arr.depth() == CV_16F) {
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
@ -95,7 +95,7 @@ public:
|
||||
float inv_norm_size = 1.f / norm_size;
|
||||
|
||||
// no fp16 support
|
||||
if (input.depth() == CV_16S) {
|
||||
if (input.depth() == CV_16F) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
@ -107,7 +107,7 @@ public:
|
||||
float inv_norm_size = 1.f / norm_size;
|
||||
|
||||
// no fp16 support
|
||||
if (input.depth() == CV_16S) {
|
||||
if (input.depth() == CV_16F) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -99,7 +99,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
@ -140,7 +140,7 @@ public:
|
||||
const auto &bias = inputs.size() == 3 ? inputs[2] : UMat::zeros(norm_size, 1, CV_32F);
|
||||
|
||||
// no fp16 support
|
||||
if (input.depth() == CV_16S) {
|
||||
if (input.depth() == CV_16F) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -121,7 +121,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
@ -166,7 +166,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -119,7 +119,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
@ -154,7 +154,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inputs_arr.depth() == CV_16S);
|
||||
bool use_half = (inputs_arr.depth() == CV_16F);
|
||||
inputs_arr.getUMatVector(inputs);
|
||||
outputs_arr.getUMatVector(outputs);
|
||||
|
||||
@ -192,9 +192,9 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
}
|
||||
|
||||
if (use_half) {
|
||||
convertFp16(A, A_fp32);
|
||||
convertFp16(B, B_fp32);
|
||||
convertFp16(C, C_fp32);
|
||||
A.convertTo(A_fp32, CV_32F);
|
||||
B.convertTo(B_fp32, CV_32F);
|
||||
C.convertTo(C_fp32, CV_32F);
|
||||
} else {
|
||||
A_fp32 = A;
|
||||
B_fp32 = B;
|
||||
@ -203,9 +203,9 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
|
||||
cv::gemm(A_fp32, B_fp32, 1.f, noArray(), 0.f, C_fp32);
|
||||
if (use_half) {
|
||||
convertFp16(A_fp32, A);
|
||||
convertFp16(B_fp32, B);
|
||||
convertFp16(C_fp32, C);
|
||||
A_fp32.convertTo(A, CV_16F);
|
||||
B_fp32.convertTo(B, CV_16F);
|
||||
C_fp32.convertTo(C, CV_16F);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -75,7 +75,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -149,7 +149,7 @@ public:
|
||||
UMat& bnorm_bias = umat_shift;
|
||||
|
||||
const unsigned LOCAL_SIZE = 128;
|
||||
bool use_half = (inputs[0].depth() == CV_16S);
|
||||
bool use_half = (inputs[0].depth() == CV_16F);
|
||||
String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u", use_half ? "half" : "float",
|
||||
use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4",
|
||||
LOCAL_SIZE
|
||||
@ -164,7 +164,7 @@ public:
|
||||
CV_Assert(newRows != 0);
|
||||
|
||||
MatShape s = shape(newRows, inpMat.total() / newRows);
|
||||
UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16S : CV_32F);
|
||||
UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16F : CV_32F);
|
||||
UMat tmpMat = UMat(s[0], s[1], CV_32F);
|
||||
float alpha = 1.0f / s[1];
|
||||
|
||||
@ -226,7 +226,7 @@ public:
|
||||
if (normVariance && (row_size % 4 == 0) && (plane_size % 4 == 0))
|
||||
return fast_forward_ocl(inputs, outputs);
|
||||
|
||||
if (inputs[0].depth() == CV_16S)
|
||||
if (inputs[0].depth() == CV_16F)
|
||||
return false;
|
||||
|
||||
String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
|
||||
@ -309,7 +309,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -638,7 +638,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -112,7 +112,7 @@ public:
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
if (inputs_.depth() == CV_16S)
|
||||
if (inputs_.depth() == CV_16F)
|
||||
return false;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
@ -193,7 +193,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -129,17 +129,7 @@ public:
|
||||
|
||||
if (paddingType == "constant")
|
||||
{
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
std::vector<float> paddingValue_fp32(1, paddingValue);
|
||||
std::vector<int16_t> paddingValue_fp16(1);
|
||||
cv::convertFp16(paddingValue_fp32, paddingValue_fp16);
|
||||
outputs[0].setTo(paddingValue_fp16[0]);
|
||||
}
|
||||
else if (inputs_arr.depth() == CV_8S)
|
||||
outputs[0].setTo(saturate_cast<int8_t>(paddingValue));
|
||||
else
|
||||
outputs[0].setTo(paddingValue);
|
||||
outputs[0].setTo(paddingValue);
|
||||
inputs[0].copyTo(outputs[0](dstRanges));
|
||||
}
|
||||
else if (paddingType == "reflect" || paddingType == "edge")
|
||||
|
@ -319,7 +319,7 @@ public:
|
||||
mnew_stride.copyTo(unew_stride);
|
||||
}
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
String opts = format("-DDtype=%s", use_half ? "half" : "float");
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
@ -350,7 +350,7 @@ public:
|
||||
inputs_arr.depth() != CV_8S,
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -293,7 +293,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
@ -353,7 +353,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
}
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -346,7 +346,7 @@ public:
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
bool use_half = (inps.depth() == CV_16F);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
@ -431,7 +431,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -186,7 +186,7 @@ public:
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
if (inputs_.depth() == CV_16S)
|
||||
if (inputs_.depth() == CV_16F)
|
||||
return false;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
@ -269,7 +269,7 @@ public:
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -390,7 +390,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
@ -906,7 +906,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
@ -1066,7 +1066,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -456,7 +456,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -161,7 +161,7 @@ public:
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
// TODO: implement a logistic activation to classification scores.
|
||||
if (useLogistic || inps.depth() == CV_16S)
|
||||
if (useLogistic || inps.depth() == CV_16F)
|
||||
return false;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
@ -232,7 +232,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -184,7 +184,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -115,7 +115,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -74,7 +74,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S) {
|
||||
if (inputs_arr.depth() == CV_16F) {
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S) {
|
||||
if (inputs_arr.depth() == CV_16F) {
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -621,7 +621,7 @@ public:
|
||||
{
|
||||
std::vector<int> inpIdx(dimsNum, 0);
|
||||
std::vector<int> outIdx(dimsNum, 0);
|
||||
if (inpMat.type() == CV_16S)
|
||||
if (inpMat.type() == CV_16F)
|
||||
getSliceRecursive<int16_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
|
||||
else if (inpMat.type() == CV_8S)
|
||||
getSliceRecursive<int8_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
|
||||
|
@ -132,7 +132,7 @@ public:
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
bool use_half = (inputs_.depth() == CV_16S);
|
||||
bool use_half = (inputs_.depth() == CV_16F);
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
internals_.getUMatVector(internals);
|
||||
@ -217,7 +217,7 @@ public:
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
@ -514,7 +514,7 @@ void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes)
|
||||
CV_Assert(layerShapesIt != layersShapes.end());
|
||||
|
||||
if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F)
|
||||
ld.dtype = CV_16S;
|
||||
ld.dtype = CV_16F;
|
||||
|
||||
std::vector<LayerPin> pinsForInternalBlobs;
|
||||
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
|
||||
@ -572,7 +572,7 @@ void Net::Impl::allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
|
||||
preferableTarget == DNN_TARGET_OPENCL_FP16 &&
|
||||
layers[0].dtype == CV_32F)
|
||||
{
|
||||
layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
|
||||
layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16F);
|
||||
}
|
||||
inputShapes.push_back(shape(inp));
|
||||
}
|
||||
@ -656,8 +656,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
|
||||
{
|
||||
UMat& u = umat_outputBlobs[i];
|
||||
Mat m;
|
||||
if (u.depth() == CV_16S) // FP16
|
||||
convertFp16(u, m);
|
||||
if (u.depth() == CV_16F) // FP16
|
||||
u.convertTo(m, CV_32F);
|
||||
else
|
||||
m = u.getMat(ACCESS_READ);
|
||||
if (!checkRange(m))
|
||||
@ -679,8 +679,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
|
||||
{
|
||||
UMat& u = umat_inputBlobs[i];
|
||||
Mat m;
|
||||
if (u.depth() == CV_16S) // FP16
|
||||
convertFp16(u, m);
|
||||
if (u.depth() == CV_16F) // FP16
|
||||
u.convertTo(m, CV_32F);
|
||||
else
|
||||
m = u.getMat(ACCESS_READ);
|
||||
std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
|
||||
@ -690,8 +690,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
|
||||
{
|
||||
UMat& u = umat_outputBlobs[i];
|
||||
Mat m;
|
||||
if (u.depth() == CV_16S) // FP16
|
||||
convertFp16(u, m);
|
||||
if (u.depth() == CV_16F) // FP16
|
||||
u.convertTo(m, CV_32F);
|
||||
else
|
||||
m = u.getMat(ACCESS_READ);
|
||||
std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
|
||||
@ -701,8 +701,8 @@ void Net::Impl::forwardLayer(LayerData& ld)
|
||||
{
|
||||
UMat& u = umat_internalBlobs[i];
|
||||
Mat m;
|
||||
if (u.depth() == CV_16S) // FP16
|
||||
convertFp16(u, m);
|
||||
if (u.depth() == CV_16F) // FP16
|
||||
u.convertTo(m, CV_32F);
|
||||
else
|
||||
m = u.getMat(ACCESS_READ);
|
||||
std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
|
||||
@ -981,12 +981,12 @@ void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputNam
|
||||
ld.outputBlobsWrappers[i]->copyToHost();
|
||||
}
|
||||
}
|
||||
if (ld.outputBlobs[0].depth() == CV_16S)
|
||||
if (ld.outputBlobs[0].depth() == CV_16F)
|
||||
{
|
||||
std::vector<Mat>& outputvec = *(std::vector<Mat>*)outputBlobs.getObj();
|
||||
outputvec.resize(ld.outputBlobs.size());
|
||||
for (int i = 0; i < outputvec.size(); i++)
|
||||
convertFp16(ld.outputBlobs[i], outputvec[i]);
|
||||
ld.outputBlobs[i].convertTo(outputvec[i], CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1009,7 +1009,7 @@ void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputNam
|
||||
std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
|
||||
outputvec.resize(out_vec.size());
|
||||
for (int i = 0; i < out_vec.size(); i++)
|
||||
convertFp16(out_vec[i], outputvec[i]);
|
||||
out_vec[i].convertTo(outputvec[i], CV_32F);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1275,7 +1275,7 @@ void Net::Impl::updateLayersShapes()
|
||||
preferableTarget == DNN_TARGET_OPENCL_FP16 &&
|
||||
inputLayerData.dtype == CV_32F)
|
||||
{
|
||||
inp.create(inp.dims, inp.size, CV_16S);
|
||||
inp.create(inp.dims, inp.size, CV_16F);
|
||||
}
|
||||
inputShapes.push_back(shape(inp));
|
||||
}
|
||||
@ -1344,10 +1344,10 @@ Mat Net::Impl::getBlob(const LayerPin& pin) const
|
||||
ld.outputBlobsWrappers[pin.oid]->copyToHost();
|
||||
}
|
||||
|
||||
if (ld.outputBlobs[pin.oid].depth() == CV_16S)
|
||||
if (ld.outputBlobs[pin.oid].depth() == CV_16F)
|
||||
{
|
||||
Mat output_blob;
|
||||
convertFp16(ld.outputBlobs[pin.oid], output_blob);
|
||||
ld.outputBlobs[pin.oid].convertTo(output_blob, CV_32F);
|
||||
return output_blob;
|
||||
}
|
||||
else
|
||||
|
@ -156,7 +156,7 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
|
||||
CHECK_EQ(gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 || gemm_type == GEMM_TYPE_FAST_IMAGE_32_2 ||
|
||||
gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE, true) << "Invalid fast image gemm type." << std::endl;
|
||||
|
||||
bool halfPrecisionMode = (A.depth() == CV_16S);
|
||||
bool halfPrecisionMode = (A.depth() == CV_16F);
|
||||
|
||||
if (is_image_a)
|
||||
{
|
||||
@ -439,7 +439,7 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
|
||||
CHECK_EQ(gemm_type == GEMM_TYPE_FAST_BUFFER, true)
|
||||
<< "Invalid fast buffer gemm type." << std::endl;
|
||||
|
||||
bool halfPrecisionMode = (A.depth() == CV_16S);
|
||||
bool halfPrecisionMode = (A.depth() == CV_16F);
|
||||
|
||||
size_t sub_group_size = 8;
|
||||
bool is_small_batch = (M == 2 || M == 4 || M == 8);
|
||||
@ -544,7 +544,7 @@ bool ocl4dnnGEMMCommon(const CBLAS_TRANSPOSE TransB,
|
||||
const UMat B_image, UMat C,
|
||||
const size_t max_image_size)
|
||||
{
|
||||
bool halfPrecisionMode = (A.depth() == CV_16S);
|
||||
bool halfPrecisionMode = (A.depth() == CV_16F);
|
||||
gemm_type_t gemm_type = halfPrecisionMode ? GEMM_TYPE_FAST_BUFFER : GEMM_TYPE_FAST_IMAGE_32_1;
|
||||
|
||||
if (gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 ||
|
||||
@ -594,7 +594,7 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
|
||||
const int32_t offy)
|
||||
{
|
||||
bool ret = false;
|
||||
bool use_half = (A.depth() == CV_16S);
|
||||
bool use_half = (A.depth() == CV_16F);
|
||||
String opts;
|
||||
if (use_half)
|
||||
opts = format("-DDtype=%s -DDtype4=%s -Dconvert_Dtype=convert_%s", "half", "half4", "half");
|
||||
@ -665,7 +665,7 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
|
||||
const UMat X, const int32_t offX, UMat Y,
|
||||
const int32_t offY)
|
||||
{
|
||||
bool use_half = (X.depth() == CV_16S);
|
||||
bool use_half = (X.depth() == CV_16F);
|
||||
String opts;
|
||||
if (use_half)
|
||||
opts = "-DDtype=half -DDtype4=half4 -Dconvert_Dtype=convert_half";
|
||||
|
@ -582,10 +582,10 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
|
||||
}
|
||||
|
||||
if (use_half_ && !bias.empty())
|
||||
CV_CheckTypeEQ(bias.type(), CV_16SC1, "");
|
||||
CV_CheckTypeEQ(bias.type(), CV_16FC1, "");
|
||||
|
||||
if (use_half_)
|
||||
CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
|
||||
CV_CheckTypeEQ(weight.type(), CV_16FC1, "");
|
||||
|
||||
prepareKernel(bottom, top, weight, bias, numImages);
|
||||
if (bestKernelConfig.empty())
|
||||
@ -740,7 +740,7 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
|
||||
if (swizzled_weights_umat.empty())
|
||||
swizzled_weights_umat.create(1, (int)alignSize(num_output_, 16) * channels_ *
|
||||
kernel_h_ * (int)alignSize(kernel_w_, 2),
|
||||
(use_half_) ? CV_16SC1 : CV_32FC1);
|
||||
(use_half_) ? CV_16FC1 : CV_32FC1);
|
||||
|
||||
if (!interleave) {
|
||||
int32_t channels = channels_ / group_;
|
||||
@ -777,8 +777,8 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
|
||||
UMat weight_tmp; // FP32 in half mode, TODO implement FP16 repack
|
||||
if (use_half_)
|
||||
{
|
||||
CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
|
||||
convertFp16(weight, weight_tmp);
|
||||
CV_CheckTypeEQ(weight.type(), CV_16FC1, "");
|
||||
weight.convertTo(weight_tmp, CV_32F);
|
||||
weightMat = weight_tmp.getMat(ACCESS_READ);
|
||||
swizzledWeightMat.create(shape(swizzled_weights_umat), CV_32F);
|
||||
}
|
||||
@ -817,7 +817,7 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
|
||||
weightMat.release();
|
||||
|
||||
if (use_half_)
|
||||
convertFp16(swizzledWeightMat, swizzled_weights_umat);
|
||||
swizzledWeightMat.convertTo(swizzled_weights_umat, CV_16F);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -1140,7 +1140,7 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
|
||||
|
||||
//int32_t sz[4] = {numImages, num_output_, output_h_, output_w_};
|
||||
CV_CheckEQ(top.total(), (size_t)numImages * num_output_ * output_h_ * output_w_, "");
|
||||
CV_CheckTypeEQ(top.type(), (use_half_) ? CV_16SC1 : CV_32FC1, "");
|
||||
CV_CheckTypeEQ(top.type(), (use_half_) ? CV_16FC1 : CV_32FC1, "");
|
||||
top.setTo(Scalar::all(0));
|
||||
|
||||
bool saved_tuned = tuned_;
|
||||
@ -1154,8 +1154,8 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
|
||||
Mat mat_top, mat_verify_top;
|
||||
if (use_half_)
|
||||
{
|
||||
convertFp16(top, new_top);
|
||||
convertFp16(verifyTop, new_verify_top);
|
||||
top.convertTo(new_top, CV_32F);
|
||||
verifyTop.convertTo(new_verify_top, CV_32F);
|
||||
|
||||
mat_top = new_top.getMat(ACCESS_READ);
|
||||
mat_verify_top = new_verify_top.getMat(ACCESS_READ);
|
||||
@ -1827,7 +1827,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
|
||||
if (loadTunedConfig()) // check external storage
|
||||
return;
|
||||
|
||||
UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1);
|
||||
UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16FC1 : CV_32FC1);
|
||||
|
||||
calculateBenchmark(bottom, benchData, weight, bias, numImages);
|
||||
|
||||
|
@ -102,10 +102,10 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
|
||||
UMat biasOneMat = UMat::ones(M_, 1, CV_32F);
|
||||
UMat newbias, tmpTop;
|
||||
|
||||
convertFp16(bias, newbias);
|
||||
convertFp16(top, tmpTop);
|
||||
bias.convertTo(newbias, CV_32F);
|
||||
top.convertTo(tmpTop, CV_32F);
|
||||
cv::gemm(biasOneMat, newbias, 1, tmpTop, 1, tmpTop, 0);
|
||||
convertFp16(tmpTop, top);
|
||||
tmpTop.convertTo(top, CV_16F);
|
||||
} else {
|
||||
UMat biasOnesMat = UMat::ones(M_, 1, CV_32F);
|
||||
cv::gemm(biasOnesMat, bias, 1, top, 1, top, 0);
|
||||
|
@ -2443,7 +2443,7 @@ void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodePr
|
||||
case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break;
|
||||
case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break;
|
||||
case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break;
|
||||
case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
|
||||
case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16F; break;
|
||||
case opencv_onnx::TensorProto_DataType_INT8:
|
||||
case opencv_onnx::TensorProto_DataType_INT16:
|
||||
case opencv_onnx::TensorProto_DataType_INT32:
|
||||
|
@ -915,22 +915,22 @@ Mat getTensorContentRef_(const tensorflow::TensorProto& tensor)
|
||||
}
|
||||
case tensorflow::DT_HALF:
|
||||
{
|
||||
Mat halfs;
|
||||
if (!content.empty())
|
||||
{
|
||||
static const int kHalfSize = 2;
|
||||
halfs = Mat(1, content.size() / kHalfSize, CV_16UC1, (void*)content.c_str());
|
||||
Mat halfs(1, content.size() / kHalfSize, CV_16FC1, (void*)content.c_str());
|
||||
halfs.convertTo(m, CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
const RepeatedField<int32_t>& field = tensor.half_val();
|
||||
CV_Assert(!field.empty());
|
||||
Mat ints(1, field.size(), CV_32SC1, (void*)field.data());
|
||||
Mat halfs;
|
||||
ints.convertTo(halfs, CV_16UC1);
|
||||
Mat halfsSigned(halfs.size(), CV_16FC1, halfs.data);
|
||||
halfsSigned.convertTo(m, CV_32F);
|
||||
}
|
||||
// Reinterpret as a signed shorts just for a convertFp16 call.
|
||||
Mat halfsSigned(halfs.size(), CV_16SC1, halfs.data);
|
||||
convertFp16(halfsSigned, m);
|
||||
break;
|
||||
}
|
||||
case tensorflow::DT_QUINT8:
|
||||
|
@ -101,7 +101,7 @@ Mat TFLiteImporter::parseTensor(const Tensor& tensor)
|
||||
dtype = CV_32S;
|
||||
break;
|
||||
case TensorType_FLOAT16:
|
||||
dtype = CV_16S;
|
||||
dtype = CV_16F;
|
||||
break;
|
||||
case TensorType_INT8:
|
||||
dtype = CV_8S;
|
||||
@ -227,7 +227,7 @@ void TFLiteImporter::populateNet()
|
||||
if (!data.empty()) {
|
||||
// Dequantize a buffer
|
||||
Mat dataFP32;
|
||||
convertFp16(data, dataFP32);
|
||||
data.convertTo(dataFP32, CV_32F);
|
||||
allTensors[op_outputs->Get(0)] = dataFP32;
|
||||
continue;
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ enum TorchType
|
||||
TYPE_FLOAT = CV_32F,
|
||||
TYPE_BYTE = CV_8U,
|
||||
TYPE_CHAR = CV_8S,
|
||||
TYPE_SHORT = CV_16S,
|
||||
TYPE_SHORT = CV_16F,
|
||||
TYPE_INT = CV_32S,
|
||||
TYPE_LONG = CV_32SC2
|
||||
};
|
||||
@ -276,7 +276,7 @@ struct TorchImporter
|
||||
THFile_readByteRaw(file, (uchar*)storageMat.data, size);
|
||||
break;
|
||||
case TYPE_SHORT:
|
||||
storageMat.create(1, size, CV_16S);
|
||||
storageMat.create(1, size, CV_16F);
|
||||
THFile_readShortRaw(file, (short*)storageMat.data, size);
|
||||
break;
|
||||
case TYPE_INT:
|
||||
|
@ -1613,7 +1613,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
if (inputs_arr.depth() == CV_16F)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
|
Loading…
Reference in New Issue
Block a user