mirror of
https://github.com/opencv/opencv.git
synced 2025-06-09 18:43:05 +08:00
Merge pull request #22666 from zihaomu:support_onnx_qdq_model
DNN: let Quant and Dequant of ONNX_importer support the Constant input. * let Quant and Dequant support the Constant input. * fix negative value of axis.
This commit is contained in:
parent
540aa13300
commit
903bf0147e
@ -422,16 +422,16 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
class CV_EXPORTS QuantizeLayer : public Layer
|
class CV_EXPORTS QuantizeLayer : public Layer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
float scale;
|
std::vector<float> scales;
|
||||||
int zeropoint;
|
std::vector<int> zeropoints;
|
||||||
static Ptr<QuantizeLayer> create(const LayerParams &params);
|
static Ptr<QuantizeLayer> create(const LayerParams &params);
|
||||||
};
|
};
|
||||||
|
|
||||||
class CV_EXPORTS DequantizeLayer : public Layer
|
class CV_EXPORTS DequantizeLayer : public Layer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
float scale;
|
std::vector<float> scales;
|
||||||
int zeropoint;
|
std::vector<int> zeropoints;
|
||||||
static Ptr<DequantizeLayer> create(const LayerParams &params);
|
static Ptr<DequantizeLayer> create(const LayerParams &params);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -11,14 +11,88 @@ namespace cv
|
|||||||
namespace dnn
|
namespace dnn
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Expands per-channel quantization data (scales or zero points) to a full target shape.
//
// data        - in/out. On entry it must hold exactly targetShape[axis] elements and have at
//               least as many dimensions as targetShape (both asserted). On return it is
//               replaced by a Mat whose extent along every axis other than `axis` has been
//               grown to the matching targetShape entry by repeating the existing values.
// targetShape - shape to broadcast to.
// axis        - index of the channel axis inside targetShape; must already be non-negative
//               and smaller than targetShape.size() (asserted).
static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int axis)
{
    const int targetDims = static_cast<int>(targetShape.size());
    CV_Assert(axis >= 0 && axis < targetDims);
    CV_Assert(data.total() == static_cast<size_t>(targetShape[axis]));

    // Shape of the partially broadcast data; one more axis is filled in per iteration.
    MatShape currentShape = shape(data);
    CV_Assert(currentShape.size() >= targetShape.size());

    for (int i = 0; i < targetDims; i++)
    {
        if (i == axis)
            continue; // the channel axis already has its final extent

        currentShape[i] = targetShape[i];

        // View the data as 2-D with all axes before `i` folded into the rows, tile it
        // horizontally targetShape[i] times, then reinterpret the result with axis `i` expanded.
        data = data.reshape(0, total(data, 0, i));
        Mat tiled = cv::repeat(data, 1, currentShape[i]);
        data = tiled.reshape(0, currentShape);
    }
}
|
||||||
|
|
||||||
|
// Builds float Mats holding the scales and zero points broadcast to the shape of the input.
//
// scalesMat     - out: CV_32FC1 Mat of shape targetShape filled from `scales`.
// zeropointsMat - out: CV_32FC1 Mat of shape targetShape filled from `zeropoints` (cast to float).
// scales        - per-channel scales; left unmodified (the values are copied).
// zeropoints    - per-channel zero points; must have the same length as `scales` (asserted).
// targetShape   - shape of the tensor the values are broadcast to.
// axis          - index of the channel axis inside targetShape; must already be non-negative
//                 and smaller than targetShape.size() (asserted).
static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector<float>& scales,
                                       const std::vector<int>& zeropoints, const MatShape& targetShape, int axis)
{
    // Validate before indexing: `axis` is used as an index just below, and both vectors are
    // read with the same index range.
    CV_Assert(axis >= 0 && axis < static_cast<int>(targetShape.size()));
    CV_Assert(zeropoints.size() == scales.size());

    // Start from a shape that is 1 everywhere except along the channel axis.
    MatShape subTargetShape(targetShape.size(), 1);
    subTargetShape[axis] = static_cast<int>(scales.size());

    const int subDims = static_cast<int>(subTargetShape.size());
    zeropointsMat.create(subDims, subTargetShape.data(), CV_32FC1);
    scalesMat.create(subDims, subTargetShape.data(), CV_32FC1);

    // Copy the values into the freshly created Mats so the source vectors stay untouched.
    const size_t len = scales.size();

    float* scalePtr = scalesMat.ptr<float>(0);
    for (size_t i = 0; i < len; i++)
        scalePtr[i] = scales[i];

    float* zpPtr = zeropointsMat.ptr<float>(0);
    for (size_t i = 0; i < len; i++)
        zpPtr[i] = static_cast<float>(zeropoints[i]);

    broadcast1D2TargetMat(scalesMat, targetShape, axis);
    broadcast1D2TargetMat(zeropointsMat, targetShape, axis);
}
|
||||||
|
|
||||||
// Quantize FP32/FP16 Inputs to INT8
|
// Quantize FP32/FP16 Inputs to INT8
|
||||||
class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
|
class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
int axis;
|
||||||
|
bool is1D;
|
||||||
|
Mat scalesMat, zeropointsMat; // Holds the broadcasted scales and zero points.
|
||||||
|
|
||||||
QuantizeLayerImpl(const LayerParams& params)
|
QuantizeLayerImpl(const LayerParams& params)
|
||||||
{
|
{
|
||||||
scale = params.get<float>("scales", 1.0f);
|
is1D = params.get<bool>("is1D", false);
|
||||||
zeropoint = params.get<int>("zeropoints", 0);
|
axis = params.get<int>("axis", 1);
|
||||||
|
if (!is1D)
|
||||||
|
{
|
||||||
|
scales.push_back(params.get<float>("scales", 1.0f));
|
||||||
|
zeropoints.push_back(params.get<int>("zeropoints", 0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
DictValue paramScales = params.get("scales");
|
||||||
|
int i, n = paramScales.size();
|
||||||
|
|
||||||
|
CV_Assert(n > 0);
|
||||||
|
scales.resize(n, 0.);
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
scales[i] = paramScales.get<float>(i);
|
||||||
|
|
||||||
|
zeropoints.resize(n, 0);
|
||||||
|
DictValue paramZp = params.get("zeropoints");
|
||||||
|
n = paramZp.size();
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
zeropoints[i] = paramZp.get<int>(i);
|
||||||
|
}
|
||||||
setParamsFrom(params);
|
setParamsFrom(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,6 +116,14 @@ public:
|
|||||||
std::vector<Mat> inputs, outputs;
|
std::vector<Mat> inputs, outputs;
|
||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
|
|
||||||
|
axis = normalize_axis(axis, shape(inputs[0]).size());
|
||||||
|
|
||||||
|
if (is1D)
|
||||||
|
{
|
||||||
|
MatShape inputShape = shape(inputs[0]);
|
||||||
|
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
@ -58,7 +140,7 @@ public:
|
|||||||
inputs[0] = inputFp32; // replace
|
inputs[0] = inputFp32; // replace
|
||||||
}
|
}
|
||||||
|
|
||||||
inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint);
|
inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -68,14 +150,26 @@ public:
|
|||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !is1D,
|
||||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
std::vector<Mat> inputs, outputs;
|
std::vector<Mat> inputs, outputs;
|
||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
|
|
||||||
inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint);
|
if (outputs[0].depth() != CV_8S)
|
||||||
|
outputs[0].convertTo(outputs[0], CV_8S);
|
||||||
|
|
||||||
|
if (is1D)
|
||||||
|
{
|
||||||
|
Mat inputTmp;
|
||||||
|
divide(inputs[0], scalesMat, inputTmp);
|
||||||
|
subtract(inputTmp, zeropointsMat, inputTmp);
|
||||||
|
|
||||||
|
inputTmp.convertTo(outputs[0], CV_8S);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -83,10 +177,38 @@ public:
|
|||||||
class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
|
class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
int axis;
|
||||||
|
bool is1D;
|
||||||
|
Mat scalesMat, zeropointsMat; // Holds the broadcasted scales and zero points.
|
||||||
|
|
||||||
DequantizeLayerImpl(const LayerParams& params)
|
DequantizeLayerImpl(const LayerParams& params)
|
||||||
{
|
{
|
||||||
scale = params.get<float>("scales", 1.0f);
|
is1D = params.get<bool>("is1D", false);
|
||||||
zeropoint = params.get<int>("zeropoints", 0);
|
axis = params.get<int>("axis", 1);
|
||||||
|
|
||||||
|
if (!is1D)
|
||||||
|
{
|
||||||
|
scales.push_back(params.get<float>("scales", 1.0f));
|
||||||
|
zeropoints.push_back(params.get<int>("zeropoints", 0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
DictValue paramScales = params.get("scales");
|
||||||
|
int i, n = paramScales.size();
|
||||||
|
|
||||||
|
CV_Assert(n > 0);
|
||||||
|
scales.resize(n);
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
scales[i] = paramScales.get<float>(i);
|
||||||
|
|
||||||
|
zeropoints.resize(n, 0);
|
||||||
|
DictValue paramZp = params.get("zeropoints");
|
||||||
|
n = paramZp.size();
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
zeropoints[i] = paramZp.get<int>(i);
|
||||||
|
}
|
||||||
|
|
||||||
setParamsFrom(params);
|
setParamsFrom(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,6 +232,14 @@ public:
|
|||||||
std::vector<Mat> inputs, outputs;
|
std::vector<Mat> inputs, outputs;
|
||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
|
|
||||||
|
axis = normalize_axis(axis, shape(inputs[0]).size());
|
||||||
|
|
||||||
|
if (is1D)
|
||||||
|
{
|
||||||
|
MatShape inputShape = shape(inputs[0]);
|
||||||
|
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
@ -120,7 +250,7 @@ public:
|
|||||||
outputs_.getUMatVector(outputs);
|
outputs_.getUMatVector(outputs);
|
||||||
|
|
||||||
UMat outputFp32;
|
UMat outputFp32;
|
||||||
inputs[0].convertTo(outputFp32, CV_32F, scale, -(scale*zeropoint));
|
inputs[0].convertTo(outputFp32, CV_32F, scales[0], -(scales[0]*zeropoints[0]));
|
||||||
|
|
||||||
if (outputs_.depth() == CV_16S)
|
if (outputs_.depth() == CV_16S)
|
||||||
convertFp16(outputFp32, outputs[0]);
|
convertFp16(outputFp32, outputs[0]);
|
||||||
@ -135,14 +265,25 @@ public:
|
|||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !is1D,
|
||||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
std::vector<Mat> inputs, outputs;
|
std::vector<Mat> inputs, outputs;
|
||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
|
|
||||||
inputs[0].convertTo(outputs[0], CV_32F, scale, -(scale*zeropoint));
|
if (outputs[0].depth() != CV_32F)
|
||||||
|
outputs[0].convertTo(outputs[0], CV_32F);
|
||||||
|
|
||||||
|
if (is1D)
|
||||||
|
{
|
||||||
|
Mat inputTmp;
|
||||||
|
inputs[0].convertTo(inputTmp, CV_32F);
|
||||||
|
subtract(inputTmp, zeropointsMat, inputTmp);
|
||||||
|
multiply(inputTmp, scalesMat, outputs[0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
inputs[0].convertTo(outputs[0], CV_32F, scales[0], -(scales[0]*zeropoints[0]));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ extern bool DNN_DIAGNOSTICS_RUN;
|
|||||||
class ONNXLayerHandler;
|
class ONNXLayerHandler;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static T getScaleFromMat(Mat m)
|
static T getScalarFromMat(Mat m)
|
||||||
{
|
{
|
||||||
CV_Assert(m.total() == 1);
|
CV_Assert(m.total() == 1);
|
||||||
return m.at<T>(0);
|
return m.at<T>(0);
|
||||||
@ -380,6 +380,9 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
|
|||||||
inpShapes[i] = shape(inputs[i]);
|
inpShapes[i] = shape(inputs[i]);
|
||||||
if (i > 0 && ddepth != inputs[i].depth())
|
if (i > 0 && ddepth != inputs[i].depth())
|
||||||
CV_Error(Error::StsNotImplemented, "Mixed input data types.");
|
CV_Error(Error::StsNotImplemented, "Mixed input data types.");
|
||||||
|
|
||||||
|
// Quantize and Dequantize layer have different output type than input.
|
||||||
|
if (params.type != "Quantize" && params.type != "Dequantize")
|
||||||
ddepth = inputs[i].depth();
|
ddepth = inputs[i].depth();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3240,20 +3243,66 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx
|
|||||||
{
|
{
|
||||||
CV_Assert(node_proto.input_size() == 2 || node_proto.input_size() == 3);
|
CV_Assert(node_proto.input_size() == 2 || node_proto.input_size() == 3);
|
||||||
layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
|
layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
|
||||||
|
int axis = layerParams.get<int>("axis", 1);
|
||||||
|
// For QuantizeLinear and DequantizeLinear, the scale and zeropoint can be a Scalar (per-tensor quantized)
|
||||||
|
// or 1-D tensor (per-channel quantized).
|
||||||
|
bool is1D = false;
|
||||||
|
|
||||||
float scale = getScaleFromMat<float>(getBlob(node_proto, 1));
|
Mat scaleMat = getBlob(node_proto, 1);
|
||||||
int zeropoint = 0;
|
if(scaleMat.total() > 1) is1D = true;
|
||||||
|
|
||||||
|
Mat zpMat;
|
||||||
if (node_proto.input_size() == 3)
|
if (node_proto.input_size() == 3)
|
||||||
zeropoint = (int)getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
{
|
||||||
|
zpMat = getBlob(node_proto, 2);
|
||||||
|
CV_Assert(zpMat.total() == scaleMat.total()); // zero point must have the same number of elements as scale.
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is1D)
|
||||||
|
{
|
||||||
|
const int num = scaleMat.total();
|
||||||
|
|
||||||
|
std::vector<int> zeropoints(num, 0);
|
||||||
|
std::vector<float> scales(num, 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < num; i++)
|
||||||
|
{
|
||||||
|
scales[i] = scaleMat.at<float>(i);
|
||||||
|
if (!zpMat.empty())
|
||||||
|
zeropoints[i] = zpMat.depth() == CV_32S ?
|
||||||
|
zpMat.at<int>(i) : (int)zpMat.at<int8_t>(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
layerParams.set("is1D", true);
|
||||||
|
layerParams.set("axis", axis);
|
||||||
|
layerParams.set("scales", DictValue::arrayReal(scales.data(), scales.size()));
|
||||||
|
layerParams.set("zeropoints", DictValue::arrayInt(zeropoints.data(), zeropoints.size()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int zeropoint = zpMat.empty() ? 0 : zpMat.depth() == CV_32S ?
|
||||||
|
getScalarFromMat<int>(zpMat) : (int)getScalarFromMat<int8_t>(zpMat);
|
||||||
|
float scale = getScalarFromMat<float>(scaleMat);
|
||||||
|
|
||||||
|
layerParams.set("is1D", false);
|
||||||
layerParams.set("scales", scale);
|
layerParams.set("scales", scale);
|
||||||
layerParams.set("zeropoints", zeropoint);
|
layerParams.set("zeropoints", zeropoint);
|
||||||
|
}
|
||||||
|
|
||||||
if (layerParams.type == "Quantize")
|
if (layerParams.type == "Quantize")
|
||||||
layerParams.set("depth", CV_8S);
|
layerParams.set("depth", CV_8S);
|
||||||
else // Dequantize
|
else // Dequantize
|
||||||
layerParams.set("depth", CV_32F);
|
layerParams.set("depth", CV_32F);
|
||||||
|
|
||||||
|
if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Constant input: run the layer now and store the result as a constant.
|
||||||
|
{
|
||||||
|
std::vector<Mat> inputs, outputs;
|
||||||
|
inputs.push_back(getBlob(node_proto, 0));
|
||||||
|
|
||||||
|
runLayer(layerParams, inputs, outputs);
|
||||||
|
addConstant(node_proto.output(0), outputs[0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
addLayer(layerParams, node_proto);
|
addLayer(layerParams, node_proto);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3263,8 +3312,8 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP
|
|||||||
int ninputs = node_proto.input_size();
|
int ninputs = node_proto.input_size();
|
||||||
CV_Assert(ninputs == 8 || ninputs == 9);
|
CV_Assert(ninputs == 8 || ninputs == 9);
|
||||||
|
|
||||||
float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int inp_zp = (int)getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int inp_zp = (int)getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
|
|
||||||
if (layerParams.has("pad"))
|
if (layerParams.has("pad"))
|
||||||
{
|
{
|
||||||
@ -3312,8 +3361,8 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP
|
|||||||
bool per_channel = w_scale.total() == outCn;
|
bool per_channel = w_scale.total() == outCn;
|
||||||
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
||||||
|
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
|
||||||
int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
|
int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
|
||||||
|
|
||||||
Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
|
Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
|
||||||
|
|
||||||
@ -3349,8 +3398,8 @@ void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::Nod
|
|||||||
|
|
||||||
int firstInpDims = outShapes[node_proto.input(0)].size();
|
int firstInpDims = outShapes[node_proto.input(0)].size();
|
||||||
|
|
||||||
float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
|
|
||||||
Mat weights = getBlob(node_proto, 3).t();
|
Mat weights = getBlob(node_proto, 3).t();
|
||||||
int outCn = weights.size[0];
|
int outCn = weights.size[0];
|
||||||
@ -3361,8 +3410,8 @@ void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::Nod
|
|||||||
bool per_channel = w_scale.total() == outCn ? true : false;
|
bool per_channel = w_scale.total() == outCn ? true : false;
|
||||||
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
||||||
|
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
|
||||||
int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
|
int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
|
||||||
|
|
||||||
Mat bias(1, outCn, CV_32S);
|
Mat bias(1, outCn, CV_32S);
|
||||||
Mat outputMultiplier(1, outCn, CV_32F);
|
Mat outputMultiplier(1, outCn, CV_32F);
|
||||||
@ -3411,8 +3460,8 @@ void ONNXImporter::parseQGemm(LayerParams& layerParams, const opencv_onnx::NodeP
|
|||||||
|
|
||||||
int firstInpDims = outShapes[node_proto.input(0)].size();
|
int firstInpDims = outShapes[node_proto.input(0)].size();
|
||||||
|
|
||||||
float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
|
|
||||||
int outCn = weights.size[0];
|
int outCn = weights.size[0];
|
||||||
int secondInpDims = weights.dims;
|
int secondInpDims = weights.dims;
|
||||||
@ -3431,8 +3480,8 @@ void ONNXImporter::parseQGemm(LayerParams& layerParams, const opencv_onnx::NodeP
|
|||||||
CV_Error(Error::StsUnsupportedFormat, "The zero-point non-zero case of W is not supported!");
|
CV_Error(Error::StsUnsupportedFormat, "The zero-point non-zero case of W is not supported!");
|
||||||
}
|
}
|
||||||
|
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 7));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 7));
|
||||||
int8_t out_zp = ninputs == 9 ? getScaleFromMat<int8_t>(getBlob(node_proto, 8)) : 0;
|
int8_t out_zp = ninputs == 9 ? getScalarFromMat<int8_t>(getBlob(node_proto, 8)) : 0;
|
||||||
|
|
||||||
Mat bias;
|
Mat bias;
|
||||||
if (constBlobs.find(node_proto.input(6)) != constBlobs.end())
|
if (constBlobs.find(node_proto.input(6)) != constBlobs.end())
|
||||||
@ -3475,11 +3524,11 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No
|
|||||||
constId = i;
|
constId = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
float inp_0_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_0_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int8_t inp_0_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int8_t inp_0_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
|
|
||||||
float inp_1_sc = getScaleFromMat<float>(getBlob(node_proto, 4));
|
float inp_1_sc = getScalarFromMat<float>(getBlob(node_proto, 4));
|
||||||
int8_t inp_1_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 5));
|
int8_t inp_1_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 5));
|
||||||
|
|
||||||
// Set 2nd input as the const input
|
// Set 2nd input as the const input
|
||||||
if (constId == 0)
|
if (constId == 0)
|
||||||
@ -3488,11 +3537,11 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No
|
|||||||
cv::swap(inp_0_zp, inp_1_zp);
|
cv::swap(inp_0_zp, inp_1_zp);
|
||||||
}
|
}
|
||||||
|
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
|
||||||
|
|
||||||
int8_t out_zp = 0;
|
int8_t out_zp = 0;
|
||||||
if (node_proto.input_size() == 8)
|
if (node_proto.input_size() == 8)
|
||||||
out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
|
out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
|
||||||
|
|
||||||
std::vector<float> inp_scales = {inp_0_sc, inp_1_sc};
|
std::vector<float> inp_scales = {inp_0_sc, inp_1_sc};
|
||||||
std::vector<int8_t> inp_zps = {inp_0_zp, inp_1_zp};
|
std::vector<int8_t> inp_zps = {inp_0_zp, inp_1_zp};
|
||||||
@ -3608,10 +3657,10 @@ void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::
|
|||||||
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
||||||
|
|
||||||
float slope = layerParams.get<float>("alpha");
|
float slope = layerParams.get<float>("alpha");
|
||||||
float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
|
||||||
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
|
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
||||||
|
|
||||||
Mat lookUpTable(1, 256, CV_8S);
|
Mat lookUpTable(1, 256, CV_8S);
|
||||||
int8_t* table = lookUpTable.ptr<int8_t>();
|
int8_t* table = lookUpTable.ptr<int8_t>();
|
||||||
@ -3637,10 +3686,10 @@ void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::No
|
|||||||
{
|
{
|
||||||
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
||||||
|
|
||||||
float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
|
||||||
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
|
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
||||||
|
|
||||||
Mat lookUpTable(1, 256, CV_8S);
|
Mat lookUpTable(1, 256, CV_8S);
|
||||||
int8_t* table = lookUpTable.ptr<int8_t>();
|
int8_t* table = lookUpTable.ptr<int8_t>();
|
||||||
@ -3665,10 +3714,10 @@ void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::No
|
|||||||
{
|
{
|
||||||
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
||||||
|
|
||||||
float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
||||||
int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
||||||
float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
|
||||||
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
|
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
||||||
|
|
||||||
layerParams.type = "PoolingInt8";
|
layerParams.type = "PoolingInt8";
|
||||||
layerParams.set("pool", "ave");
|
layerParams.set("pool", "ave");
|
||||||
@ -3687,13 +3736,13 @@ void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::Nod
|
|||||||
layerParams.type = "ConcatInt8";
|
layerParams.type = "ConcatInt8";
|
||||||
int num_inputs = node_proto.input_size();
|
int num_inputs = node_proto.input_size();
|
||||||
|
|
||||||
float out_scale = getScaleFromMat<float>(getBlob(node_proto, 0));
|
float out_scale = getScalarFromMat<float>(getBlob(node_proto, 0));
|
||||||
int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 1));
|
int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 1));
|
||||||
|
|
||||||
for (int i = 2; i < num_inputs; i += 3)
|
for (int i = 2; i < num_inputs; i += 3)
|
||||||
{
|
{
|
||||||
float inp_scale = getScaleFromMat<float>(getBlob(node_proto, i + 1));
|
float inp_scale = getScalarFromMat<float>(getBlob(node_proto, i + 1));
|
||||||
int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, i + 2));
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, i + 2));
|
||||||
|
|
||||||
if (inp_scale != out_scale || inp_zp != out_zp)
|
if (inp_scale != out_scale || inp_zp != out_zp)
|
||||||
{
|
{
|
||||||
|
@ -1824,11 +1824,22 @@ TEST_P(Test_ONNX_layers, Gemm)
|
|||||||
|
|
||||||
TEST_P(Test_ONNX_layers, Quantized_Convolution)
|
TEST_P(Test_ONNX_layers, Quantized_Convolution)
|
||||||
{
|
{
|
||||||
|
// The difference of QOperator and QDQ format:
|
||||||
|
// https://onnxruntime.ai/docs/performance/quantization.html#onnx-quantization-representation-format.
|
||||||
|
{
|
||||||
|
SCOPED_TRACE("QOperator quantized model.");
|
||||||
testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
|
testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
|
||||||
testONNXModels("quantized_conv_int8_weights", npy, 0.03, 0.5);
|
testONNXModels("quantized_conv_int8_weights", npy, 0.03, 0.5);
|
||||||
testONNXModels("quantized_conv_per_channel_weights", npy, 0.06, 0.4);
|
testONNXModels("quantized_conv_per_channel_weights", npy, 0.06, 0.4);
|
||||||
|
|
||||||
testONNXModels("quantized_conv_asymmetric_pads_int8_weights");
|
testONNXModels("quantized_conv_asymmetric_pads_int8_weights");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
SCOPED_TRACE("QDQ quantized model.");
|
||||||
|
testONNXModels("quantized_conv_uint8_weights_qdq", npy, 0.004, 0.02);
|
||||||
|
testONNXModels("quantized_conv_int8_weights_qdq", npy, 0.03, 0.5);
|
||||||
|
testONNXModels("quantized_conv_per_channel_weights_qdq", npy, 0.06, 0.4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_ONNX_layers, Quantized_MatMul)
|
TEST_P(Test_ONNX_layers, Quantized_MatMul)
|
||||||
|
Loading…
Reference in New Issue
Block a user