load fp16 as fp32 and align fp16 and double in onnx_graph_simplifier
parent fc3e393516
commit 7eaec9dd22
@@ -18,6 +18,17 @@ CV__DNN_INLINE_NS_BEGIN

extern bool DNN_DIAGNOSTICS_RUN;

static int isLittleEndianCPU()
{
    int x = 7;
    char *ptr = (char *)&x;

    if(ptr[0] == 0)
        return 0;
    else
        return 1;
}

// This wrapper can behave differently for fake input nodes and real graph nodes.
class ONNXNodeWrapper : public ImportNodeWrapper
{
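ONNX stores each float16 weight in the low 16 bits of an element of the tensor's int32_data field, so when that storage is reinterpreted as an array of 16-bit halves the payload lands at even indices on a little-endian CPU and at odd indices on a big-endian one; the offset chosen through isLittleEndianCPU() in the loader below selects the correct half. A minimal standalone sketch of that extraction (plain C++, independent of OpenCV; the values are illustrative):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    // 0x3C00 is the IEEE 754 half-precision bit pattern for 1.0.
    int32_t packed = 0x00003C00;   // one fp16 value stored in an int32_data element
    uint16_t halves[2];
    std::memcpy(halves, &packed, sizeof(packed));

    // Same test as isLittleEndianCPU(): the low byte of 7 comes first only on a
    // little-endian machine, so the fp16 payload is halves[0] there and
    // halves[1] on a big-endian machine.
    int x = 7;
    const int offset = (*(char*)&x == 7) ? 0 : 1;
    std::printf("fp16 bits: 0x%04X\n", halves[offset]);   // prints 0x3C00
    return 0;
}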
@@ -767,11 +778,64 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
            Mat(sizes, CV_32FC1, val).copyTo(blob);
        }
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_FLOAT16)
    {
        // FIXME, for now, we only load FP16 Tensor as FP32 Mat, full support for FP16 is required in the future.
        CV_LOG_ONCE_WARNING(NULL, "DNN: load FP16 model as FP32 model, and it takes twice the FP16 RAM requirement.");

        // ONNX saves float 16 data in two formats: int32 and raw_data.
        // Link: https://github.com/onnx/onnx/issues/4460#issuecomment-1224373746
        if (!tensor_proto.int32_data().empty())
        {
            const int offset = isLittleEndianCPU() ? 0 : 1;
            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();

            AutoBuffer<float16_t, 16> aligned_val;
            size_t sz = tensor_proto.int32_data().size();
            aligned_val.allocate(sz);
            float16_t* bufPtr = aligned_val.data();

            float16_t *fp16Ptr = (float16_t *)field.data();
            for (int i = 0; i < sz; i++)
            {
                bufPtr[i] = fp16Ptr[i*2 + offset];
            }
            Mat(sizes, CV_16FC1, bufPtr).convertTo(blob, CV_32FC1);
        }
        else
        {
            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
#if CV_STRONG_ALIGNMENT
            // Aligned pointer is required.
            AutoBuffer<float16_t, 16> aligned_val;
            if (!isAligned<sizeof(float16_t)>(val))
            {
                size_t sz = tensor_proto.raw_data().size();
                aligned_val.allocate(divUp(sz, sizeof(float16_t)));
                memcpy(aligned_val.data(), val, sz);
                val = (char*)aligned_val.data();
            }
#endif
            Mat(sizes, CV_16FC1, val).convertTo(blob, CV_32FC1);
        }
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_DOUBLE)
    {
        const ::google::protobuf::RepeatedField<double> field = tensor_proto.double_data();
        CV_Assert(!field.empty());
        char* val = (char *)field.data();
#if CV_STRONG_ALIGNMENT
        // Aligned pointer is required.
        AutoBuffer<double, 16> aligned_val;
        if (!isAligned<sizeof(double)>(val))
        {
            size_t sz = tensor_proto.raw_data().size();
            aligned_val.allocate(divUp(sz, sizeof(double)));
            memcpy(aligned_val.data(), val, sz);
            val = (char*)aligned_val.data();
        }
#endif
        Mat(sizes, CV_64FC1, val).convertTo(blob, CV_32FC1);
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_INT32)
    {
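protobuf's raw_data() is an arbitrary byte string with no alignment guarantee, so on targets where CV_STRONG_ALIGNMENT is set the bytes are first copied into an AutoBuffer, which provides suitably aligned storage, before being wrapped in a Mat. The same guard expressed in standard C++, with illustrative names standing in for OpenCV's AutoBuffer/isAligned/divUp helpers:

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Return a pointer that is safe to read as double: the raw bytes are used in
// place when already 8-byte aligned, otherwise they are copied into aligned
// scratch storage first (the role AutoBuffer plays in the patch above).
const double* asAlignedDoubles(const std::string& raw, std::vector<double>& scratch)
{
    const char* p = raw.data();
    if (reinterpret_cast<std::uintptr_t>(p) % alignof(double) == 0)
        return reinterpret_cast<const double*>(p);

    // Round the element count up, as divUp does, then copy all raw bytes over.
    scratch.resize((raw.size() + sizeof(double) - 1) / sizeof(double));
    std::memcpy(scratch.data(), p, raw.size());
    return scratch.data();
}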
@@ -2098,6 +2098,11 @@ TEST_P(Test_ONNX_nets, MobileNet_v2)
    testONNXModels("mobilenetv2", pb, default_l1, default_lInf, true);
}

TEST_P(Test_ONNX_nets, MobileNet_v2_FP16)
{
    testONNXModels("mobilenetv2_fp16", npy, default_l1, default_lInf, true);
}

TEST_P(Test_ONNX_nets, LResNet100E_IR)
{
    applyTestTag(
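With this change an ONNX model whose initializers are stored as float16 loads through the ordinary DNN API and is held internally as fp32 (at twice the fp16 memory footprint, as the warning above notes). A usage sketch; the file name and input geometry are illustrative:

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>

int main()
{
    // Hypothetical fp16 model file; any ONNX model with float16 weights loads the same way.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("mobilenetv2_fp16.onnx");

    // Dummy 224x224 3-channel input, just to run a forward pass.
    cv::Mat image(224, 224, CV_8UC3, cv::Scalar::all(127));
    cv::Mat blob = cv::dnn::blobFromImage(image, 1.0 / 255.0, cv::Size(224, 224));

    net.setInput(blob);
    cv::Mat out = net.forward();
    return 0;
}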