Mirror of https://github.com/opencv/opencv.git (synced 2024-11-29 13:47:32 +08:00)
Flush denormal Convolution weights to zero
This commit is contained in:
parent: fd06139c20
commit: 68d59a2913
@@ -1230,6 +1230,13 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+#if CV_TRY_SSE
+        uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE();
+        uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE();
+        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#endif
+
         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
@@ -1312,6 +1319,10 @@ public:
             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
                             kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
         }
+#if CV_TRY_SSE
+        _MM_SET_FLUSH_ZERO_MODE(ftzMode);
+        _MM_SET_DENORMALS_ZERO_MODE(dazMode);
+#endif
     }
 
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
Loading…
Reference in New Issue
Block a user