mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
Merge pull request #21025 from alalek:issue_21004
* dnn(ocl4dnn): fix LRN layer accuracy problems - FP16 intermediate computation is not accurate and may provide NaN values * dnn(test): update tolerance for FP16
This commit is contained in:
parent
cb286a66be
commit
8041ab8a61
@@ -64,36 +64,37 @@ __kernel void TEMPLATE(lrn_full_no_scale,Dtype)(const int nthreads, __global con
|
||||
const int step = height * width;
|
||||
__global const Dtype* in_off = in + offset;
|
||||
__global Dtype* out_off = out + offset;
|
||||
KERNEL_ARG_DTYPE scale_val;
|
||||
int head = 0;
|
||||
const int pre_pad = (size - 1) / 2;
|
||||
const int post_pad = size - pre_pad - 1;
|
||||
KERNEL_ARG_DTYPE accum_scale = 0;
|
||||
float accum_scale = 0;
|
||||
// fill the scale at [n, :, h, w]
|
||||
// accumulate values
|
||||
while (head < post_pad && head < channels) {
|
||||
accum_scale += in_off[head * step] * in_off[head * step];
|
||||
float v = in_off[head * step];
|
||||
accum_scale += v * v;
|
||||
++head;
|
||||
}
|
||||
// both add and subtract
|
||||
while (head < channels) {
|
||||
accum_scale += in_off[head * step] * in_off[head * step];
|
||||
float v = in_off[head * step];
|
||||
accum_scale += v * v;
|
||||
if (head - size >= 0) {
|
||||
accum_scale -= in_off[(head - size) * step]
|
||||
* in_off[(head - size) * step];
|
||||
v = in_off[(head - size) * step];
|
||||
accum_scale -= v * v;
|
||||
}
|
||||
scale_val = k + accum_scale * alpha_over_size;
|
||||
out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
|
||||
float scale_val = k + accum_scale * alpha_over_size;
|
||||
out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
|
||||
++head;
|
||||
}
|
||||
// subtract only
|
||||
while (head < channels + post_pad) {
|
||||
if (head - size >= 0) {
|
||||
accum_scale -= in_off[(head - size) * step]
|
||||
* in_off[(head - size) * step];
|
||||
float v = in_off[(head - size) * step];
|
||||
accum_scale -= v * v;
|
||||
}
|
||||
scale_val = k + accum_scale * alpha_over_size;
|
||||
out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
|
||||
float scale_val = k + accum_scale * alpha_over_size;
|
||||
out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
|
||||
++head;
|
||||
}
|
||||
}
|
||||
|
@@ -198,7 +198,7 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
|
||||
ASSERT_EQ(inLayerShapes[0][3], 227);
|
||||
|
||||
const float l1 = 1e-5;
|
||||
const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4;
|
||||
const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-3 : 1e-4;
|
||||
|
||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
||||
net.setPreferableTarget(targetId);
|
||||
|
Loading…
Reference in New Issue
Block a user