mirror of
https://github.com/opencv/opencv.git
synced 2025-07-26 07:07:37 +08:00
Merge pull request #13115 from catree:fix_dis_flow_intrin
This commit is contained in:
commit
326be0d7ba
@ -41,6 +41,7 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin.hpp"
|
||||||
#include "opencl_kernels_video.hpp"
|
#include "opencl_kernels_video.hpp"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -517,7 +518,7 @@ inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *
|
|||||||
int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz)
|
int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz)
|
||||||
{
|
{
|
||||||
float SSD = 0.0f;
|
float SSD = 0.0f;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
if (patch_sz == 8)
|
if (patch_sz == 8)
|
||||||
{
|
{
|
||||||
/* Variables to accumulate the sums */
|
/* Variables to accumulate the sums */
|
||||||
@ -569,7 +570,7 @@ inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *
|
|||||||
dst_dUx += diff * I0x_ptr[i * I0_stride + j];
|
dst_dUx += diff * I0x_ptr[i * I0_stride + j];
|
||||||
dst_dUy += diff * I0y_ptr[i * I0_stride + j];
|
dst_dUy += diff * I0y_ptr[i * I0_stride + j];
|
||||||
}
|
}
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return SSD;
|
return SSD;
|
||||||
@ -586,7 +587,7 @@ inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr,
|
|||||||
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
|
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
|
||||||
float n = (float)patch_sz * patch_sz;
|
float n = (float)patch_sz * patch_sz;
|
||||||
|
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
if (patch_sz == 8)
|
if (patch_sz == 8)
|
||||||
{
|
{
|
||||||
/* Variables to accumulate the sums */
|
/* Variables to accumulate the sums */
|
||||||
@ -641,7 +642,7 @@ inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr,
|
|||||||
sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j];
|
sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j];
|
||||||
sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j];
|
sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j];
|
||||||
}
|
}
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
|
dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
|
||||||
@ -654,7 +655,7 @@ inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stri
|
|||||||
float w11, int patch_sz)
|
float w11, int patch_sz)
|
||||||
{
|
{
|
||||||
float SSD = 0.0f;
|
float SSD = 0.0f;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
if (patch_sz == 8)
|
if (patch_sz == 8)
|
||||||
{
|
{
|
||||||
v_float32x4 SSD_vec = v_setall_f32(0);
|
v_float32x4 SSD_vec = v_setall_f32(0);
|
||||||
@ -679,7 +680,7 @@ inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stri
|
|||||||
I0_ptr[i * I0_stride + j];
|
I0_ptr[i * I0_stride + j];
|
||||||
SSD += diff * diff;
|
SSD += diff * diff;
|
||||||
}
|
}
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return SSD;
|
return SSD;
|
||||||
@ -691,7 +692,7 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int
|
|||||||
{
|
{
|
||||||
float sum_diff = 0.0f, sum_diff_sq = 0.0f;
|
float sum_diff = 0.0f, sum_diff_sq = 0.0f;
|
||||||
float n = (float)patch_sz * patch_sz;
|
float n = (float)patch_sz * patch_sz;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
if (patch_sz == 8)
|
if (patch_sz == 8)
|
||||||
{
|
{
|
||||||
v_float32x4 sum_diff_vec = v_setall_f32(0);
|
v_float32x4 sum_diff_vec = v_setall_f32(0);
|
||||||
@ -721,7 +722,7 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int
|
|||||||
sum_diff += diff;
|
sum_diff += diff;
|
||||||
sum_diff_sq += diff * diff;
|
sum_diff_sq += diff * diff;
|
||||||
}
|
}
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return sum_diff_sq - sum_diff * sum_diff / n;
|
return sum_diff_sq - sum_diff * sum_diff / n;
|
||||||
|
@ -595,7 +595,7 @@ void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range
|
|||||||
#undef INIT_ROW_POINTERS
|
#undef INIT_ROW_POINTERS
|
||||||
|
|
||||||
int j = 0;
|
int j = 0;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
v_float32x4 zeta_vec = v_setall_f32(zeta_squared);
|
v_float32x4 zeta_vec = v_setall_f32(zeta_squared);
|
||||||
v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
|
v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
|
||||||
v_float32x4 delta_vec = v_setall_f32(delta2);
|
v_float32x4 delta_vec = v_setall_f32(delta2);
|
||||||
@ -801,7 +801,7 @@ void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()
|
|||||||
pA_v_next[j] += pWeight[j];
|
pA_v_next[j] += pWeight[j];
|
||||||
|
|
||||||
int j = 0;
|
int j = 0;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
v_float32x4 alpha2_vec = v_setall_f32(alpha2);
|
v_float32x4 alpha2_vec = v_setall_f32(alpha2);
|
||||||
v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
|
v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
|
||||||
v_float32x4 cW_u_vec, cW_v_vec;
|
v_float32x4 cW_u_vec, cW_v_vec;
|
||||||
@ -911,7 +911,7 @@ void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator(
|
|||||||
#undef INIT_ROW_POINTERS
|
#undef INIT_ROW_POINTERS
|
||||||
|
|
||||||
int j = 0;
|
int j = 0;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
v_float32x4 pWeight_vec, uy_vec, vy_vec;
|
v_float32x4 pWeight_vec, uy_vec, vy_vec;
|
||||||
for (; j < len - 3; j += 4)
|
for (; j < len - 3; j += 4)
|
||||||
{
|
{
|
||||||
@ -1013,7 +1013,7 @@ void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &ran
|
|||||||
#undef INIT_ROW_POINTERS
|
#undef INIT_ROW_POINTERS
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
#ifdef CV_SIMD128
|
#if CV_SIMD128
|
||||||
v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]);
|
v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]);
|
||||||
v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]);
|
v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]);
|
||||||
v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]);
|
v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]);
|
||||||
|
Loading…
Reference in New Issue
Block a user