mirror of
https://github.com/opencv/opencv.git
synced 2025-07-20 19:17:36 +08:00
More updates to the MVN layer OpenCL implementation
Skip one OpenCL kernel launch when normVariance is disabled, and use native_powr for performance reasons. Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
8226bd25c4
commit
fe494297e4
@ -96,30 +96,31 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
|
int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
|
||||||
String buildopt = format("-DNUM=%d ", number);
|
|
||||||
String kname = format("calc_mean%d", number);
|
|
||||||
ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
|
||||||
if (kernel.empty())
|
|
||||||
return false;
|
|
||||||
size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
|
size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
|
||||||
kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
|
String buildopt = format("-DNUM=%d ", number);
|
||||||
kernel.set(1, (int)s[0]);
|
|
||||||
kernel.set(2, (int)s[1]);
|
|
||||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
|
|
||||||
kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
|
|
||||||
ret = kernel.run(2, global, NULL, false);
|
|
||||||
if (!ret)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (normVariance)
|
if (normVariance)
|
||||||
{
|
{
|
||||||
|
String kname = format("calc_mean%d", number);
|
||||||
|
ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
||||||
|
if (kernel.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||||
|
kernel.set(1, (int)s[0]);
|
||||||
|
kernel.set(2, (int)s[1]);
|
||||||
|
kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
|
||||||
|
kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
|
||||||
|
ret = kernel.run(2, global, NULL, false);
|
||||||
|
if (!ret)
|
||||||
|
return false;
|
||||||
|
|
||||||
ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
|
ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
|
||||||
tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
|
tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
|
||||||
if (!ret)
|
if (!ret)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
kname = format("mvn%d", number);
|
String kname = format("mvn%d", number);
|
||||||
if (normVariance)
|
if (normVariance)
|
||||||
buildopt += "-DNORM_VARIANCE";
|
buildopt += "-DNORM_VARIANCE";
|
||||||
ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
||||||
|
@ -79,7 +79,7 @@ __kernel void CALC_MEAN(__global const Dtype* src,
|
|||||||
|
|
||||||
Dtype mean_val = mean[x];
|
Dtype mean_val = mean[x];
|
||||||
vec_type src_vec = load(src, index);
|
vec_type src_vec = load(src, index);
|
||||||
vec_type dst_vec = pow(src_vec - (vec_type)mean_val, 2);
|
vec_type dst_vec = native_powr(src_vec - (vec_type)mean_val, 2);
|
||||||
store(dst_vec, dst, index);
|
store(dst_vec, dst, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user