Mirror of https://github.com/opencv/opencv.git
make ocl4dnn available to run on platforms other than Intel GPU
parent faafb3152a
commit 2e9e71ab9e
@@ -966,8 +966,7 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))

         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
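The change in these forward() hunks is the same throughout the commit: the Intel-only OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()) guard is dropped from CV_OCL_RUN, so the ocl4dnn path may be attempted on any OpenCL device, while Layer::forward_fallback still runs when the OpenCL branch is not taken or does not handle the layer. Below is a minimal, hypothetical sketch of that dispatch pattern in plain C++ (not OpenCV's actual CV_OCL_RUN macro expansion; forwardDispatch and isDnnOpenCLTarget are illustrative names):

// Sketch only: mimics "try OpenCL when an OpenCL target is selected,
// otherwise (or on failure) fall back to the reference CPU code".
#include <functional>
#include <iostream>

enum Target { DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16 };

static bool isDnnOpenCLTarget(Target t)
{
    return t == DNN_TARGET_OPENCL || t == DNN_TARGET_OPENCL_FP16;
}

static void forwardDispatch(Target preferableTarget,
                            const std::function<bool()>& forwardOcl,
                            const std::function<void()>& forwardFallback)
{
    // No Intel-device check anymore: any OpenCL device may try the kernel.
    // A 'false' return from the OpenCL branch means "not handled here".
    if (isDnnOpenCLTarget(preferableTarget) && forwardOcl())
        return;
    forwardFallback();   // reference CPU implementation
}

int main()
{
    forwardDispatch(DNN_TARGET_OPENCL,
                    [] { std::cout << "forward_ocl attempted\n"; return false; },
                    [] { std::cout << "forward_fallback on CPU\n"; });
    return 0;
}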
@@ -176,8 +176,7 @@ public:
     {
         CV_TRACE_FUNCTION();

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
                    func.applyOCL(inputs_arr, outputs_arr, internals_arr))

         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
@@ -73,7 +73,7 @@ public:

     virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
     {
-        if (preferableTarget == DNN_TARGET_OPENCL && !fuse_batch_norm)
+        if (!fuse_batch_norm)
         {
             top->getScaleShift(scale, shift);
             fuse_batch_norm = !scale.empty() || !shift.empty();
@@ -252,8 +252,7 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))

         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
@@ -274,25 +273,53 @@ public:
             for( i = 0; i < splitDim; i++ )
                 newRows *= inpBlob.size[i];

-            if (inpBlob.total() == newRows)
-            {
-                // MVN is applied to single values at an every row.
-                outBlob.setTo(0);
-                return;
-            }
-
             Mat inpMat = inpBlob.reshape(1, newRows);
             Mat outMat = outBlob.reshape(1, newRows);

+            if ( inpBlob.total() == newRows )
+            {
+                // MVN is applied to single values at an every row.
+                if (shift.empty())
+                {
+                    outBlob.setTo(0);
+                }
+                else
+                {
+                    for ( i = 0; i < newRows; i++ )
+                    {
+                        outMat.row(i).setTo(((float*)shift.data)[i]);
+                    }
+                }
+                return;
+            }
+
             Scalar mean, dev;
             for ( i = 0; i < newRows; i++)
             {
                 Mat inpRow = inpMat.row(i);
                 Mat outRow = outMat.row(i);
+                float weight = 1.f;
+                float bias = 0.f;
+                if (fuse_batch_norm)
+                {
+                    weight = i < scale.cols ? ((float*)scale.data)[i] : weight;
+                    bias = i < shift.cols ? ((float*)shift.data)[i] : bias;
+                }
                 cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
                 double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
-                inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
+                double normalizationScale = 1.0;
+                double normalizationShift = 0.0;
+                if (fuse_batch_norm)
+                {
+                    normalizationScale = alpha * weight;
+                    normalizationShift = -mean[0] * normalizationScale + bias;
+                }
+                else
+                {
+                    normalizationScale = alpha;
+                    normalizationShift = -mean[0] * alpha;
+                }
+                inpRow.convertTo(outRow, outRow.type(), normalizationScale, normalizationShift);
             }
         }
     }
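With the tryFuse change above, the CPU branch now consumes the fused batch-norm parameters as well: per row it computes alpha = 1/(eps + stddev) (or 1 when variance normalization is off) and folds the per-row weight/bias into a single convertTo of the form y = x * normalizationScale + normalizationShift. A small standalone check of that folding, with made-up numbers (plain C++, not OpenCV code):

// Verifies that "normalize, then scale/shift" equals the single fused
// affine transform used in the diff.
#include <cassert>
#include <cmath>

int main()
{
    const double x = 2.0, mean = 1.5, dev = 0.5, eps = 1e-9;
    const double weight = 0.8, bias = 0.1;   // hypothetical fused batch-norm params

    const double alpha = 1.0 / (eps + dev);  // variance normalization factor
    // Two-step reference: normalize, then apply weight and bias.
    const double reference = ((x - mean) * alpha) * weight + bias;
    // Folded form, as computed in the CPU branch above.
    const double normalizationScale = alpha * weight;
    const double normalizationShift = -mean * normalizationScale + bias;
    const double fused = x * normalizationScale + normalizationShift;

    assert(std::fabs(reference - fused) < 1e-9);
    return 0;
}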
@@ -191,8 +191,7 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))

         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
@@ -89,7 +89,8 @@ __kernel void CALC_MEAN(__global const Dtype* src,

     Dtype mean_val = mean[x];
     vec_type src_vec = load(src, index);
-    vec_type dst_vec = native_powr(src_vec - (vec_type)mean_val, 2);
+    vec_type dst_vec = src_vec - (vec_type)mean_val;
+    dst_vec = dst_vec * dst_vec;
     store(dst_vec, dst, index);
 }

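Both this kernel and the MEAN_FUSE kernel below replace native_powr(d, 2) with an explicit d * d. The powr-style builtins are specified only for a non-negative base, and d = src - mean can be negative, so multiplying is the portable way to square the deviation on OpenCL implementations other than Intel's. A host-side sketch of the same per-element step (plain C++ with made-up data, not the kernel itself):

// Computes the squared deviation from the mean the way the updated kernel
// does: form the difference first, then square it by multiplication.
#include <iostream>
#include <vector>

int main()
{
    const std::vector<float> src = {1.0f, 2.0f, 4.0f, 9.0f};
    float mean = 0.f;
    for (float v : src) mean += v;
    mean /= static_cast<float>(src.size());

    for (float v : src)
    {
        const float d = v - mean;   // may be negative
        const float sq = d * d;     // portable squaring, no powr needed
        std::cout << sq << ' ';
    }
    std::cout << '\n';
    return 0;
}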
@@ -197,10 +198,14 @@ __kernel void MEAN_FUSE(__global const T * A,
     const T4 a2 = vload4(i, src0_read + 2 * A_col_size);
     const T4 a3 = vload4(i, src0_read + 3 * A_col_size);

-    dot0 = native_powr(convert_float4(a0) - (Dtype4)sum.x, 2);
-    dot1 = native_powr(convert_float4(a1) - (Dtype4)sum.y, 2);
-    dot2 = native_powr(convert_float4(a2) - (Dtype4)sum.z, 2);
-    dot3 = native_powr(convert_float4(a3) - (Dtype4)sum.w, 2);
+    dot0 = convert_float4(a0) - (Dtype4)sum.x;
+    dot1 = convert_float4(a1) - (Dtype4)sum.y;
+    dot2 = convert_float4(a2) - (Dtype4)sum.z;
+    dot3 = convert_float4(a3) - (Dtype4)sum.w;
+    dot0 = dot0 * dot0;
+    dot1 = dot1 * dot1;
+    dot2 = dot2 * dot2;
+    dot3 = dot3 * dot3;

     vstore4(dot0, i, dst0_read);
     vstore4(dot1, i, dst0_read + A_col_size);
@@ -160,10 +160,12 @@ TEST_P(Test_TensorFlow_layers, batch_norm)
 TEST_P(Test_TensorFlow_layers, pooling)
 {
     int targetId = GetParam();
+    cv::ocl::Device d = cv::ocl::Device::getDefault();
+    bool loosenFlag = targetId == DNN_TARGET_OPENCL && d.isIntel() && d.type() == cv::ocl::Device::TYPE_CPU;
     runTensorFlowNet("max_pool_even", targetId);
     runTensorFlowNet("max_pool_odd_valid", targetId);
     runTensorFlowNet("ave_pool_same", targetId);
-    runTensorFlowNet("max_pool_odd_same", targetId);
+    runTensorFlowNet("max_pool_odd_same", targetId, false, loosenFlag ? 3e-5 : 1e-5, loosenFlag ? 3e-4 : 1e-4);
     runTensorFlowNet("reduce_mean", targetId); // an average pooling over all spatial dimensions.
 }

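The pooling test loosens the error bounds for one network only when the OpenCL target actually lands on an Intel CPU OpenCL device; every other configuration keeps the stricter defaults. A hedged sketch of that tolerance selection (the Tolerances struct and poolingTolerances helper are illustrative, not part of the test suite; only the cv::ocl::Device queries match the diff):

// Chooses ~3x looser l1/lInf bounds only for the Intel CPU OpenCL device.
#include <opencv2/core/ocl.hpp>
#include <iostream>

struct Tolerances { double l1; double lInf; };

static Tolerances poolingTolerances(bool targetIsOpenCL)
{
    const cv::ocl::Device& d = cv::ocl::Device::getDefault();
    const bool loosen = targetIsOpenCL && d.isIntel()
                        && d.type() == cv::ocl::Device::TYPE_CPU;
    return loosen ? Tolerances{3e-5, 3e-4} : Tolerances{1e-5, 1e-4};
}

int main()
{
    const Tolerances t = poolingTolerances(true);
    std::cout << "l1=" << t.l1 << " lInf=" << t.lInf << std::endl;
    return 0;
}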