mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 19:50:38 +08:00
Merge pull request #10513 from pengli:dnn
This commit is contained in:
commit
4d4f291553
@ -22,6 +22,7 @@ class BatchNormLayerImpl : public BatchNormLayer
|
||||
{
|
||||
public:
|
||||
Mat weights_, bias_;
|
||||
Mat weightMat, biasMat;
|
||||
|
||||
BatchNormLayerImpl(const LayerParams& params)
|
||||
{
|
||||
@ -96,17 +97,81 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Pre-expands the per-channel coefficients for 4-dimensional inputs.
 *
 * For a 4D first input, weightMat and biasMat are rebuilt with one row per
 * (size[0] * size[1]) plane and size[2] * size[3] columns; row r is filled
 * with the weights_ / bias_ entry at index (r % channels). Inputs with any
 * other dimensionality leave both matrices untouched.
 *
 * @param inputs  layer inputs; only inputs[0] is inspected.
 * @param outputs layer outputs; not used here.
 */
void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
{
    const Mat& blob = *inputs[0];
    if (blob.dims != 4)
        return;

    const int batch = blob.size[0];
    const int channels = blob.size[1];
    const int planeCount = batch * channels;
    const int planeSize = blob.size[2] * blob.size[3];

    weightMat = Mat(planeCount, planeSize, CV_32FC1);
    biasMat = Mat(planeCount, planeSize, CV_32FC1);

    for (int plane = 0; plane < planeCount; ++plane)
    {
        // Planes are laid out sample after sample, so the channel repeats every `channels` rows.
        const int channel = plane % channels;
        weightMat.row(plane).setTo(weights_.at<float>(channel));
        biasMat.row(plane).setTo(bias_.at<float>(channel));
    }
}
|
||||
|
||||
virtual bool supportBackend(int backendId)
|
||||
{
|
||||
return backendId == DNN_BACKEND_DEFAULT ||
|
||||
backendId == DNN_BACKEND_HALIDE && haveHalide();
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
/**
 * OpenCL (UMat) forward pass: dst = src * weight + bias.
 *
 * 2D inputs are multiplied/added with weights_ and bias_ directly. 4D inputs
 * are viewed as a (size[0] * size[1]) x (size[2] * size[3]) matrix and
 * combined with weightMat / biasMat, which finalize() fills for 4D inputs.
 *
 * @param inputs_    input blobs; exactly one is expected.
 * @param outputs_   output blobs.
 * @param internals_ internal buffers; not used here.
 * @return always true once the assertions hold.
 */
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    inputs_.getUMatVector(inputs);
    outputs_.getUMatVector(outputs);

    CV_Assert(blobs.size() >= 2);
    CV_Assert(inputs.size() == 1);
    // The loop below reads inputs[ii] for every output, so extra outputs
    // would index past the end of `inputs`.
    CV_Assert(outputs.size() <= inputs.size());

    UMat &inpBlob = inputs[0];
    CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4);
    const bool is2D = (inpBlob.dims == 2);

    // The flattened shape only depends on the input blob, so compute it once.
    MatShape s;
    if (!is2D)
    {
        const int groups = inpBlob.size[0];
        const int channels = inpBlob.size[1];
        const int rows = inpBlob.size[2];
        const int cols = inpBlob.size[3];
        s = shape(groups * channels, rows * cols);
    }

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        if (is2D)
        {
            // NOTE(review): multiply/add need weights_/bias_ to be compatible
            // with src's size -- confirm this holds when size[0] > 1.
            UMat& src = inputs[ii];
            UMat& dst = outputs[ii];
            multiply(src, weights_, dst);
            add(dst, bias_, dst);
        }
        else
        {
            // Reshaped headers share storage with the blobs, so writing to
            // `dst` fills outputs[ii] in place.
            UMat src = inputs[ii].reshape(1, static_cast<int>(s.size()), &s[0]);
            UMat dst = outputs[ii].reshape(1, static_cast<int>(s.size()), &s[0]);
            multiply(src, weightMat, dst);
            add(dst, biasMat, dst);
        }
    }
    return true;
}
|
||||
#endif
|
||||
|
||||
/**
 * Generic forward entry point.
 *
 * When the preferable target is DNN_TARGET_OPENCL and the performance check
 * for the default device passes, forward_ocl() is attempted through
 * CV_OCL_RUN; otherwise the call is handed to Layer::forward_fallback().
 *
 * @param inputs_arr    input blobs.
 * @param outputs_arr   output blobs.
 * @param internals_arr internal buffers.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    // OpenCL attempt; the macro is expected to leave the function when forward_ocl() succeeds.
    CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
               OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
               forward_ocl(inputs_arr, outputs_arr, internals_arr))

    // Non-OpenCL path.
    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
|
||||
|
||||
|
@ -63,8 +63,22 @@ public:
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
|
||||
/**
 * OpenCL (UMat) forward pass that passes each input through unchanged.
 *
 * For every output index the input is copied to the output, unless both
 * already refer to the same underlying handle.
 *
 * @param inputs_    input blobs.
 * @param outputs_   output blobs; inputs_ is indexed with the same indices.
 * @param internals_ internal buffers; not used here.
 * @return always true.
 */
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    inputs_.getUMatVector(inputs);
    outputs_.getUMatVector(outputs);

    const int count = outputs.size();
    for (int idx = 0; idx < count; ++idx)
    {
        void *srcHandle = inputs[idx].handle(ACCESS_READ);
        void *dstHandle = outputs[idx].handle(ACCESS_WRITE);

        // Same handle on both sides: the data is already where it belongs.
        if (srcHandle == dstHandle)
            continue;

        inputs[idx].copyTo(outputs[idx]);
    }

    return true;
}
|
||||
#endif
|
||||
|
@ -259,11 +259,63 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
/**
 * OpenCL (UMat) forward pass for element-wise SUM / PROD / MAX.
 *
 * The first two inputs are combined into outputs[0], then every further
 * input is folded into outputs[0] with the same operation. For SUM with a
 * non-empty `coeffs`, each input is first multiplied by its coefficient.
 *
 * @param inputs_    input blobs; at least two are required.
 * @param outputs_   output blobs; only outputs[0] is written.
 * @param internals_ internal buffers; not used here.
 * @return true when the result was computed; false for an unknown operation
 *         or when there are too few inputs/outputs/coefficients, so that the
 *         caller can use its non-OpenCL path instead.
 */
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    inputs_.getUMatVector(inputs);
    outputs_.getUMatVector(outputs);

    // Every branch reads inputs[0], inputs[1] and writes outputs[0]; decline
    // instead of indexing out of range.
    if (inputs.size() < 2 || outputs.empty())
        return false;

    const size_t numInputs = inputs.size();

    switch (op)
    {
        case SUM:
            if (coeffs.empty())
            {
                add(inputs[0], inputs[1], outputs[0]);
                for (size_t i = 2; i < numInputs; ++i)
                    add(outputs[0], inputs[i], outputs[0]);
            }
            else
            {
                // coeffs[i] is read for every input below.
                if (coeffs.size() < numInputs)
                    return false;

                UMat mul0, mul1;
                multiply(coeffs[0], inputs[0], mul0);
                multiply(coeffs[1], inputs[1], mul1);
                add(mul0, mul1, outputs[0]);
                for (size_t i = 2; i < numInputs; ++i)
                {
                    multiply(coeffs[i], inputs[i], mul0);
                    add(mul0, outputs[0], outputs[0]);
                }
            }
            break;
        case PROD:
            multiply(inputs[0], inputs[1], outputs[0]);
            for (size_t i = 2; i < numInputs; ++i)
                multiply(inputs[i], outputs[0], outputs[0]);
            break;
        case MAX:
            max(inputs[0], inputs[1], outputs[0]);
            for (size_t i = 2; i < numInputs; ++i)
                max(inputs[i], outputs[0], outputs[0]);
            break;
        default:
            return false;
    }
    return true;
}
|
||||
#endif
|
||||
|
||||
/**
 * Generic forward entry point.
 *
 * Attempts forward_ocl() through CV_OCL_RUN when the preferable target is
 * DNN_TARGET_OPENCL and the performance check for the default device passes;
 * otherwise delegates to Layer::forward_fallback().
 *
 * @param inputs_arr    input blobs.
 * @param outputs_arr   output blobs.
 * @param internals_arr internal buffers.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    // OpenCL attempt first.
    CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
               OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
               forward_ocl(inputs_arr, outputs_arr, internals_arr))

    // Reached when the OpenCL attempt above did not handle the call.
    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
|
||||
|
||||
|
@ -69,11 +69,74 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
/**
 * OpenCL (UMat) forward pass for across-spatial Lp normalization.
 *
 * Each sample (index along size[0]) is viewed as a channels x channelSize
 * matrix, divided by (sum(|x|^pnorm) + epsilon)^(1/pnorm), and then
 * optionally scaled by blobs[0] (a single value or one value per channel).
 *
 * @param inputs_    input blobs; exactly one is expected.
 * @param outputs_   output blobs; exactly one, with the same total() as the input.
 * @param internals_ internal buffers; internals[0] is used as scratch space.
 * @return true when the output was computed; false when `acrossSpatial` is
 *         not set, because that mode is not implemented here and the caller
 *         has to use its non-OpenCL path.
 */
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
    // Only the across-spatial norm is computed below; without it `dst` would
    // never be written, so decline rather than report success.
    if (!acrossSpatial)
        return false;

    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    std::vector<UMat> internals;

    inputs_.getUMatVector(inputs);
    outputs_.getUMatVector(outputs);
    internals_.getUMatVector(internals);

    CV_Assert(inputs.size() == 1 && outputs.size() == 1);
    CV_Assert(!internals.empty());
    CV_Assert(inputs[0].total() == outputs[0].total());

    const UMat& inp0 = inputs[0];
    UMat& buffer = internals[0];
    const int num = inp0.size[0];
    const int channels = inp0.size[1];
    const int channelSize = static_cast<int>(inp0.total() / (static_cast<size_t>(num) * channels));

    // View the single blob as (num * channels) x channelSize so that each
    // sample is a contiguous band of `channels` rows. Indexing the blob
    // vectors with the sample index would run past their single element.
    MatShape s = shape(num * channels, channelSize);
    UMat srcAll = inp0.reshape(1, static_cast<int>(s.size()), &s[0]);
    UMat dstAll = outputs[0].reshape(1, static_cast<int>(s.size()), &s[0]);

    for (int i = 0; i < num; ++i)
    {
        UMat src = srcAll.rowRange(i * channels, (i + 1) * channels);
        UMat dst = dstAll.rowRange(i * channels, (i + 1) * channels);

        UMat abs_mat;
        absdiff(src, cv::Scalar::all(0), abs_mat);
        pow(abs_mat, pnorm, buffer);

        // epsilon keeps the norm away from zero before it is inverted
        float absSum = sum(buffer)[0] + epsilon;
        float norm = pow(absSum, 1.0f / pnorm);
        multiply(src, 1.0f / norm, dst);

        if (!blobs.empty())
        {
            // scale the output
            Mat scale = blobs[0];
            if (scale.total() == 1)
            {
                // _scale: 1 x 1
                multiply(dst, scale.at<float>(0, 0), dst);
            }
            else
            {
                // _scale: _channels x 1
                CV_Assert(scale.total() == static_cast<size_t>(channels));
                repeat(scale, 1, dst.cols, buffer);
                multiply(dst, buffer, dst);
            }
        }
    }
    return true;
}
|
||||
#endif
|
||||
|
||||
/**
 * Generic forward entry point.
 *
 * Runs forward_ocl() through CV_OCL_RUN when the preferable target is
 * DNN_TARGET_OPENCL and the performance check for the default device passes;
 * otherwise hands the call to Layer::forward_fallback().
 *
 * @param inputs_arr    input blobs.
 * @param outputs_arr   output blobs.
 * @param internals_arr internal buffers.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    // Prefer the OpenCL implementation when it is eligible.
    CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
               OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
               forward_ocl(inputs_arr, outputs_arr, internals_arr))

    // Otherwise use the generic fallback.
    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
|
||||
|
||||
|
@ -320,6 +320,11 @@ TEST(Layer_Test_Eltwise, Accuracy)
|
||||
testLayerUsingCaffeModels("layer_eltwise");
|
||||
}
|
||||
|
||||
// Runs the "layer_eltwise" Caffe model check with the OpenCL target selected.
OCL_TEST(Layer_Test_Eltwise, Accuracy)
{
    const char* const modelBaseName = "layer_eltwise";
    testLayerUsingCaffeModels(modelBaseName, DNN_TARGET_OPENCL);
}
|
||||
|
||||
TEST(Layer_Test_PReLU, Accuracy)
|
||||
{
|
||||
testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true);
|
||||
|
@ -76,7 +76,7 @@ static std::string path(const std::string& file)
|
||||
return findDataFile("dnn/tensorflow/" + file, false);
|
||||
}
|
||||
|
||||
static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
|
||||
static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
|
||||
double l1 = 1e-5, double lInf = 1e-4,
|
||||
bool memoryLoad = false)
|
||||
{
|
||||
@ -104,6 +104,9 @@ static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
|
||||
|
||||
ASSERT_FALSE(net.empty());
|
||||
|
||||
net.setPreferableBackend(DNN_BACKEND_DEFAULT);
|
||||
net.setPreferableTarget(targetId);
|
||||
|
||||
cv::Mat input = blobFromNPY(inpPath);
|
||||
cv::Mat target = blobFromNPY(outPath);
|
||||
|
||||
@ -132,6 +135,11 @@ TEST(Test_TensorFlow, eltwise_add_mul)
|
||||
runTensorFlowNet("eltwise_add_mul");
|
||||
}
|
||||
|
||||
// Runs the "eltwise_add_mul" TensorFlow net with the OpenCL target selected.
OCL_TEST(Test_TensorFlow, eltwise_add_mul)
{
    const char* const netPrefix = "eltwise_add_mul";
    runTensorFlowNet(netPrefix, DNN_TARGET_OPENCL);
}
|
||||
|
||||
TEST(Test_TensorFlow, pad_and_concat)
|
||||
{
|
||||
runTensorFlowNet("pad_and_concat");
|
||||
@ -141,7 +149,14 @@ TEST(Test_TensorFlow, batch_norm)
|
||||
{
|
||||
runTensorFlowNet("batch_norm");
|
||||
runTensorFlowNet("fused_batch_norm");
|
||||
runTensorFlowNet("batch_norm_text", true);
|
||||
runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
|
||||
}
|
||||
|
||||
// Runs the three batch-normalization TensorFlow nets with the OpenCL target selected.
OCL_TEST(Test_TensorFlow, batch_norm)
{
    const int target = DNN_TARGET_OPENCL;

    runTensorFlowNet("batch_norm", target);
    runTensorFlowNet("fused_batch_norm", target);
    // Third argument is runTensorFlowNet's `hasText` flag.
    runTensorFlowNet("batch_norm_text", target, true);
}
|
||||
|
||||
TEST(Test_TensorFlow, pooling)
|
||||
@ -179,15 +194,15 @@ TEST(Test_TensorFlow, fp16)
|
||||
{
|
||||
const float l1 = 1e-3;
|
||||
const float lInf = 1e-2;
|
||||
runTensorFlowNet("fp16_single_conv", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_padding_same", false, l1, lInf);
|
||||
runTensorFlowNet("fp16_single_conv", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_deconvolution", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_max_pool_odd_same", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_padding_valid", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_eltwise_add_mul", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_max_pool_odd_valid", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_pad_and_concat", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_max_pool_even", DNN_TARGET_CPU, false, l1, lInf);
|
||||
runTensorFlowNet("fp16_padding_same", DNN_TARGET_CPU, false, l1, lInf);
|
||||
}
|
||||
|
||||
TEST(Test_TensorFlow, quantized)
|
||||
@ -267,7 +282,7 @@ OCL_TEST(Test_TensorFlow, MobileNet_SSD)
|
||||
|
||||
TEST(Test_TensorFlow, lstm)
|
||||
{
|
||||
runTensorFlowNet("lstm", true);
|
||||
runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
|
||||
}
|
||||
|
||||
TEST(Test_TensorFlow, split)
|
||||
@ -284,11 +299,11 @@ TEST(Test_TensorFlow, memory_read)
|
||||
{
|
||||
double l1 = 1e-5;
|
||||
double lInf = 1e-4;
|
||||
runTensorFlowNet("lstm", true, l1, lInf, true);
|
||||
runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);
|
||||
|
||||
runTensorFlowNet("batch_norm", false, l1, lInf, true);
|
||||
runTensorFlowNet("fused_batch_norm", false, l1, lInf, true);
|
||||
runTensorFlowNet("batch_norm_text", true, l1, lInf, true);
|
||||
runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
|
||||
runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
|
||||
runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -170,6 +170,11 @@ TEST(Torch_Importer, run_batch_norm)
|
||||
runTorchNet("net_batch_norm", DNN_TARGET_CPU, "", false, true);
|
||||
}
|
||||
|
||||
// Runs the "net_batch_norm" Torch net with the OpenCL target selected.
OCL_TEST(Torch_Importer, run_batch_norm)
{
    const char* const netPrefix = "net_batch_norm";
    runTorchNet(netPrefix, DNN_TARGET_OPENCL, "", false, true);
}
|
||||
|
||||
TEST(Torch_Importer, net_prelu)
|
||||
{
|
||||
runTorchNet("net_prelu");
|
||||
@ -225,6 +230,11 @@ TEST(Torch_Importer, net_normalize)
|
||||
runTorchNet("net_normalize", DNN_TARGET_CPU, "", false, true);
|
||||
}
|
||||
|
||||
// Runs the "net_normalize" Torch net with the OpenCL target selected.
OCL_TEST(Torch_Importer, net_normalize)
{
    const char* const netPrefix = "net_normalize";
    runTorchNet(netPrefix, DNN_TARGET_OPENCL, "", false, true);
}
|
||||
|
||||
TEST(Torch_Importer, net_padding)
|
||||
{
|
||||
runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
|
||||
@ -237,6 +247,11 @@ TEST(Torch_Importer, net_non_spatial)
|
||||
runTorchNet("net_non_spatial", DNN_TARGET_CPU, "", false, true);
|
||||
}
|
||||
|
||||
// Runs the "net_non_spatial" Torch net with the OpenCL target selected.
OCL_TEST(Torch_Importer, net_non_spatial)
{
    const char* const netPrefix = "net_non_spatial";
    runTorchNet(netPrefix, DNN_TARGET_OPENCL, "", false, true);
}
|
||||
|
||||
TEST(Torch_Importer, ENet_accuracy)
|
||||
{
|
||||
Net net;
|
||||
|
Loading…
Reference in New Issue
Block a user