Merge pull request #16088 from alalek:dnn_eltwise_layer_different_src_channels

dnn(eltwise): fix handling of different number of channels

* dnn(test): reproducer for Eltwise layer issue from PR16063

* dnn(eltwise): rework support for inputs with different channels

* dnn(eltwise): get rid of finalize(), variableChannels

* dnn(eltwise): update input sorting by number of channels
  - do not swap inputs if the numbers of channels are the same after truncation

* dnn(test): skip "shortcut" with batch size 2 on MYRIAD targets
Alexander Alekhin, 2019-12-11 20:16:58 +03:00 (committed by GitHub)
parent f2cce5fd8c
commit 5ee7abbe3c
7 changed files with 407 additions and 123 deletions

(File 1 of 7: dnn module CMake warnings configuration)

@@ -58,6 +58,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS
     /wd4456 /wd4510 /wd4610 /wd4800
     /wd4701 /wd4703 # potentially uninitialized local/pointer variable 'value' used
     /wd4505 # unreferenced local function has been removed
+    /wd4458 # declaration of 'x' hides class member. GCC still works, MSVC bug is here: https://developercommunity.visualstudio.com/content/problem/219311/c-c4458-declaration-hides-class-member-warning-iss.html
     -wd858 -wd2196
     -Winvalid-offsetof # Apple Clang (attr_value.pb.cc)
 )

(File 2 of 7: EltwiseLayer public API documentation)

@@ -508,6 +508,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr<Layer> create(const LayerParams &params);
     };

+    /** @brief Element wise operation on inputs
+
+    Extra optional parameters:
+    - "operation" as string. Values are "sum" (default), "prod", "max", "div"
+    - "coeff" as float array. Specifies weights of the inputs for the SUM operation
+    - "output_channels_mode" as string. Values are "same" (default, all inputs must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
+    */
     class CV_EXPORTS EltwiseLayer : public Layer
     {
     public:
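
For context, a minimal sketch (not part of the patch) of how these parameters are passed when a network is assembled by hand; the layer and input names are arbitrary, and the two-input weighted SUM with "input_0_truncate" mirrors what the new tests below construct:

    #include <opencv2/dnn.hpp>

    using namespace cv;
    using namespace cv::dnn;

    Net buildEltwiseNet()
    {
        Net net;
        LayerParams lp;
        lp.type = "Eltwise";
        lp.name = "eltwise_sum";                         // arbitrary name
        lp.set<std::string>("operation", "sum");         // "sum" is the default
        lp.set<std::string>("output_channels_mode", "input_0_truncate");
        float coeffs[2] = {0.5f, 0.5f};                  // per-input weights (SUM only)
        lp.set("coeff", DictValue::arrayReal<float*>(coeffs, 2));
        int id = net.addLayer(lp.name, lp.type, lp);
        net.setInputsNames({"input_0", "input_1"});      // inputs may have different channel counts
        net.connect(0, 0, id, 0);
        net.connect(0, 1, id, 1);
        return net;
    }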

(File 3 of 7: Darknet importer, "shortcut" layer)

@@ -425,6 +425,7 @@ namespace cv {
             }
             shortcut_param.set<std::string>("op", "sum");
+            shortcut_param.set<std::string>("output_channels_mode", "input_0_truncate");

             darknet::LayerParameter lp;
             std::string layer_name = cv::format("shortcut_%d", layer_id);

(File 4 of 7: Eltwise layer implementation)

@@ -66,9 +66,28 @@ public:
         DIV = 3
     } op;
     std::vector<float> coeffs;
-    bool variableChannels;
+
+    enum OutputChannelsMode
+    {
+        ELTWISE_CHANNNELS_SAME = 0,          //!< number of channels from inputs must be the same and equal to output's number of channels
+        ELTWISE_CHANNNELS_INPUT_0,           //!< number of channels from inputs may be different,
+                                             //!< output's number of channels is equal to the number of channels of the first input;
+                                             //!< the number of channels of other inputs must not be greater than that of the first input
+        ELTWISE_CHANNNELS_INPUT_0_TRUNCATE,  //!< number of channels from inputs may be different,
+                                             //!< output's number of channels is equal to the number of channels of the first input;
+                                             //!< there is no restriction on the number of channels of other inputs:
+                                             //!< extra channels of other inputs are ignored
+        ELTWISE_CHANNNELS_USE_MAX,           //!< number of channels from inputs may be different,
+                                             //!< output's number of channels is equal to the maximal number of input channels
+                                             //!< @note supported operation: `SUM`
+    } channelsModeInput;
+
+    mutable OutputChannelsMode channelsMode; //!< "optimized" channels mode (switched to ELTWISE_CHANNNELS_SAME if the numbers of input channels are equal)
+    mutable /*size_t*/int outputChannels;

     EltwiseLayerImpl(const LayerParams& params)
+        : outputChannels(0)
     {
         setParamsFrom(params);
         op = SUM;
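
The shape semantics of the four modes can be summarized outside the diff. A standalone sketch, with a hypothetical helper outChannels() that mirrors the per-mode checks performed in getMemoryShapes() below:

    #include <algorithm>
    #include <stdexcept>
    #include <vector>

    enum Mode { SAME, INPUT_0, INPUT_0_TRUNCATE, USE_MAX };

    // Hypothetical helper (illustration only) mirroring the per-mode logic.
    int outChannels(const std::vector<int>& inputChannels, Mode mode)
    {
        int numChannels = inputChannels[0];
        for (size_t i = 1; i < inputChannels.size(); i++)
        {
            int c = inputChannels[i];
            switch (mode)
            {
            case SAME:             if (numChannels != c) throw std::runtime_error("channels must match"); break;
            case INPUT_0:          if (numChannels < c) throw std::runtime_error("too many channels"); break;
            case INPUT_0_TRUNCATE: /* extra channels are simply ignored */ break;
            case USE_MAX:          numChannels = std::max(numChannels, c); break;  // SUM only
            }
        }
        return numChannels;
    }

    // For inputs with {4, 5, 3} channels:
    //   SAME             -> error
    //   INPUT_0          -> error (5 > 4)
    //   INPUT_0_TRUNCATE -> 4 (the fifth channel of input 1 is dropped)
    //   USE_MAX          -> 5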
@@ -97,6 +116,35 @@ public:
                 coeffs[i] = paramCoeff.get<float>(i);
             }
         }
+
+        channelsModeInput = ELTWISE_CHANNNELS_SAME;
+        if (params.has("output_channels_mode"))
+        {
+            String v = toLowerCase(params.get<String>("output_channels_mode"));
+            if (v == "same")
+            {
+                channelsModeInput = ELTWISE_CHANNNELS_SAME;
+            }
+            else if (v == "input_0")
+            {
+                channelsModeInput = ELTWISE_CHANNNELS_INPUT_0;
+            }
+            else if (v == "input_0_truncate")
+            {
+                channelsModeInput = ELTWISE_CHANNNELS_INPUT_0_TRUNCATE;
+            }
+            else if (v == "max_input_channels")
+            {
+                channelsModeInput = ELTWISE_CHANNNELS_USE_MAX;
+                if (op != SUM)
+                    CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") 'max' channels mode is limited to SUM operation only");
+            }
+            else
+                CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") unknown channels mode: \"" + v + "\"");
+        }
+        channelsMode = channelsModeInput;
+
+        // TODO Must have checks for other unknown options
     }

     virtual bool supportBackend(int backendId) CV_OVERRIDE
@@ -104,7 +152,7 @@ public:
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_HALIDE ||
-               (((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
-                 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && !variableChannels);
+               (((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
+                 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && channelsMode == ELTWISE_CHANNNELS_SAME);
     }

     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -119,137 +167,207 @@ public:
         int dims = inputs[0].size();
         // Number of channels in output shape is determined by the first input tensor.
+        bool variableChannels = false;
         int numChannels = inputs[0][1];
-        for (int i = 1; i < inputs.size(); i++)
+        for (size_t i = 1; i < inputs.size(); i++)
         {
-            CV_Assert(inputs[0][0] == inputs[i][0]);
-
-            // It's allowed for channels axis to be different.
-            for (int j = 2; j < dims; j++)
+            CV_Assert(inputs[0][0] == inputs[i][0]);  // batch sizes are equal
+
+            int input_channels = inputs[i][1];
+            if (numChannels != input_channels)
+                variableChannels = true;
+
+            if (channelsModeInput == ELTWISE_CHANNNELS_SAME)
+            {
+                CV_Assert(numChannels == input_channels);
+            }
+            else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0)
+            {
+                CV_Assert(numChannels >= input_channels);
+            }
+            else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
+            {
+                // nothing to check
+            }
+            else if (channelsModeInput == ELTWISE_CHANNNELS_USE_MAX)
+            {
+                numChannels = std::max(numChannels, input_channels);
+            }
+            else
+            {
+                CV_Assert(0 && "Internal error");
+            }
+
+            for (size_t j = 2; j < dims; j++)
                 CV_Assert(inputs[0][j] == inputs[i][j]);
         }
+
+        channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME;
+        outputChannels = numChannels;

         outputs.assign(1, inputs[0]);
         outputs[0][1] = numChannels;
         return false;
     }

-    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
-    {
-        std::vector<Mat> inputs;
-        inputs_arr.getMatVector(inputs);
-        variableChannels = false;
-        for (int i = 1; i < inputs.size(); ++i)
-        {
-            if (inputs[i].size[1] != inputs[0].size[1])
-            {
-                variableChannels = true;
-                break;
-            }
-        }
-    }
-
     class EltwiseInvoker : public ParallelLoopBody
     {
-    public:
+        EltwiseLayerImpl& self;
         std::vector<const Mat*> srcs;
+        std::vector<int> srcNumChannels;
         int nsrcs;
         Mat* dst;
         std::vector<float> coeffs;
-        EltwiseOp op;
         int nstripes;
         const ActivationLayer* activ;
         int channels;
         size_t planeSize;

-        EltwiseInvoker() : nsrcs(0), dst(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
+        EltwiseInvoker(EltwiseLayerImpl& self_)
+            : self(self_)
+            , nsrcs(0), dst(0), nstripes(0), activ(0), channels(0)
+            , planeSize(0)
+        {}
+
+    public:
-        static void run(const Mat* srcs, int nsrcs, Mat& dst,
-                        const std::vector<float>& coeffs, EltwiseOp op,
-                        const ActivationLayer* activ, int nstripes)
+        static void run(EltwiseLayerImpl& self,
+                        const Mat* srcs, int nsrcs, Mat& dst,
+                        int nstripes)
         {
+            const EltwiseOp op = self.op;
             CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous());
-            CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs);
+            CV_Assert(self.coeffs.empty() || self.coeffs.size() == (size_t)nsrcs);
+            CV_CheckGE(nsrcs, 2, "");
+            CV_Assert(self.outputChannels == dst.size[1]);

-            EltwiseInvoker p;
+            EltwiseInvoker p(self);
             p.srcs.resize(nsrcs);
-            p.coeffs = coeffs;
+            p.srcNumChannels.resize(nsrcs);
+            p.coeffs = self.coeffs;  // can be sorted
+
+            bool sortInputs = false;
             for( int i = 0; i < nsrcs; i++ )
             {
-                p.srcs[i] = srcs + i;
-                CV_Assert(srcs[i].type() == dst.type() &&
-                          srcs[i].isContinuous());
-                // Sort srcs and coefficients in the order by number of channels
-                for( int j = i; j >= 1 && p.srcs[j - 1]->size[1] < p.srcs[j]->size[1]; j-- )
-                {
-                    std::swap(p.srcs[j - 1], p.srcs[j]);
-                    if (!p.coeffs.empty())
-                        std::swap(p.coeffs[j - 1], p.coeffs[j]);
-                }
+                p.srcs[i] = &srcs[i];
+                CV_CheckEQ(srcs[i].dims, dst.dims, "");
+                CV_Assert(srcs[i].isContinuous());
+                CV_Assert(srcs[i].type() == dst.type());
+                p.srcNumChannels[i] = (srcs[i].dims >= 4) ? srcs[i].size[1] : 1;
+
+                if (self.channelsMode == ELTWISE_CHANNNELS_SAME)
+                {
+                    CV_Assert(srcs[i].size == dst.size);
+                }
+                else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0)
+                {
+                    if (i == 0)
+                        CV_Assert(srcs[0].size == dst.size);
+                    CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
+                    sortInputs = true;
+                }
+                else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
+                {
+                    if (i == 0)
+                        CV_Assert(srcs[0].size == dst.size);
+                    sortInputs = true;
+                }
+                else if (self.channelsMode == ELTWISE_CHANNNELS_USE_MAX)
+                {
+                    CV_Assert(op == SUM);
+                    CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
+                    sortInputs = true;
+                }
+                else
+                {
+                    CV_Assert(0 && "Internal error");
+                }
+
+                if (sortInputs)
+                {
+                    // Sort srcs and coefficients in the desc order by number of channels
+                    for (int j = i; j >= 1; j--)
+                    {
+                        if (std::min(self.outputChannels, p.srcs[j - 1]->size[1]) < std::min(self.outputChannels, p.srcs[j]->size[1]))
+                        {
+                            std::swap(p.srcs[j - 1], p.srcs[j]);
+                            std::swap(p.srcNumChannels[j - 1], p.srcNumChannels[j]);
+                            if (!p.coeffs.empty())
+                                std::swap(p.coeffs[j - 1], p.coeffs[j]);
+                        }
+                        else
+                            break;
+                    }
+                }
             }

             p.nsrcs = nsrcs;
             p.dst = &dst;
-            p.op = op;
             p.nstripes = nstripes;
             p.channels = (dst.dims >= 4 ? dst.size[1] : 1);

             p.planeSize = dst.total(dst.dims >= 4 ? 2 : 1);
-            CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);
+            CV_CheckEQ(dst.total(), dst.size[0] * p.channels * p.planeSize, "");

             bool simpleCoeffs = true;
-            if( op == SUM && !coeffs.empty() )
+            if (op == SUM && !p.coeffs.empty())
             {
-                CV_Assert( coeffs.size() == (size_t)nsrcs );
-                for( size_t i = 0; i < coeffs.size(); i++ )
-                    if( coeffs[i] != 1 )
+                CV_CheckEQ(p.coeffs.size(), (size_t)nsrcs, "");
+
+                for (size_t i = 0; i < p.coeffs.size(); i++)
+                {
+                    if (p.coeffs[i] != 1)
                     {
                         simpleCoeffs = false;
                         break;
                     }
+                }
             }
             if (simpleCoeffs)
                 p.coeffs.clear();
-            p.activ = activ;
+            p.activ = self.activ.get();

             parallel_for_(Range(0, nstripes), p, nstripes);
         }

         void operator()(const Range& r) const CV_OVERRIDE
         {
+            const EltwiseOp op = self.op;
             size_t total = dst->size[0]*planeSize;
             size_t stripeSize = (total + nstripes - 1)/nstripes;
             size_t stripeStart = r.start*stripeSize;
             size_t stripeEnd = std::min(r.end*stripeSize, total);
-            int c, j, k, n;
             const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0;
             float* dstptr0 = dst->ptr<float>();
-            int blockSize0 = 1 << 12, blockSize;
+            int blockSize0 = 1 << 12;

-            for( size_t ofs = stripeStart; ofs < stripeEnd; ofs += blockSize )
+            for (size_t ofs = stripeStart; ofs < stripeEnd; )
             {
                 int sampleIdx = (int)(ofs / planeSize);
                 int delta = (int)ofs - sampleIdx * planeSize;
-                blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta));
+                int blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta));
                 if( blockSize <= 0 )
                     break;
+                ofs += blockSize;

-                for( c = 0; c < channels; c++ )
+                for (int c = 0; c < channels; c++)
                 {
-                    size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize;
-                    const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
-                    float* dstptr = dstptr0 + globalDelta;
-
-                    // This code assumes that srcs are sorted in descending order by channels.
-                    for (n = 1; n < nsrcs && c < srcs[n]->size[1]; ++n) {}
-
-                    if (n == 1)
-                    {
-                        if( !coeffsptr )
-                        {
-                            for( j = 0; j < blockSize; j++ )
-                            {
-                                dstptr[j] = srcptr0[j];
-                            }
+                    size_t dstIdx = delta + (sampleIdx*channels + c)*planeSize;
+                    float* dstptr = dstptr0 + dstIdx;
+
+                    // process first two inputs
+                    {
+                        const float* srcptr0 = srcs[0]->ptr<float>() + dstIdx;
+
+                        const int inputIdx = 1;
+                        int src1_channels = srcNumChannels[inputIdx];
+                        if (c >= src1_channels)
+                        {
+                            // no data from second input
+                            if (!coeffsptr || coeffsptr[0] == 1.0f)
+                            {
+                                for (int j = 0; j < blockSize; j++)
+                                {
+                                    dstptr[j] = srcptr0[j];
+                                }
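
The sortInputs block above is what the commit message bullet "do not swap inputs if the numbers of channels are the same after truncation" refers to: inputs are kept in descending order of their channel counts clipped to the output's channel count, and the insertion sort stays stable for equal keys. A standalone sketch of that comparison (the function name is illustrative, not from the patch):

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Stable insertion sort in descending order of effective (truncated)
    // channel counts, mirroring the swap condition in EltwiseInvoker::run().
    void sortByEffectiveChannels(std::vector<int>& channels, int outputChannels)
    {
        for (size_t i = 1; i < channels.size(); i++)
        {
            for (size_t j = i; j >= 1; j--)
            {
                // Compare channel counts clipped to outputChannels: with
                // outputChannels = 4, inputs with 5 and 4 channels compare
                // equal and are NOT swapped.
                if (std::min(outputChannels, channels[j - 1]) < std::min(outputChannels, channels[j]))
                    std::swap(channels[j - 1], channels[j]);
                else
                    break;
            }
        }
    }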
@@ -257,74 +375,112 @@ public:
                             }
                             else
                             {
                                 float c0 = coeffsptr[0];
-                                for( j = 0; j < blockSize; j++ )
+                                for (int j = 0; j < blockSize; j++)
                                 {
                                     dstptr[j] = c0*srcptr0[j];
                                 }
                             }
                         }
-                    else if( op == PROD )
-                    {
-                        for( k = 1; k < n; k++ )
-                        {
-                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
-                            for( j = 0; j < blockSize; j++ )
-                            {
-                                dstptr[j] = srcptr0[j]*srcptr1[j];
-                            }
-                            srcptr0 = (const float*)dstptr;
-                        }
-                    }
-                    else if( op == DIV )
-                    {
-                        for( k = 1; k < n; k++ )
-                        {
-                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
-                            for( j = 0; j < blockSize; j++ )
-                            {
-                                dstptr[j] = srcptr0[j]/srcptr1[j];
-                            }
-                            srcptr0 = (const float*)dstptr;
-                        }
-                    }
-                    else if( op == MAX )
-                    {
-                        for( k = 1; k < n; k++ )
-                        {
-                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
-                            for( j = 0; j < blockSize; j++ )
-                            {
-                                dstptr[j] = std::max(srcptr0[j], srcptr1[j]);
-                            }
-                            srcptr0 = (const float*)dstptr;
-                        }
-                    }
-                    else if( !coeffsptr )
-                    {
-                        for( k = 1; k < n; k++ )
-                        {
-                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
-                            for( j = 0; j < blockSize; j++ )
-                            {
-                                dstptr[j] = srcptr0[j] + srcptr1[j];
-                            }
-                            srcptr0 = (const float*)dstptr;
-                        }
-                    }
-                    else
-                    {
-                        float c0 = coeffsptr[0];
-                        for( k = 1; k < n; k++ )
-                        {
-                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
-                            float c1 = coeffsptr[k];
-                            for( j = 0; j < blockSize; j++ )
-                            {
-                                dstptr[j] = c0*srcptr0[j] + c1*srcptr1[j];
-                            }
-                            srcptr0 = (const float*)dstptr;
-                            c0 = 1;
-                        }
-                    }
+                        else
+                        {
+                            size_t srcIdx = delta + (sampleIdx * src1_channels + c) * planeSize;
+                            const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;
+                            if (op == PROD)
+                            {
+                                for (int j = 0; j < blockSize; j++)
+                                {
+                                    dstptr[j] = srcptr0[j] * srcptrI[j];
+                                }
+                            }
+                            else if (op == DIV)
+                            {
+                                for (int j = 0; j < blockSize; j++)
+                                {
+                                    dstptr[j] = srcptr0[j] / srcptrI[j];
+                                }
+                            }
+                            else if (op == MAX)
+                            {
+                                for (int j = 0; j < blockSize; j++)
+                                {
+                                    dstptr[j] = std::max(srcptr0[j], srcptrI[j]);
+                                }
+                            }
+                            else if (op == SUM)
+                            {
+                                if (!coeffsptr || (coeffsptr[0] == 1.0f && coeffsptr[1] == 1.0f))
+                                {
+                                    for (int j = 0; j < blockSize; j++)
+                                    {
+                                        dstptr[j] = srcptr0[j] + srcptrI[j];
+                                    }
+                                }
+                                else
+                                {
+                                    float c0 = coeffsptr[0];
+                                    float c1 = coeffsptr[1];
+                                    for (int j = 0; j < blockSize; j++)
+                                    {
+                                        dstptr[j] = c0*srcptr0[j] + c1*srcptrI[j];
+                                    }
+                                }
+                            }
+                            else
+                                CV_Error(Error::StsInternal, "");
+                        }
+                    }
+
+                    // aggregate other inputs (3+)
+                    for (size_t inputIdx = 2; inputIdx < nsrcs; inputIdx++)
+                    {
+                        int srcI_channels = srcNumChannels[inputIdx];
+                        if (c >= srcI_channels)
+                            continue;  // no data from this input
+                        size_t srcIdx = delta + (sampleIdx * srcI_channels + c) * planeSize;
+                        const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;
+
+                        if (op == PROD)
+                        {
+                            for (int j = 0; j < blockSize; j++)
+                            {
+                                dstptr[j] *= srcptrI[j];
+                            }
+                        }
+                        else if (op == DIV)
+                        {
+                            for (int j = 0; j < blockSize; j++)
+                            {
+                                dstptr[j] /= srcptrI[j];
+                            }
+                        }
+                        else if (op == MAX)
+                        {
+                            for (int j = 0; j < blockSize; j++)
+                            {
+                                dstptr[j] = std::max(dstptr[j], srcptrI[j]);
+                            }
+                        }
+                        else if (op == SUM)
+                        {
+                            if (!coeffsptr || coeffsptr[inputIdx] == 1.0f)
+                            {
+                                for (int j = 0; j < blockSize; j++)
+                                {
+                                    dstptr[j] += srcptrI[j];
+                                }
+                            }
+                            else
+                            {
+                                float cI = coeffsptr[inputIdx];
+                                for (int j = 0; j < blockSize; j++)
+                                {
+                                    dstptr[j] += cI * srcptrI[j];
+                                }
+                            }
+                        }
+                        else
+                            CV_Error(Error::StsInternal, "");
+                    }
                 }
             }
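
The heart of the mixed-channel support is the per-input source index: every input is addressed with its own channel count, so an input that lacks channel c simply contributes nothing there. A sketch of the arithmetic under the layout this code assumes (contiguous NCHW float tensors):

    #include <cstddef>

    // For a contiguous NCHW tensor with 'channels' channels and
    // planeSize = H*W, the offset of element (n, c, spatialOffset) is:
    //     ((size_t)n * channels + c) * planeSize + spatialOffset
    // The destination uses the output channel count, while each source uses
    // its own channel count; this is how inputs with fewer channels are
    // skipped rather than read out of bounds.
    size_t nchwOffset(int n, int c, int channels, size_t planeSize, int spatialOffset)
    {
        return ((size_t)n * channels + c) * planeSize + spatialOffset;
    }

    // Example: the output has 4 channels and input 1 has 2 channels. For
    // c = 3 the input offset is never computed, because c >= 2 means
    // "no data from input 1" for that channel.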
@@ -343,7 +499,7 @@ public:
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;

-        if ((inputs_.depth() == CV_16S && op != SUM) || variableChannels)
+        if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
             return false;

         inputs_.getUMatVector(inputs);
@@ -446,8 +602,9 @@ public:
         CV_Assert(outputs.size() == 1);
         const int nstripes = getNumThreads();
-        EltwiseInvoker::run(&inputs[0], (int)inputs.size(), outputs[0],
-                            coeffs, op, activ.get(), nstripes);
+        EltwiseInvoker::run(*this,
+                            &inputs[0], (int)inputs.size(), outputs[0],
+                            nstripes);
     }

     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
@@ -558,6 +715,7 @@ public:
         CV_UNUSED(outputs); // suppress unused variable warning
         CV_Assert(inputs.size());

+        // FIXIT: handle inputs with different number of channels
         long flops = inputs.size() * total(inputs[0]);

         return flops;
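
As the new FIXIT comment notes, getFLOPS() still assumes every input has the first input's shape. A hedged sketch of what a channel-aware estimate could look like (an assumption for illustration, not part of the patch):

    #include <algorithm>
    #include <vector>

    // Hypothetical channel-aware FLOPS estimate for NCHW shapes: each input
    // contributes work only for the channels it actually provides.
    long eltwiseFlops(const std::vector<std::vector<int> >& inputShapes, int outputChannels)
    {
        long flops = 0;
        for (size_t i = 0; i < inputShapes.size(); i++)
        {
            const std::vector<int>& s = inputShapes[i];  // {N, C, H, W}
            long planeSize = (long)s[2] * s[3];
            flops += (long)s[0] * std::min(outputChannels, s[1]) * planeSize;
        }
        return flops;
    }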

(File 5 of 7: Darknet layer tests)

@@ -99,6 +99,7 @@ class Test_Darknet_layers : public DNNTestLayer
 public:
     void testDarknetLayer(const std::string& name, bool hasWeights = false)
     {
+        SCOPED_TRACE(name);
         Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
         Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy"));

@@ -115,6 +116,47 @@ public:
         net.setInput(inp);
         Mat out = net.forward();
         normAssert(out, ref, "", default_l1, default_lInf);
+
+        if (inp.size[0] == 1)  // test handling of batch size
+        {
+            SCOPED_TRACE("batch size 2");
+
+#if defined(INF_ENGINE_RELEASE)
+            if (target == DNN_TARGET_MYRIAD && name == "shortcut")
+                applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
+#endif
+
+            std::vector<int> sz2 = shape(inp);
+            sz2[0] = 2;
+
+            Net net2 = readNet(cfg, model);
+            net2.setPreferableBackend(backend);
+            net2.setPreferableTarget(target);
+            Range ranges0[4] = { Range(0, 1), Range::all(), Range::all(), Range::all() };
+            Range ranges1[4] = { Range(1, 2), Range::all(), Range::all(), Range::all() };
+            Mat inp2(sz2, inp.type(), Scalar::all(0));
+            inp.copyTo(inp2(ranges0));
+            inp.copyTo(inp2(ranges1));
+            net2.setInput(inp2);
+            Mat out2 = net2.forward();
+            EXPECT_EQ(0, cv::norm(out2(ranges0), out2(ranges1), NORM_INF)) << "Batch result is not equal: " << name;
+
+            Mat ref2 = ref;
+            if (ref.dims == 2 && out2.dims == 3)
+            {
+                int ref_3d_sizes[3] = {1, ref.rows, ref.cols};
+                ref2 = Mat(3, ref_3d_sizes, ref.type(), (void*)ref.data);
+            }
+            /*else if (ref.dims == 3 && out2.dims == 4)
+            {
+                int ref_4d_sizes[4] = {1, ref.size[0], ref.size[1], ref.size[2]};
+                ref2 = Mat(4, ref_4d_sizes, ref.type(), (void*)ref.data);
+            }*/
+            ASSERT_EQ(out2.dims, ref2.dims) << ref.dims;
+
+            normAssert(out2(ranges0), ref2, "", default_l1, default_lInf);
+            normAssert(out2(ranges1), ref2, "", default_l1, default_lInf);
+        }
     }
 };

(File 6 of 7: Eltwise layer tests)

@@ -1582,30 +1582,28 @@ TEST(Layer_Test_Convolution, relu_fusion)
 }

 typedef testing::TestWithParam<tuple<bool, tuple<Backend, Target> > > Layer_Test_Eltwise_unequal;
-TEST_P(Layer_Test_Eltwise_unequal, Accuracy)
+TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0_truncate)
 {
     bool weighted = get<0>(GetParam());
     int backendId = get<0>(get<1>(GetParam()));
     int targetId = get<1>(get<1>(GetParam()));

-    if (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL_FP16)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
-
     Net net;
     LayerParams lp;
     lp.type = "Eltwise";
     lp.name = "testLayer";
+    lp.set<std::string>("output_channels_mode", "input_0_truncate");

     const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 5, 2, 2}, {1, 3, 2, 2}};
+    const int out_channels = inpShapes[0][1];
     std::vector<String> inpNames(3);
     std::vector<Mat> inputs(3);

-    size_t numOutValues = 1*4*2*2;  // By the first input
-
     std::vector<float> weights(3, 1);
     if (weighted)
     {
         for (int i = 0; i < inputs.size(); ++i)
-            randu(Mat(1, 1, CV_32F, &weights[i]), -1, 1);
+            weights[i] = -0.125f + i * 0.25f;
         lp.set("coeff", DictValue::arrayReal<float*>(&weights[0], weights.size()));
     }

@@ -1613,27 +1611,103 @@ TEST_P(Layer_Test_Eltwise_unequal, Accuracy)
     for (int i = 0; i < inputs.size(); ++i)
     {
         inputs[i].create(4, inpShapes[i], CV_32F);
-        randu(inputs[i], 0, 255);
+        size_t total = inputs[i].total();
+        for (size_t j = 0; j < total; j++)
+            inputs[i].ptr<float>()[j] = j + i * 100;
         inpNames[i] = format("input_%d", i);
         net.connect(0, i, eltwiseId, i);
     }

-    Mat ref(1, numOutValues, CV_32F, Scalar(0));
+    Mat ref(4, inpShapes[0], CV_32F, Scalar(0));
     net.setInputsNames(inpNames);
     for (int i = 0; i < inputs.size(); ++i)
     {
+        //std::cout << ref.reshape(1,1) << endl;
         net.setInput(inputs[i], inpNames[i]);
-        if (numOutValues >= inputs[i].total())
-            ref.colRange(0, inputs[i].total()) += weights[i] * inputs[i].reshape(1, 1);
-        else
-            ref += weights[i] * inputs[i].reshape(1, 1).colRange(0, numOutValues);
+        for (size_t batchId = 0; batchId < ref.size[0]; batchId++)
+        {
+            int input_channels = inputs[i].size[1];
+            Range ranges[4] = { Range(batchId, batchId + 1), Range(0, std::min(out_channels, input_channels)), Range::all(), Range::all() };
+            Mat ref_slice = ref(ranges);
+            Mat input_slice = inputs[i](ranges);
+            ref_slice += weights[i] * input_slice;
+        }
     }

     net.setPreferableBackend(backendId);
     net.setPreferableTarget(targetId);
     Mat out = net.forward();
-    normAssert(out.reshape(1, 1), ref);
+    normAssert(out, ref);
+    if (testing::Test::HasFailure())
+    {
+        std::cout << out.reshape(1,1) << endl;
+        std::cout << ref.reshape(1,1) << endl;
+    }
 }

+TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0)
+{
+    bool weighted = get<0>(GetParam());
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));
+
+    Net net;
+    LayerParams lp;
+    lp.type = "Eltwise";
+    lp.name = "testLayer";
+    lp.set<std::string>("output_channels_mode", "input_0");
+
+    const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 2, 2, 2}, {1, 3, 2, 2}};
+    const int out_channels = inpShapes[0][1];
+    std::vector<String> inpNames(3);
+    std::vector<Mat> inputs(3);
+
+    std::vector<float> weights(3, 1);
+    if (weighted)
+    {
+        for (int i = 0; i < inputs.size(); ++i)
+            weights[i] = -0.125f + i * 0.25f;
+        lp.set("coeff", DictValue::arrayReal<float*>(&weights[0], weights.size()));
+    }
+
+    int eltwiseId = net.addLayer(lp.name, lp.type, lp);
+    for (int i = 0; i < inputs.size(); ++i)
+    {
+        inputs[i].create(4, inpShapes[i], CV_32F);
+        size_t total = inputs[i].total();
+        for (size_t j = 0; j < total; j++)
+            inputs[i].ptr<float>()[j] = j + i * 100;
+        inpNames[i] = format("input_%d", i);
+        net.connect(0, i, eltwiseId, i);
+    }
+
+    Mat ref(4, inpShapes[0], CV_32F, Scalar(0));
+    net.setInputsNames(inpNames);
+    for (int i = 0; i < inputs.size(); ++i)
+    {
+        //std::cout << ref.reshape(1,1) << endl;
+        net.setInput(inputs[i], inpNames[i]);
+        for (size_t batchId = 0; batchId < ref.size[0]; batchId++)
+        {
+            int input_channels = inputs[i].size[1];
+            Range ranges[4] = { Range(batchId, batchId + 1), Range(0, std::min(out_channels, input_channels)), Range::all(), Range::all() };
+            Mat ref_slice = ref(ranges);
+            Mat input_slice = inputs[i](ranges);
+            ref_slice += weights[i] * input_slice;
+        }
+    }
+
+    net.setPreferableBackend(backendId);
+    net.setPreferableTarget(targetId);
+    Mat out = net.forward();
+    normAssert(out, ref);
+    if (testing::Test::HasFailure())
+    {
+        std::cout << out.reshape(1,1) << endl;
+        std::cout << ref.reshape(1,1) << endl;
+    }
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_unequal, Combine(
     testing::Bool(),
     dnnBackendsAndTargets()

(File 7 of 7: core norm() argument checks)

@@ -1368,7 +1368,8 @@ double norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask)
     int normType0 = normType;
     normType = normType == NORM_L2SQR ? NORM_L2 : normType;

-    CV_Assert( src1.type() == src2.type() && src1.size == src2.size );
+    CV_CheckTypeEQ(src1.type(), src2.type(), "");
+    CV_Assert(src1.size == src2.size);
     CV_Assert( mask.empty() || (src1.size == mask.size && mask.type() == CV_8U) );
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );

     const Mat *arrays[]={&src1, &src2, &mask, 0};