mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #23952 from zihaomu:fix_depth_conv_5x5
DNN: optimize the speed of general Depth-wise #23952

Try to solve the issue: https://github.com/opencv/opencv/issues/23941

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
1f7025f028
commit
1920993525
@ -744,7 +744,8 @@ static const ConvParam_t testConvolutionConfigs[] = {
|
|||||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 4, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 864.},
|
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 4, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 864.},
|
||||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 96, 1, 1}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 772.},
|
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 96, 1, 1}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 772.},
|
||||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 8, 1, 1}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 544.},
|
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 8, 1, 1}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 544.},
|
||||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 32, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 520.}
|
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 32, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 520.},
|
||||||
|
/* GFLOPS 0.472 x 1 = 0.472 */ {{5, 5}, {{1, 32, 96, 96}}, 32, 32, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 472154112.}
|
||||||
};
|
};
|
||||||
struct ConvParamID
|
struct ConvParamID
|
||||||
{
|
{
|
||||||
|
@ -1290,7 +1290,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
|
|||||||
else
|
else
|
||||||
Kg_nblocks = 1;
|
Kg_nblocks = 1;
|
||||||
|
|
||||||
bool separateIm2col = fast_1x1 || stripes_per_plane == 1;
|
bool separateIm2col = (fast_1x1 || stripes_per_plane == 1) && conv->conv_type != CONV_TYPE_DEPTHWISE_REMAIN;
|
||||||
|
|
||||||
int Kstripes = Kg_nblocks * stripes_per_plane;
|
int Kstripes = Kg_nblocks * stripes_per_plane;
|
||||||
int nsubtasks = N * ngroups * Kstripes;
|
int nsubtasks = N * ngroups * Kstripes;
|
||||||
|
Loading…
Reference in New Issue
Block a user