Mirror of https://github.com/opencv/opencv.git (synced 2025-06-07 17:44:04 +08:00)

Commit e28e3c9491: Merge remote-tracking branch 'upstream/3.4' into merge-3.4
@@ -457,6 +457,7 @@ OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers"
OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) )
OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF IF CV_GCC )
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
OCV_OPTION(OPENCV_ENABLE_MEMORY_SANITIZER "Better support for memory/address sanitizers" OFF)
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
@@ -383,4 +383,7 @@ macro(ocv_add_modules_compiler_options)
string(REPLACE "/W3" "/W4" ${flags} "${${flags}}")
endforeach()
endif()
if(OPENCV_ENABLE_MEMORY_SANITIZER)
add_definitions(-DOPENCV_ENABLE_MEMORY_SANITIZER=1)
endif()
endmacro()
@@ -15,15 +15,11 @@ endif()
ocv_clear_vars(HAVE_QT HAVE_QT5)
if(WITH_QT)
if(NOT WITH_QT EQUAL 4)
find_package(Qt5Core)
find_package(Qt5Gui)
find_package(Qt5Widgets)
find_package(Qt5Test)
find_package(Qt5Concurrent)
if(Qt5Core_FOUND AND Qt5Gui_FOUND AND Qt5Widgets_FOUND AND Qt5Test_FOUND AND Qt5Concurrent_FOUND)
find_package(Qt5 COMPONENTS Core Gui Widgets Test Concurrent REQUIRED NO_MODULE)
if(Qt5_FOUND)
set(HAVE_QT5 ON)
set(HAVE_QT ON)
find_package(Qt5OpenGL)
find_package(Qt5 COMPONENTS OpenGL QUIET)
if(Qt5OpenGL_FOUND)
set(QT_QTOPENGL_FOUND ON)
endif()
@@ -952,7 +952,7 @@ can be used as well.
CV_EXPORTS_W bool findChessboardCornersSB(InputArray image,Size patternSize, OutputArray corners,int flags=0);

//! finds subpixel-accurate positions of the chessboard corners
CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );
CV_EXPORTS_W bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );

/** @brief Renders the detected chessboard corners.
@@ -198,6 +198,15 @@ public class Calib3dTest extends OpenCVTestCase {
assertTrue(!corners.empty());
}

public void testFind4QuadCornerSubpix() {
Size patternSize = new Size(9, 6);
MatOfPoint2f corners = new MatOfPoint2f();
Size region_size = new Size(5, 5);
Calib3d.findChessboardCorners(grayChess, patternSize, corners);
Calib3d.find4QuadCornerSubpix(grayChess, corners, region_size);
assertTrue(!corners.empty());
}

public void testFindCirclesGridMatSizeMat() {
int size = 300;
Mat img = new Mat(size, size, CvType.CV_8U);
@@ -909,6 +909,14 @@ void cv::computeCorrespondEpilines( InputArray _points, int whichImage,
}
}

static inline double scaleFor(double x){
return (std::fabs(x) > std::numeric_limits<float>::epsilon()) ? 1./x : 1.;
}
static inline float scaleFor(float x){
return (std::fabs(x) > std::numeric_limits<float>::epsilon()) ? 1.f/x : 1.f;
}

void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst )
{
CV_INSTRUMENT_REGION();
@@ -967,7 +975,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst )
Point2f* dptr = dst.ptr<Point2f>();
for( i = 0; i < npoints; i++ )
{
float scale = sptr[i].z != 0.f ? 1.f/sptr[i].z : 1.f;
float scale = scaleFor(sptr[i].z);
dptr[i] = Point2f(sptr[i].x*scale, sptr[i].y*scale);
}
}
@@ -977,7 +985,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst )
Point3f* dptr = dst.ptr<Point3f>();
for( i = 0; i < npoints; i++ )
{
float scale = sptr[i][3] != 0.f ? 1.f/sptr[i][3] : 1.f;
float scale = scaleFor(sptr[i][3]);
dptr[i] = Point3f(sptr[i][0]*scale, sptr[i][1]*scale, sptr[i][2]*scale);
}
}
@@ -990,7 +998,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst )
Point2d* dptr = dst.ptr<Point2d>();
for( i = 0; i < npoints; i++ )
{
double scale = sptr[i].z != 0. ? 1./sptr[i].z : 1.;
double scale = scaleFor(sptr[i].z);
dptr[i] = Point2d(sptr[i].x*scale, sptr[i].y*scale);
}
}
@@ -1000,7 +1008,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst )
Point3d* dptr = dst.ptr<Point3d>();
for( i = 0; i < npoints; i++ )
{
double scale = sptr[i][3] != 0.f ? 1./sptr[i][3] : 1.;
double scale = scaleFor(sptr[i][3]);
dptr[i] = Point3d(sptr[i][0]*scale, sptr[i][1]*scale, sptr[i][2]*scale);
}
}
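For context, a standalone sketch (not the library code) of why the new `scaleFor()` helper compares against `std::numeric_limits<float>::epsilon()` instead of testing for exact zero: w components that are merely denormal-small no longer blow up the dehomogenized coordinates.

```cpp
// Minimal sketch mirroring the scaleFor() helper above.
#include <cmath>
#include <cstdio>
#include <limits>

static inline double scaleFor(double x)
{
    // Values at or numerically near zero are treated as degenerate: a scale of 1
    // avoids producing inf/NaN for points at (or almost at) infinity.
    return (std::fabs(x) > std::numeric_limits<float>::epsilon()) ? 1. / x : 1.;
}

int main()
{
    const double w[] = { 2.0, 0.0, 1e-30 };   // example homogeneous w components
    for (double v : w)
        std::printf("w = %g -> scale = %g\n", v, scaleFor(v));
    // w = 2     -> 0.5  (regular point, divide through)
    // w = 0     -> 1    (point at infinity, left unscaled)
    // w = 1e-30 -> 1    (tiny w no longer explodes to 1e+30)
    return 0;
}
```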
@@ -1128,12 +1128,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,

inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b)
{
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(a.val, b.val));
__m256i half = _mm256_sad_epu8(a.val, b.val);
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b)
{
__m256i half = _mm256_set1_epi8(0x7f);
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)));
half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half));
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b)
{
@@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)

inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(a.val, b.val));
__m128i half = _mm_sad_epu8(a.val, b.val);
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
__m128i half = _mm_set1_epi8(0x7f);
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(_mm_add_epi8(a.val, half),
_mm_add_epi8(b.val, half)));
half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{
@@ -763,7 +763,7 @@ inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{
vec_ushort8 ad = vec_absd(a.val, b.val);
VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)), vec_int4_c(vec_unpacklu(ad)));
VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z);
return (unsigned)vec_extract(sum, 3);
}
inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
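The common thread of these fixes is that the 8-bit SAD instructions leave one partial sum per 64-bit lane, and the old code returned only the lowest lane. A scalar reference (a sketch, not OpenCV code) of what `v_reduce_sad` must return, which is effectively what the new `test_reduce_sad` below compares against:

```cpp
// Scalar reference for v_reduce_sad over nlanes 8-bit elements.
#include <cstdint>
#include <cstdlib>

unsigned reduce_sad_ref(const uint8_t* a, const uint8_t* b, int nlanes)
{
    unsigned sum = 0;
    for (int i = 0; i < nlanes; i++)
        sum += (unsigned)std::abs((int)a[i] - (int)b[i]);   // |a[i] - b[i]|, summed over all lanes
    return sum;
}
```

With the dropped-high-lane bug, elements in the upper 64-bit lane(s) contributed nothing to the result, so any test whose inputs differ across the whole register (as `test_reduce_sad` arranges) exposes the discrepancy.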
@@ -118,7 +118,11 @@ CV_EXPORTS void setUseCollection(bool flag); // set implementation collection st
}
\endcode
*/
#ifdef OPENCV_ENABLE_MEMORY_SANITIZER
template<typename _Tp, size_t fixed_size = 0> class AutoBuffer
#else
template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
#endif
{
public:
typedef _Tp value_type;
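A minimal sketch of the idea behind forcing `fixed_size` to 0 under `OPENCV_ENABLE_MEMORY_SANITIZER` (the `TinyAutoBuffer` type below is hypothetical, not the real class): with an in-object array, a small overrun stays inside the object and sanitizers cannot flag it; with a zero fixed size, every buffer becomes a heap allocation that AddressSanitizer/MemorySanitizer can redzone and track.

```cpp
// Illustrative only; the real AutoBuffer has a richer interface.
#include <cstddef>

template<typename T, size_t fixed_size = 8>
struct TinyAutoBuffer
{
    explicit TinyAutoBuffer(size_t n)
    {
        // With fixed_size == 0 this condition is never true, so every
        // allocation goes through new[], which the sanitizer instruments.
        ptr = (fixed_size > 0 && n <= fixed_size) ? stackBuf : new T[n];
        size = n;
    }
    ~TinyAutoBuffer() { if (ptr != stackBuf) delete[] ptr; }
    T* data() { return ptr; }

    T stackBuf[fixed_size ? fixed_size : 1];  // unused when fixed_size == 0
    T* ptr;
    size_t size;
};
```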
@@ -770,6 +770,15 @@ template<typename R> struct TheTest
return *this;
}

TheTest & test_reduce_sad()
{
Data<R> dataA, dataB(R::nlanes/2);
R a = dataA;
R b = dataB;
EXPECT_EQ((unsigned)(R::nlanes*R::nlanes/4), v_reduce_sad(a, b));
return *this;
}

TheTest & test_mask()
{
typedef typename V_RegTraits<R>::int_reg int_reg;
@@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
@@ -1358,6 +1368,7 @@ void test_hal_intrin_int8()
.test_absdiff()
.test_absdiffs()
.test_abs()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
@@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
@@ -1418,6 +1430,7 @@ void test_hal_intrin_int16()
.test_absdiffs()
.test_abs()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
@@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
@@ -1473,6 +1487,7 @@ void test_hal_intrin_int32()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
@@ -1528,6 +1543,7 @@ void test_hal_intrin_float32()
.test_min_max()
.test_float_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_unpack()
.test_float_math()
@@ -210,7 +210,10 @@ CV__DNN_INLINE_NS_BEGIN
class CV_EXPORTS BaseConvolutionLayer : public Layer
{
public:
Size kernel, stride, pad, dilation, adjustPad;
CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad;
std::vector<size_t> adjust_pads;
std::vector<size_t> kernel_size, strides, dilations;
std::vector<size_t> pads_begin, pads_end;
String padMode;
int numOutput;
};
@@ -243,9 +246,10 @@ CV__DNN_INLINE_NS_BEGIN
{
public:
int type;
Size kernel, stride;
int pad_l, pad_t, pad_r, pad_b;
CV_DEPRECATED_EXTERNAL Size pad;
std::vector<size_t> kernel_size, strides;
std::vector<size_t> pads_begin, pads_end;
CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
bool globalPooling;
bool computeMaxIdx;
String padMode;
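The new `kernel_size`/`strides`/`pads_begin`/`pads_end` vectors store spatial dimensions in {h, w} (or {d, h, w} for 3D) order, while the deprecated `Size` members keep the old (width, height) convention for external code. A small illustrative sketch of that mapping (`syncLegacyFields` is a hypothetical helper, not part of the header):

```cpp
#include <vector>
#include <opencv2/core.hpp>

void syncLegacyFields(const std::vector<size_t>& kernel_size,
                      const std::vector<size_t>& strides,
                      cv::Size& kernel, cv::Size& stride)
{
    if (kernel_size.size() == 2)   // only the 2D case has a Size equivalent
    {
        kernel = cv::Size((int)kernel_size[1], (int)kernel_size[0]);  // {h, w} -> (w, h)
        stride = cv::Size((int)strides[1], (int)strides[0]);
    }
}
```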
@@ -6,7 +6,7 @@
#define OPENCV_DNN_VERSION_HPP

/// Use with major OpenCV version only.
#define OPENCV_DNN_API_VERSION 20190422
#define OPENCV_DNN_API_VERSION 20190430

#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
@@ -209,8 +209,10 @@ class dnn_test(NewOpenCVTests):
outs.insert(0, netAsync.forwardAsync())

for i in reversed(range(numInputs)):
if outs[i].wait_for(timeout) == 1:
ret = outs[i].wait_for(timeout)
if ret == 1:
self.fail("Timeout")
self.assertEqual(ret, 0) # is ready
normAssert(self, refs[i], outs[i].get(), 'Index: %d' % i, 1e-10)
@@ -2326,6 +2326,7 @@ struct Net::Impl
if (isAsync)
CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");

CV_Assert(layer->supportBackend(DNN_BACKEND_OPENCV));
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
{
std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
@@ -67,31 +67,34 @@ public:
BaseConvolutionLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0;
getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t,
pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height,
dilation.width, padMode);

if (pad_t != pad_b || pad_l != pad_r)
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");

pad.width = pad_l;
pad.height = pad_t;
getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode);

numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);

adjustPad.height = params.get<int>("adj_h", 0);
adjustPad.width = params.get<int>("adj_w", 0);

CV_Assert(numOutput % ngroups == 0);

if (kernel_size.size() == 2) {
kernel = Size(kernel_size[1], kernel_size[0]);
stride = Size(strides[1], strides[0]);
for (int i = 0; i < pads_begin.size(); i++) {
if (pads_begin[i] != pads_end[i])
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
}
pad = Size(pads_begin[1], pads_begin[0]);
dilation = Size(dilations[1], dilations[0]);

adjust_pads.push_back(params.get<int>("adj_h", 0));
adjust_pads.push_back(params.get<int>("adj_w", 0));

adjustPad.height = adjust_pads[0];
adjustPad.width = adjust_pads[1];
CV_Assert(adjustPad.width < stride.width &&
adjustPad.height < stride.height);

}
newWeightAndBias = false;
}

void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
@@ -99,31 +102,38 @@ public:

CV_Assert(inputs.size() > 0);

CV_Assert(blobs.size() >= 1 && blobs.size() <= 2);
CV_Assert(blobs[0].dims == 4 && blobs[0].size[3] == kernel.width && blobs[0].size[2] == kernel.height);
CV_Assert(blobs.size() == 1 || blobs.size() == 2);
CV_Assert(inputs[0].dims == outputs[0].dims);
CV_Assert(blobs[0].dims == kernel_size.size() + 2);
for (int i = 0; i < kernel_size.size(); i++) {
CV_Assert(blobs[0].size[i + 2] == kernel_size[i]);
}

const Mat &input = inputs[0];
CV_Assert(input.dims == 4 && (input.type() == CV_32F || input.type() == CV_64F || input.type() == CV_16S));
CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S));
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i].type() == input.type());
CV_Assert(inputs[i].dims == 4 && inputs[i].size[1] == input.size[1]);
CV_Assert(inputs[i].size[2] == input.size[2] && inputs[i].size[3] == input.size[3]);
CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]);
for (int j = 0; j < inputs[i].dims; j++) {
CV_Assert(inputs[i].size[j] == input.size[j]);
}
}

Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);

int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;

getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);


if (pad_t != pad_b || pad_l != pad_r)
std::vector<int> inpShape;
std::vector<int> outShape;
for (int i = 2; i < inputs[0].dims; i++) {
inpShape.push_back(inputs[0].size[i]);
outShape.push_back(outputs[0].size[i]);
}
getConvPoolPaddings(inpShape, outShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end);
if (pads_begin.size() == 2) {
for (int i = 0; i < pads_begin.size(); i++) {
if (pads_begin[i] != pads_end[i])
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");

pad.width = pad_l;
pad.height = pad_t;
}
pad = Size(pads_begin[1], pads_begin[0]);
}
}

bool hasBias() const
@@ -238,15 +248,21 @@ public:
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if (kernel_size.size() == 3)
return preferableTarget == DNN_TARGET_CPU;
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R4) ||
(preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height);
}
else
#endif
{
if (kernel_size.size() != 2)
return false;
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan());
}
}

bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
@@ -259,21 +275,23 @@ public:

internals.clear();

int inpCn = inputs[0][1];
int inpH = inputs[0][2];
int inpW = inputs[0][3];
CV_Assert(inputs.size() != 0);
std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());

int outCn = blobs[0].size[0];
Size out;
std::vector<int> outShape;
outShape.push_back(inputs[0][0]);
outShape.push_back(outCn);

int inpCn = inputs[0][1];
if (padMode.empty())
{
out.height = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
out.width = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
for (int i = 0; i < inpShape.size(); i++)
outShape.push_back((inpShape[i] + pads_begin[i] + pads_end[i] - dilations[i] * (kernel_size[i] - 1) - 1) / strides[i] + 1);
}
else
{
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out);
getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape);
}

int ngroups = inpCn / blobs[0].size[1];
@@ -282,8 +300,7 @@ public:
"be multiple of %d but got %d", blobs[0].size[1], inpCn));
CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);

int dims[] = {inputs[0][0], outCn, out.height, out.width};
outputs.resize(inputs.size(), shape(dims, 4));
outputs.resize(1, outShape);

return false;
}
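The rewritten `getMemoryShapes` applies the usual convolution size relation independently to each spatial dimension. A standalone sketch of that formula for the explicit-padding branch (`convOutShape` is an assumed helper name, not part of the layer):

```cpp
#include <cstddef>
#include <vector>

std::vector<int> convOutShape(const std::vector<int>& inp,            // spatial dims only
                              const std::vector<size_t>& kernel,
                              const std::vector<size_t>& stride,
                              const std::vector<size_t>& dilation,
                              const std::vector<size_t>& padBegin,
                              const std::vector<size_t>& padEnd)
{
    std::vector<int> out;
    for (size_t i = 0; i < inp.size(); i++)
        out.push_back((int)((inp[i] + padBegin[i] + padEnd[i]
                             - dilation[i] * (kernel[i] - 1) - 1) / stride[i] + 1));
    return out;
}
// Example: a 224x224 input with a 3x3 kernel, stride 2, dilation 1 and pad 1 on both
// sides gives (224 + 1 + 1 - 2 - 1) / 2 + 1 = 112 per dimension.
```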
@@ -521,25 +538,28 @@ public:
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
CV_Assert(input->dims.size() == 4);
CV_Assert(input->dims.size() == 4 || input->dims.size() == 5);

const int inpCn = input->dims[2]; // NOTE: input->dims are reversed (whcn)
const int inpCn = input->dims[input->dims.size() - 2]; // NOTE: input->dims are reversed (WHIO or WHDIO)
const int outCn = blobs[0].size[0];
const int inpGroupCn = blobs[0].size[1];
const int group = inpCn / inpGroupCn;

auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
InferenceEngine::Layout layout = (input->dims.size() == 4) ? InferenceEngine::Layout::OIHW :
InferenceEngine::Layout::NCDHW;

auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
if (newWeightAndBias)
{
if (weightsMat.isContinuous())
{
Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
ieWeights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
ieWeights = wrapToInfEngineBlob(fusedWeights, layout);
}
else
{
ieWeights = InferenceEngine::make_shared_blob<float>(
InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
InferenceEngine::Precision::FP32, layout,
ieWeights->dims());
ieWeights->allocate();

@@ -558,11 +578,11 @@ public:
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ConvolutionLayer ieLayer(name);

ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width});
ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width});
ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width});
ieLayer.setKernel(kernel_size);
ieLayer.setStrides(strides);
ieLayer.setDilation(dilations);
ieLayer.setPaddingsBegin(pads_begin);
ieLayer.setPaddingsEnd(pads_end);
ieLayer.setGroup((size_t)group);
ieLayer.setOutDepth((size_t)outCn);

@@ -1155,6 +1175,10 @@ public:
CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0,
outputs.size() == 1, inputs[0].data != outputs[0].data);

if (inputs[0].dims == 5) {
CV_Error(Error::StsNotImplemented, "Convolution3D layer is not supported on OCV backend");
}

int ngroups = inputs[0].size[1]/blobs[0].size[1];
CV_Assert(outputs[0].size[1] % ngroups == 0);
int outCn = blobs[0].size[0];
@@ -1227,6 +1251,9 @@ public:
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if (kernel_size.size() == 3)
CV_Error(Error::StsNotImplemented, "Unsupported deconvolution3D layer");

if (INF_ENGINE_RELEASE >= 2018050000 && (adjustPad.height || adjustPad.width))
return false;

@@ -1242,7 +1269,7 @@ public:
}
else
#endif // HAVE_INF_ENGINE
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
return kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE);
}

bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -1253,39 +1280,36 @@ public:
CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput);
CV_Assert(inputs.size() != 0);

int inpCn = inputs[0][1];
int inpH = inputs[0][2];
int inpW = inputs[0][3];

int outH = -1, outW = -1;
int outCn = numOutput;
std::vector<int> outShape;
outShape.push_back(inputs[0][0]); // batch
outShape.push_back(outCn);
if (padMode.empty())
{
outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
for (int i = 0; i < kernel_size.size(); i++)
outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + kernel_size[i] - pads_begin[i] - pads_end[i] + adjust_pads[i]);
}
else if (padMode == "VALID")
{
outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height;
outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width;
for (int i = 0; i < kernel_size.size(); i++)
outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + kernel_size[i] + adjust_pads[i]);
}
else if (padMode == "SAME")
{
outH = stride.height * (inpH - 1) + 1 + adjustPad.height;
outW = stride.width * (inpW - 1) + 1 + adjustPad.width;
for (int i = 0; i < kernel_size.size(); i++)
outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + 1 + adjust_pads[i]);
}
else
CV_Error(Error::StsError, "Unsupported padding mode " + padMode);

int outCn = numOutput;

CV_Assert(outCn % blobs[0].size[1] == 0);
int ngroups = outCn / blobs[0].size[1];

int inpCn = inputs[0][1];
CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
CV_Assert(blobs[0].size[0] == inpCn);

int dims[] = {inputs[0][0], outCn, outH, outW};
outputs.resize(inputs.size(), shape(dims, 4));
outputs.resize(1, outShape);

if (!is1x1())
internals.push_back(computeColRowShape(inputs[0], outputs[0]));
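Deconvolution (transposed convolution) inverts the forward size relation; the three branches above reduce to the per-dimension formulas below (an illustrative sketch, not the layer code):

```cpp
#include <string>

int deconvOutDim(int in, int kernel, int stride, int padBegin, int padEnd,
                 int adjust, const std::string& padMode)
{
    if (padMode.empty())          // explicit padding
        return stride * (in - 1) + kernel - padBegin - padEnd + adjust;
    if (padMode == "VALID")
        return stride * (in - 1) + kernel + adjust;
    if (padMode == "SAME")
        return stride * (in - 1) + 1 + adjust;
    return -1;                    // unsupported mode
}
```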
@@ -1301,16 +1325,20 @@ public:
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);

int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
Size(inputs[0].size[3], inputs[0].size[2]),
kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);

if (pad_t != pad_b || pad_l != pad_r)
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");

pad.width = pad_l;
pad.height = pad_t;
std::vector<int> inpShape;
std::vector<int> outShape;
for (int i = 2; i < inputs[0].dims; i++) {
inpShape.push_back(inputs[0].size[i]);
outShape.push_back(outputs[0].size[i]);
}
getConvPoolPaddings(outShape, inpShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end);
if (pads_begin.size() == 2) {
for (int i = 0; i < pads_begin.size(); i++) {
if (pads_begin[i] != pads_end[i])
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in deconvolution layer");
}
pad = Size(pads_begin[1], pads_begin[0]);
}

weightsMultipliers.assign(numOutput, 1.0);
if (weightsMat.empty())
@@ -1830,11 +1858,11 @@ public:

InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);

ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width});
ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width});
ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width});
ieLayer.setKernel(kernel_size);
ieLayer.setStrides(strides);
ieLayer.setDilation(dilations);
ieLayer.setPaddingsBegin(pads_begin);
ieLayer.setPaddingsEnd(pads_end);
ieLayer.setGroup((size_t)group);
ieLayer.setOutDepth((size_t)numOutput);
@@ -206,8 +206,9 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
const int num = inputs[0][0];
CV_Assert(inputs.size() >= 3);
CV_Assert(inputs[0][0] == inputs[1][0]);
CV_Assert(num == inputs[1][0]);

int numPriors = inputs[2][2] / 4;
CV_Assert((numPriors * _numLocClasses * 4) == total(inputs[0], 1));
@@ -216,10 +217,10 @@ public:

// num() and channels() are 1.
// Since the number of bboxes to be kept is unknown before nms, we manually
// set it to maximal number of detections, [keep_top_k] parameter.
// set it to maximal number of detections, [keep_top_k] parameter multiplied by batch size.
// Each row is a 7 dimension std::vector, which stores
// [image_id, label, confidence, xmin, ymin, xmax, ymax]
outputs.resize(1, shape(1, 1, _keepTopK, 7));
outputs.resize(1, shape(1, 1, _keepTopK * num, 7));

return false;
}
@@ -57,20 +57,19 @@ std::string makeName(const std::string& str1, const std::string& str2)
}

bool getParameter(const LayerParams &params, const std::string& nameBase, const std::string& nameAll,
int &parameterH, int &parameterW, bool hasDefault = false, const int& defaultValue = 0)
std::vector<size_t>& parameter, bool hasDefault = false, const std::vector<size_t>& defaultValue = std::vector<size_t>(2, 0))
{
std::string nameH = makeName(nameBase, std::string("_h"));
std::string nameW = makeName(nameBase, std::string("_w"));
std::string nameAll_ = nameAll;
if(nameAll_ == "")
{
if (nameAll_ == "")
nameAll_ = nameBase;
}

if (params.has(nameH) && params.has(nameW))
{
parameterH = params.get<int>(nameH);
parameterW = params.get<int>(nameW);
CV_Assert(params.get<int>(nameH) >= 0 && params.get<int>(nameW) >= 0);
parameter.push_back(params.get<int>(nameH));
parameter.push_back(params.get<int>(nameW));
return true;
}
else
@@ -78,26 +77,19 @@ bool getParameter(const LayerParams &params, const std::string& nameBase, const
if (params.has(nameAll_))
{
DictValue param = params.get(nameAll_);
parameterH = param.get<int>(0);
if (param.size() == 1)
{
parameterW = parameterH;
}
else if (param.size() == 2)
{
parameterW = param.get<int>(1);
}
else
{
return false;
for (int i = 0; i < param.size(); i++) {
CV_Assert(param.get<int>(i) >= 0);
parameter.push_back(param.get<int>(i));
}
if (parameter.size() == 1)
parameter.resize(2, parameter[0]);
return true;
}
else
{
if(hasDefault)
if (hasDefault)
{
parameterH = parameterW = defaultValue;
parameter = defaultValue;
return true;
}
else
@@ -108,30 +100,38 @@ bool getParameter(const LayerParams &params, const std::string& nameBase, const
}
}

void getKernelSize(const LayerParams &params, int &kernelH, int &kernelW)
void getKernelSize(const LayerParams &params, std::vector<size_t>& kernel)
{
if(!util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW))
{
if (!util::getParameter(params, "kernel", "kernel_size", kernel))
CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}

CV_Assert(kernelH > 0 && kernelW > 0);
for (int i = 0; i < kernel.size(); i++)
CV_Assert(kernel[i] > 0);
}

void getStrideAndPadding(const LayerParams &params, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode)
void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
std::vector<size_t>& strides, cv::String& padMode, size_t kernel_size = 2)
{
if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
padT = params.get<int>("pad_t");
padL = params.get<int>("pad_l");
padB = params.get<int>("pad_b");
padR = params.get<int>("pad_r");
CV_Assert(params.get<int>("pad_t") >= 0 && params.get<int>("pad_l") >= 0 &&
params.get<int>("pad_b") >= 0 && params.get<int>("pad_r") >= 0);
pads_begin.push_back(params.get<int>("pad_t"));
pads_begin.push_back(params.get<int>("pad_l"));
pads_end.push_back(params.get<int>("pad_b"));
pads_end.push_back(params.get<int>("pad_r"));
}
else {
util::getParameter(params, "pad", "pad", padT, padL, true, 0);
padB = padT;
padR = padL;
util::getParameter(params, "pad", "pad", pads_begin, true, std::vector<size_t>(kernel_size, 0));
if (pads_begin.size() < 4)
pads_end = pads_begin;
else
{
pads_end = std::vector<size_t>(pads_begin.begin() + pads_begin.size() / 2, pads_begin.end());
pads_begin.resize(pads_begin.size() / 2);
}
util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
CV_Assert(pads_begin.size() == pads_end.size());
}
util::getParameter(params, "stride", "stride", strides, true, std::vector<size_t>(kernel_size, 1));

padMode = "";
if (params.has("pad_mode"))
@@ -139,15 +139,16 @@ void getStrideAndPadding(const LayerParams &params, int &padT, int &padL, int &p
padMode = params.get<String>("pad_mode");
}

CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0);
for (int i = 0; i < strides.size(); i++)
CV_Assert(strides[i] > 0);
}
}

void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode)
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
std::vector<size_t>& strides, cv::String &padMode)
{
util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode);

globalPooling = params.has("global_pooling") &&
params.get<bool>("global_pooling");
@@ -158,25 +159,30 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
{
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
}
if(padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1)
{
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1");
for (int i = 0; i < pads_begin.size(); i++) {
if (pads_begin[i] != 0 || pads_end[i] != 0)
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0");
}
for (int i = 0; i < strides.size(); i++) {
if (strides[i] != 1)
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1");
}
}
else
{
util::getKernelSize(params, kernelH, kernelW);
util::getKernelSize(params, kernel);
}
}

void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode)
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode)
{
util::getKernelSize(params, kernelH, kernelW);
util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);
util::getKernelSize(params, kernel);
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));

CV_Assert(dilationH > 0 && dilationW > 0);
for (int i = 0; i < dilations.size(); i++)
CV_Assert(dilations[i] > 0);
}

// From TensorFlow code:
@@ -188,19 +194,19 @@ void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &ke
// We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
// and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means
// we pad more on the right and bottom than on the top and left.
void getConvPoolOutParams(const Size& inp, const Size &kernel,
const Size &stride, const String &padMode,
const Size &dilation, Size& out)
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out)
{
if (padMode == "VALID")
{
out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height;
out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width;
for (int i = 0; i < inp.size(); i++)
out.push_back((inp[i] - dilation[i] * (kernel[i] - 1) - 1 + stride[i]) / stride[i]);
}
else if (padMode == "SAME")
{
out.height = (inp.height - 1 + stride.height) / stride.height;
out.width = (inp.width - 1 + stride.width) / stride.width;
for (int i = 0; i < inp.size(); i++)
out.push_back((inp[i] - 1 + stride[i]) / stride[i]);
}
else
{
@@ -208,22 +214,26 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
}
}

void getConvPoolPaddings(const Size& inp, const Size& out,
const Size &kernel, const Size &stride,
const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR)
void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<int>& out,
const std::vector<size_t>& kernel, const std::vector<size_t>& strides,
const String &padMode, const std::vector<size_t>& dilation,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end)
{
if (padMode == "VALID")
{
padT = padL = padB = padR = 0;
pads_begin.assign(kernel.size(), 0);
pads_end.assign(kernel.size(), 0);
}
else if (padMode == "SAME")
{
int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height);
int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
// For odd values of total padding, add more padding at the 'right'
// side of the given dimension.
padT= padB = Ph / 2;
padL = padR = Pw / 2;
CV_Assert_N(kernel.size() == dilation.size(), kernel.size() == strides.size(),
kernel.size() == inp.size(), kernel.size() == out.size());
pads_begin.resize(kernel.size());
pads_end.resize(kernel.size());
for (int i = 0; i < pads_begin.size(); i++) {
int pad = ((out[i] - 1) * strides[i] + dilation[i] * (kernel[i] - 1) + 1 - inp[i]) / 2;
pads_begin[i] = pads_end[i] = std::max(0, pad);
}
}
}
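For SAME mode the new `getConvPoolPaddings` derives a symmetric per-dimension padding from the target output size. A hedged standalone sketch of that computation (the helper name `samePads` is assumed; it presumes `out` was already computed as ceil(in / stride), as SAME mode defines it):

```cpp
#include <algorithm>
#include <vector>

void samePads(const std::vector<int>& inp, const std::vector<int>& out,
              const std::vector<size_t>& kernel, const std::vector<size_t>& stride,
              const std::vector<size_t>& dilation,
              std::vector<size_t>& padsBegin, std::vector<size_t>& padsEnd)
{
    padsBegin.resize(kernel.size());
    padsEnd.resize(kernel.size());
    for (size_t i = 0; i < kernel.size(); i++)
    {
        // total padding needed so the last window still covers the input edge
        int total = (out[i] - 1) * (int)stride[i]
                    + (int)(dilation[i] * (kernel[i] - 1)) + 1 - inp[i];
        padsBegin[i] = padsEnd[i] = (size_t)std::max(0, total / 2);
    }
}
```

Unlike the old Size-based code (which, per the TensorFlow comment above, padded the extra pixel on the right/bottom for odd totals), this version splits the total evenly, which is what the new loop in the diff does as well.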
@@ -59,22 +59,20 @@ namespace cv
{
namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode);

void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode);
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);

void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode);

void getConvPoolOutParams(const Size& inp, const Size &kernel,
const Size &stride, const String &padMode,
const Size &dilation, Size& out);

void getConvPoolPaddings(const Size& inp, const Size& out,
const Size &kernel, const Size &stride,
const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR);
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out);

void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<int>& out,
const std::vector<size_t>& kernel, const std::vector<size_t>& strides,
const String &padMode, const std::vector<size_t>& dilation,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end);
}
}
@@ -92,7 +92,7 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
return bias == 1;
return bias == (int)bias;
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM));
@@ -73,6 +73,7 @@ public:
computeMaxIdx = true;
globalPooling = false;
stride = Size(1, 1);
pad_t = pad_l = pad_b = pad_r = 0;

if (params.has("pool") || params.has("kernel_size") ||
params.has("kernel_w") || params.has("kernel_h"))
@@ -87,11 +88,17 @@ public:
else
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");

getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode);
getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode);
if (kernel_size.size() == 2) {
kernel = Size(kernel_size[1], kernel_size[0]);
stride = Size(strides[1], strides[0]);
pad = Size(pads_begin[1], pads_begin[0]);

pad.width = pad_l;
pad.height = pad_t;
pad_t = pads_begin[0];
pad_l = pads_begin[1];
pad_b = pads_end[0];
pad_r = pads_end[1];
}
}
else if (params.has("pooled_w") || params.has("pooled_h"))
{
@@ -126,17 +133,24 @@ public:

CV_Assert(!inputs.empty());

cv::Size inp(inputs[0].size[3], inputs[0].size[2]),
out(outputs[0].size[3], outputs[0].size[2]);

if(globalPooling)
{
kernel = inp;
std::vector<int> inp;
std::vector<int> out;
for (int i = 2; i < inputs[0].dims; i++) {
inp.push_back(inputs[0].size[i]);
out.push_back(outputs[0].size[i]);
}
if (globalPooling) {
kernel = Size(inp[1], inp[0]);
kernel_size = std::vector<size_t>(inp.begin(), inp.end());
}

getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r);
pad.width = pad_l;
pad.height = pad_t;
getConvPoolPaddings(inp, out, kernel_size, strides, padMode, std::vector<size_t>(kernel_size.size(), 1), pads_begin, pads_end);
if (pads_begin.size() == 2) {
pad_t = pads_begin[0];
pad_l = pads_begin[1];
pad_b = pads_end[0];
pad_r = pads_end[1];
}

#ifdef HAVE_OPENCL
poolOp.release();
@@ -149,6 +163,8 @@ public:
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
#ifdef HAVE_INF_ENGINE
if (kernel_size.size() == 3)
return preferableTarget == DNN_TARGET_CPU;
if (preferableTarget == DNN_TARGET_MYRIAD) {
if (type == MAX && (pad_l == 1 && pad_t == 1) && stride == Size(2, 2) ) {
return !isMyriadX();
@@ -162,12 +178,16 @@ public:
#endif
}
else
{
if (!kernel_size.empty() && kernel_size.size() != 2) // TODO Support Pooling3D
return false;
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() &&
(type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))) ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() &&
(type == MAX || type == AVE));
}
}

#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
@@ -307,10 +327,12 @@ public:
if (type == MAX || type == AVE)
{
InferenceEngine::Builder::PoolingLayer ieLayer(name);
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
ieLayer.setPaddingsBegin({(size_t)pad_t, (size_t)pad_l});
ieLayer.setPaddingsEnd({(size_t)pad_b, (size_t)pad_r});

ieLayer.setKernel(kernel_size);
ieLayer.setStrides(strides);
ieLayer.setPaddingsBegin(pads_begin);
ieLayer.setPaddingsEnd(pads_end);

ieLayer.setPoolingType(type == MAX ?
InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);
@@ -955,59 +977,56 @@ public:
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() != 0);
Size in(inputs[0][3], inputs[0][2]), out;

std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());
std::vector<int> outShape(inputs[0].begin(), inputs[0].begin() + 2);

if (globalPooling)
{
out.height = 1;
out.width = 1;
outShape.push_back(1);
outShape.push_back(1);
}
else if (type == ROI || type == PSROI)
{
out.height = pooledSize.height;
out.width = pooledSize.width;
outShape.push_back(pooledSize.height);
outShape.push_back(pooledSize.width);
}
else if (padMode.empty())
{
float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height;
float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width;
out.height = 1 + (ceilMode ? ceil(height) : floor(height));
out.width = 1 + (ceilMode ? ceil(width) : floor(width));
for (int i = 0; i < kernel_size.size(); i++) {
float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i];
outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
}

if (pad_r || pad_b)
{
// If we have padding, ensure that the last pooling starts strictly
// inside the image (instead of at the padding); otherwise clip the last.
if ((out.height - 1) * stride.height >= in.height + pad_b)
--out.height;
if ((out.width - 1) * stride.width >= in.width + pad_r)
--out.width;
CV_Assert((out.height - 1) * stride.height < in.height + pad_b);
CV_Assert((out.width - 1) * stride.width < in.width + pad_r);
for (int i = 0; i < pads_end.size(); i++) {
if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
--outShape[2 + i];
CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
}
}
}
else
{
getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector<size_t>(kernel_size.size(), 1), outShape);
}

int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width};
if (type == ROI)
{
CV_Assert(inputs.size() == 2);
dims[0] = inputs[1][0]; // Number of proposals;
outShape[0] = inputs[1][0]; // Number of proposals;
}
else if (type == PSROI)
{
CV_Assert(inputs.size() == 2);
CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]);
dims[0] = inputs[1][0]; // Number of proposals;
dims[1] = psRoiOutChannels;
outShape[0] = inputs[1][0]; // Number of proposals;
outShape[1] = psRoiOutChannels;
}

int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1);
CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX));
outputs.assign(numOutputs, shape(dims, 4));

outputs.assign(numOutputs, outShape);

return false;
}
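The explicit-padding branch of the pooling shape computation follows this per-dimension rule; a standalone sketch (the helper name `poolOutDim` is assumed):

```cpp
// ceil or floor of (in + pad_begin + pad_end - kernel) / stride, plus one,
// then clipped so the last window still starts inside the (padded) input.
#include <cmath>

int poolOutDim(int in, int kernel, int stride, int padBegin, int padEnd, bool ceilMode)
{
    float r = (float)(in + padBegin + padEnd - kernel) / stride;
    int out = 1 + (int)(ceilMode ? std::ceil(r) : std::floor(r));
    if (padEnd && (out - 1) * stride >= in + padEnd)
        --out;                      // do not start the last window in the padding
    return out;
}
```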
@@ -184,6 +184,12 @@ std::map<std::string, Mat> ONNXImporter::getGraphTensors(
return layers_weights;
}

static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
std::vector<int32_t> dst(src.size());
convertInt64ToInt32(src, dst, src.size());
return DictValue::arrayInt(&dst[0], src.size());
}

LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
{
LayerParams lp;
@@ -194,15 +200,13 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot

if(attribute_name == "kernel_shape")
{
CV_Assert(attribute_proto.ints_size() == 2);
lp.set("kernel_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
lp.set("kernel_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
lp.set("kernel_size", parse(attribute_proto.ints()));
}
else if(attribute_name == "strides")
{
CV_Assert(attribute_proto.ints_size() == 2);
lp.set("stride_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
lp.set("stride_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
lp.set("stride", parse(attribute_proto.ints()));
}
else if(attribute_name == "pads")
{
@@ -225,11 +229,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot
else
{
// Convolution or pooling.
CV_Assert(attribute_proto.ints_size() == 4);
lp.set("pad_t", saturate_cast<int32_t>(attribute_proto.ints(0)));
lp.set("pad_l", saturate_cast<int32_t>(attribute_proto.ints(1)));
lp.set("pad_b", saturate_cast<int32_t>(attribute_proto.ints(2)));
lp.set("pad_r", saturate_cast<int32_t>(attribute_proto.ints(3)));
CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
lp.set("pad", parse(attribute_proto.ints()));
}
}
else if(attribute_name == "auto_pad")
@@ -243,9 +244,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot
}
else if(attribute_name == "dilations")
{
CV_Assert(attribute_proto.ints_size() == 2);
lp.set("dilation_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
lp.set("dilation_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
lp.set("dilation", parse(attribute_proto.ints()));
}
else if (attribute_proto.has_i())
{
@@ -270,10 +270,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot
}
else if (attribute_proto.ints_size() > 0)
{
const ::google::protobuf::RepeatedField< ::google::protobuf::int64> src = attribute_proto.ints();
std::vector<int32_t> dst(attribute_proto.ints_size());
convertInt64ToInt32(src, dst, attribute_proto.ints_size());
lp.set(attribute_proto.name(), DictValue::arrayInt(&dst[0], attribute_proto.ints_size()));
lp.set(attribute_proto.name(), parse(attribute_proto.ints()));
}
else if (attribute_proto.has_t())
{
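The new `parse()` helper funnels every int64 attribute list through a single saturating int64-to-int32 conversion before storing it as a `DictValue`. A sketch of the same narrowing on a plain vector, with the protobuf `RepeatedField` type left out:

```cpp
#include <climits>
#include <cstdint>
#include <vector>

static int32_t saturateToInt32(int64_t v)
{
    return v > INT_MAX ? INT_MAX : (v < INT_MIN ? INT_MIN : (int32_t)v);
}

std::vector<int32_t> toInt32(const std::vector<int64_t>& src)
{
    std::vector<int32_t> dst(src.size());
    for (size_t i = 0; i < src.size(); i++)
        dst[i] = saturateToInt32(src[i]);   // e.g. ONNX kernel_shape / strides / pads values
    return dst;
}
```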
@@ -305,19 +302,6 @@ Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto,
return constBlob->second;
}

bool ONNXImporter::isCeilMode(const LayerParams& layerParams) {
if (!layerParams.has("pad_mode")) {
if (layerParams.has("pad_h")) {
return layerParams.get<int>("pad_h") != layerParams.get<int>("pad_b") ||
layerParams.get<int>("pad_w") != layerParams.get<int>("pad_r");
}
else
return false; // all pads == 0
}
return true;
}

void ONNXImporter::populateNet(Net dstNet)
{
CV_Assert(model_proto.has_graph());
@@ -384,13 +368,13 @@ void ONNXImporter::populateNet(Net dstNet)
{
layerParams.type = "Pooling";
layerParams.set("pool", "MAX");
layerParams.set("ceil_mode", isCeilMode(layerParams));
layerParams.set("ceil_mode", layerParams.has("pad_mode"));
}
else if (layer_type == "AveragePool")
{
layerParams.type = "Pooling";
layerParams.set("pool", "AVE");
layerParams.set("ceil_mode", isCeilMode(layerParams));
layerParams.set("ceil_mode", layerParams.has("pad_mode"));
layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
}
else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool")
@@ -600,8 +584,9 @@ void ONNXImporter::populateNet(Net dstNet)
if (outShape.size() != 4)
CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements.");

const int strideY = layerParams.get<int>("stride_h", 1);
const int strideX = layerParams.get<int>("stride_w", 1);
DictValue stride = layerParams.get("stride");
const int strideY = stride.getIntValue(0);
const int strideX = stride.getIntValue(1);
const int outH = outShape.getIntValue(2);
const int outW = outShape.getIntValue(3);

@@ -612,17 +597,23 @@ void ONNXImporter::populateNet(Net dstNet)
}
else if (layerParams.get<String>("pad_mode") == "VALID")
{
if (!layerParams.has("kernel_h") || !layerParams.has("kernel_w"))
if (!layerParams.has("kernel_size"))
CV_Error(Error::StsNotImplemented,
"Required attributes 'kernel_h' and 'kernel_w' are not present.");
"Required attribute 'kernel_size' is not present.");

int kernelH = layerParams.get<int>("kernel_h");
int kernelW = layerParams.get<int>("kernel_w");

layerParams.set("adj_w", (outW - kernelW) % strideX);
layerParams.set("adj_h", (outH - kernelH) % strideY);
DictValue kernel = layerParams.get("kernel_size");
layerParams.set("adj_h", (outH - kernel.getIntValue(0)) % strideY);
layerParams.set("adj_w", (outW - kernel.getIntValue(1)) % strideX);
}
}
else if (layerParams.has("output_padding"))
{
const DictValue& adj_pad = layerParams.get("output_padding");
if (adj_pad.size() != 2)
CV_Error(Error::StsNotImplemented, "Deconvolution3D layer is not supported");
layerParams.set("adj_w", adj_pad.get<int>(1));
layerParams.set("adj_h", adj_pad.get<int>(0));
}
}
else if (layer_type == "Transpose")
{
@@ -901,7 +901,7 @@ void InfEngineBackendNet::forward(const std::vector<Ptr<BackendWrapper> >& outBl
InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req;
infRequestPtr->SetUserData(reqWrapper.get(), 0);

infRequestPtr->SetCompletionCallback({
infRequestPtr->SetCompletionCallback(
[](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode status)
{
InfEngineReqWrapper* wrapper;
@@ -927,7 +927,7 @@ void InfEngineBackendNet::forward(const std::vector<Ptr<BackendWrapper> >& outBl
}
wrapper->isReady = true;
}
});
);
}
if (isAsync)
{
@@ -79,7 +79,7 @@ public:
}
}

static const tensorflow::NodeDef& getInputNode(const tensorflow::GraphDef& net,
static int getInputNodeId(const tensorflow::GraphDef& net,
const tensorflow::NodeDef& node,
int inpId)
{
@@ -92,7 +92,7 @@ public:
for (int i = 0; i < numNodes; ++i)
{
if (net.node(i).name() == name)
return net.node(i);
return i;
}
CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
}
@@ -104,36 +104,46 @@ public:
matchedNodesIds.clear();
matchedNodesIds.reserve(nodesToFuse.size());

int numNodes = net.node_size();
for (int i = 0; i < nodesToFuse.size(); ++i)
std::queue<int> nodesToMatch;
std::queue<int> targetNodes;
nodesToMatch.push(nodeId);
targetNodes.push(nodesToFuse.back());
while (!nodesToMatch.empty())
{
while (nodeId < numNodes && net.node(nodeId).op() == "Const")
{
nodeId += 1;
}
if (nodeId > numNodes - 1)
int nodeToMatch = nodesToMatch.front();
int targetNodeId = targetNodes.front();
nodesToMatch.pop();
targetNodes.pop();

if (std::find(matchedNodesIds.begin(), matchedNodesIds.end(), nodeToMatch) !=
matchedNodesIds.end())
continue;

const tensorflow::NodeDef& node = net.node(nodeToMatch);
if (node.op() != nodes[targetNodeId])
return false;

const tensorflow::NodeDef& node = net.node(nodeId);

if (node.op() != nodes[nodesToFuse[i]])
return false;

std::vector<int>& inputNodes = inputs[nodesToFuse[i]];
std::vector<int>& inputNodes = inputs[targetNodeId];
if (inputNodes.size() != node.input_size())
return false;

for (int j = 0; j < inputNodes.size(); ++j)
{
if (nodes[inputNodes[j]].empty()) // Unknown input node type.
continue;
const tensorflow::NodeDef& inpNode = getInputNode(net, node, j);
if (inpNode.op() != nodes[inputNodes[j]])
nodeId = getInputNodeId(net, node, j);
const tensorflow::NodeDef& inpNode = net.node(nodeId);
if (inpNode.op() != "Const")
{
nodesToMatch.push(nodeId);
targetNodes.push(inputNodes[j]);
}
else if (nodes[inputNodes[j]] != "Const")
return false;
}

matchedNodesIds.push_back(nodeId);
nodeId += 1;
matchedNodesIds.push_back(nodeToMatch);
}
std::sort(matchedNodesIds.begin(), matchedNodesIds.end());
return true;
}
@ -181,7 +191,7 @@ public:
|
||||
std::vector<tensorflow::NodeDef*> inputNodes(inputsNames.size());
|
||||
for (int i = 0; i < inputsNames.size(); ++i)
|
||||
{
|
||||
inputNodes[i] = (tensorflow::NodeDef*)&getInputNode(net, *node, i);
|
||||
inputNodes[i] = net.mutable_node(getInputNodeId(net, *node, i));
|
||||
}
|
||||
finalize(net, node, inputNodes);
|
||||
}
|
||||
@ -354,7 +364,7 @@ public:
|
||||
{
|
||||
if (!Subgraph::match(net, nodeId, matchedNodesIds))
|
||||
return false;
|
||||
Mat maxValue = getTensorContent(net.node(nodeId + 1).attr().at("value").tensor());
|
||||
Mat maxValue = getTensorContent(net.node(matchedNodesIds.front() + 1).attr().at("value").tensor());
|
||||
return maxValue.type() == CV_32FC1 && maxValue.total() == 1 && maxValue.at<float>(0) == 6;
|
||||
}
|
||||
};
|
||||
@ -384,6 +394,17 @@ public:
|
||||
setFusedNode("Reshape", ids);
|
||||
}
|
||||
|
||||
virtual bool match(const tensorflow::GraphDef& net, int nodeId, std::vector<int>& matchedNodesIds) CV_OVERRIDE
|
||||
{
|
||||
const tensorflow::NodeDef& node = net.node(nodeId);
|
||||
if (node.input_size() == 0)
|
||||
return false;
|
||||
|
||||
inpName = node.input(0);
|
||||
return Subgraph::match(net, nodeId, matchedNodesIds);
|
||||
}
|
||||
|
||||
|
||||
virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
|
||||
std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
|
||||
{
|
||||
@ -395,6 +416,7 @@ public:
|
||||
}
|
||||
tensorflow::TensorProto* shapeTensor = inputNodes[1]->mutable_attr()->at("value").mutable_tensor();
|
||||
fusedNode->mutable_input()->DeleteSubrange(2, numOutDims - 1);
|
||||
fusedNode->set_input(0, inpName);
|
||||
|
||||
shapeTensor->clear_int_val();
|
||||
for (int i = 0; i < shape.size(); ++i)
|
||||
@ -405,6 +427,7 @@ public:
|
||||
|
||||
private:
|
||||
int numOutDims;
|
||||
std::string inpName;
|
||||
};
|
||||
|
||||
class L2NormalizeSubgraph : public Subgraph
|
||||
@ -685,9 +708,9 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
|
||||
subgraphs.push_back(Ptr<Subgraph>(new DeconvolutionSameKerasSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new UpsamplingKerasSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new ReshapeAsShapeSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new SoftMaxSlimSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new SoftMaxSlimV2Subgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new ReshapeAsShapeSubgraph()));
|
||||
|
||||
int numNodes = net.node_size();
|
||||
std::vector<int> matchedNodesIds;
|
||||
|
@ -51,6 +51,7 @@ enum DataLayout
{
DATA_LAYOUT_NHWC,
DATA_LAYOUT_NCHW,
DATA_LAYOUT_NDHWC,
DATA_LAYOUT_UNKNOWN,
DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d)
};
@ -258,6 +259,8 @@ static int getDataLayout(const tensorflow::NodeDef& layer)
return DATA_LAYOUT_NHWC;
else if (format == "NCHW" || format == "channels_first")
return DATA_LAYOUT_NCHW;
else if (format == "NDHWC")
return DATA_LAYOUT_NDHWC;
else
CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
}
@ -281,22 +284,35 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
if (hasLayerAttr(layer, "strides"))
{
const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
int dimX, dimY, dimC;
int dimX, dimY, dimC, dimD;
int layout = getDataLayout(layer);
if (layout == DATA_LAYOUT_NCHW)
{
dimC = 1; dimY = 2; dimX = 3;
}
else if (layout == DATA_LAYOUT_NDHWC)
{
dimD = 1; dimY = 2; dimX = 3; dimC = 4;
}
else
{
dimY = 1; dimX = 2; dimC = 3;
}
if (val.list().i_size() != 4 ||
if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
val.list().i(0) != 1 || val.list().i(dimC) != 1)
CV_Error(Error::StsError, "Unsupported strides");
if (layout == DATA_LAYOUT_NDHWC) {
int strides[] = {static_cast<int>(val.list().i(dimD)),
static_cast<int>(val.list().i(dimY)),
static_cast<int>(val.list().i(dimX))};
layerParams.set("stride", DictValue::arrayInt(strides, 3));
}
else
{
layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
}
}
}

DictValue parseDims(const tensorflow::TensorProto &tensor) {
@ -318,22 +334,36 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
if (hasLayerAttr(layer, "ksize"))
{
const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
int dimX, dimY, dimC;
int dimX, dimY, dimC, dimD;
int layout = getDataLayout(layer);
if (layout == DATA_LAYOUT_NCHW)
{
dimC = 1; dimY = 2; dimX = 3;
}
else if (layout == DATA_LAYOUT_NDHWC)
{
dimD = 1; dimY = 2; dimX = 3; dimC = 4;
}
else
{
dimY = 1; dimX = 2; dimC = 3;
}
if (val.list().i_size() != 4 ||
if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
val.list().i(0) != 1 || val.list().i(dimC) != 1)
CV_Error(Error::StsError, "Unsupported ksize");

if (layout == DATA_LAYOUT_NDHWC) {
int kernel[] = {static_cast<int>(val.list().i(dimD)),
static_cast<int>(val.list().i(dimY)),
static_cast<int>(val.list().i(dimX))};
layerParams.set("kernel_size", DictValue::arrayInt(kernel, 3));
}
else
{
layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
}
}
else
{
layerParams.set("kernel_h", 1);
@ -456,12 +486,26 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds
// TODO: other blob types
CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
tensor.dtype() == tensorflow::DT_HALF);
CV_Assert(dims == 4);
CV_Assert(dims == 4 || dims == 5);

int out_c, input_c, depth, height, width;
if (dims == 4)
{
// REORDER kernel HWIO to OIHW
swap(shape[0], shape[2]); // IWHO
swap(shape[1], shape[3]); // IOHW
swap(shape[0], shape[1]); // OIHW
depth = 1; height = shape[2]; width = shape[3];
}
else
{
// REORDER kernel DHWIO to OIDHW
swap(shape[0], shape[4]); // OHWID
swap(shape[1], shape[3]); // OIWHD
swap(shape[2], shape[4]); // OIDHW
depth = shape[2]; height = shape[3]; width = shape[4];
}
out_c = shape[0]; input_c = shape[1];

dstBlob.create(shape, CV_32F);

@ -472,14 +516,16 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds
float *dstData = dstBlob.ptr<float>();
const float *data = reinterpret_cast<const float*>(tensorContent.data);

int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
int total = out_c*input_c*height*width;
for(int i_oc = 0; i_oc < out_c; i_oc++) {
for(int i_ic = 0; i_ic < input_c; i_ic++) {
for(int i_h = 0; i_h < height; i_h++) {
for(int i_w = 0; i_w < width; i_w++) {
int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
int total = out_c * input_c * depth * height * width;
for (int i_oc = 0; i_oc < out_c; i_oc++) {
for (int i_ic = 0; i_ic < input_c; i_ic++) {
for (int i_d = 0; i_d < depth; i_d++) {
for (int i_h = 0; i_h < height; i_h++) {
for (int i_w = 0; i_w < width; i_w++) {
int dst_i = input_c * depth * height * width * i_oc +
depth * height * width * i_ic + height * width * i_d + width * i_h + i_w;
int src_i = out_c * input_c * width * height * i_d +
out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc;
CV_Assert(dst_i < total);
CV_Assert(src_i < total);
dstData[dst_i] = data[src_i];
@ -487,6 +533,7 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds
}
}
}
}
}

void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
@ -745,7 +792,7 @@ void TFImporter::populateNet(Net dstNet)
int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
data_layouts[name] = predictedLayout;

if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad")
if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "Conv3D")
{
// The first node of dilated convolution subgraph.
// Extract input node, dilation rate and paddings.
@ -917,9 +964,9 @@ void TFImporter::populateNet(Net dstNet)
{
layerParams.blobs[0] = sharedWeightsIt->second;
}
Mat weights = layerParams.blobs[0];
layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2));

layerParams.set("kernel_h", layerParams.blobs[0].size[2]);
layerParams.set("kernel_w", layerParams.blobs[0].size[3]);
layerParams.set("num_output", layerParams.blobs[0].size[0]);

setStrides(layerParams, layer);
@ -1079,7 +1126,15 @@ void TFImporter::populateNet(Net dstNet)
{
Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));

if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC)
if (inpLayout == DATA_LAYOUT_NHWC)
{
if (newShape.total() == 4)
{
// NHWC->NCHW
std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
}
if (newShape.total() != 4 || newShape.at<int>(1) == 1)
{
LayerParams permLP;
int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
@ -1093,11 +1148,6 @@ void TFImporter::populateNet(Net dstNet)
inpId = Pin(permName);
inpLayout = DATA_LAYOUT_NCHW;
}
else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC)
{
// NHWC->NCHW
std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
}
layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));

@ -1290,7 +1340,7 @@ void TFImporter::populateNet(Net dstNet)
connect(layer_id, dstNet, inp, id, ii - from);
}
}
else if (type == "MaxPool")
else if (type == "MaxPool" || type == "MaxPool3D")
{
layerParams.set("pool", "max");

@ -1303,11 +1353,10 @@ void TFImporter::populateNet(Net dstNet)

connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
}
else if (type == "AvgPool")
else if (type == "AvgPool" || type == "AvgPool3D")
{
layerParams.set("pool", "ave");
layerParams.set("ave_pool_padded_area", false);

setKSize(layerParams, layer);
setStrides(layerParams, layer);
setPadding(layerParams, layer);
@ -1335,7 +1384,9 @@ void TFImporter::populateNet(Net dstNet)
// num_split
// 1st blob is dims tensor
int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
layerParams.set("axis", toNCHW(axis));
if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
axis = toNCHW(axis);
layerParams.set("axis", axis);

int id = dstNet.addLayer(name, "Slice", layerParams);
layer_id[name] = id;

@ -45,7 +45,7 @@ public:
netDefault.setInput(inp);
Mat outDefault = netDefault.forward(outputLayer).clone();

Net net = readNet(weights, proto);
net = readNet(weights, proto);
net.setInput(inp);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
@ -94,6 +94,8 @@ public:
else
normAssert(ref, out, msg, l1, lInf);
}

Net net;
};

TEST_P(DNNTestNetwork, AlexNet)
@ -103,6 +105,7 @@ TEST_P(DNNTestNetwork, AlexNet)
Size(227, 227), "prob",
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_alexnet.yml" :
"dnn/halide_scheduler_alexnet.yml");
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, ResNet_50)
@ -112,6 +115,7 @@ TEST_P(DNNTestNetwork, ResNet_50)
Size(224, 224), "prob",
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_resnet_50.yml" :
"dnn/halide_scheduler_resnet_50.yml");
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, SqueezeNet_v1_1)
@ -120,6 +124,7 @@ TEST_P(DNNTestNetwork, SqueezeNet_v1_1)
Size(227, 227), "prob",
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml" :
"dnn/halide_scheduler_squeezenet_v1_1.yml");
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, GoogLeNet)
@ -127,6 +132,7 @@ TEST_P(DNNTestNetwork, GoogLeNet)
applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB);
processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
Size(224, 224), "prob");
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, Inception_5h)
@ -142,6 +148,7 @@ TEST_P(DNNTestNetwork, Inception_5h)
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_inception_5h.yml" :
"dnn/halide_scheduler_inception_5h.yml",
l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, ENet)
@ -168,6 +175,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : 0.0;
processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt",
inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height)
@ -185,7 +193,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height)
float diffSquares = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : 0.0;
processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt",
inp, "detection_out", "", diffScores, diffSquares);

expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow)
@ -200,6 +208,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow)
float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.216 : 0.2;
processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt",
inp, "detection_out", "", l1, lInf, detectionConfThresh);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height)
@ -217,6 +226,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height)
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0;
processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt",
inp, "detection_out", "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
@ -230,6 +240,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
inp, "detection_out", "", l1, lInf, 0.25);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, SSD_VGG16)
@ -244,6 +255,7 @@ TEST_P(DNNTestNetwork, SSD_VGG16)
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel",
"dnn/ssd_vgg16.prototxt", inp, "detection_out", "", scoreThreshold, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, OpenPose_pose_coco)
@ -262,6 +274,7 @@ TEST_P(DNNTestNetwork, OpenPose_pose_coco)
const float lInf = (target == DNN_TARGET_MYRIAD) ? 0.072 : 0.0;
processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt",
Size(46, 46), "", "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, OpenPose_pose_mpi)
@ -280,6 +293,7 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi)
const float lInf = (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.16 : 0.0;
processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt",
Size(46, 46), "", "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
@ -296,6 +310,7 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
// See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp
processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt",
Size(46, 46));
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, OpenFace)
@ -324,6 +339,7 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
inp, "detection_out");
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
@ -342,6 +358,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0;
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
inp, "detection_out", "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, DenseNet_121)
@ -360,6 +377,7 @@ TEST_P(DNNTestNetwork, DenseNet_121)
l1 = 0.1; lInf = 0.6;
}
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)

@ -207,60 +207,72 @@ TEST(Reproducibility_SSD, Accuracy)
normAssertDetections(ref, out);
}

typedef testing::TestWithParam<Target> Reproducibility_MobileNet_SSD;
typedef testing::TestWithParam<tuple<Backend, Target> > Reproducibility_MobileNet_SSD;
TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
{
const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false);
const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false);
Net net = readNetFromCaffe(proto, model);
int targetId = GetParam();
const float l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 1.5e-4 : 1e-5;
const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-4;
int backendId = get<0>(GetParam());
int targetId = get<1>(GetParam());

net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);

Mat sample = imread(_tf("street.png"));

Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
net.setInput(inp);
Mat out = net.forward();
Mat out = net.forward().clone();

const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-5;
const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-3 : 1e-4;
const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5;
const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 6.3e-2 : 1e-4;
Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));
normAssertDetections(ref, out, "", 0.0, scores_diff, boxes_iou_diff);
normAssertDetections(ref, out, "", FLT_MIN, scores_diff, boxes_iou_diff);

// Check that detections aren't preserved.
inp.setTo(0.0f);
net.setInput(inp);
out = net.forward();
out = out.reshape(1, out.total() / 7);
Mat zerosOut = net.forward();
zerosOut = zerosOut.reshape(1, zerosOut.total() / 7);

const int numDetections = out.rows;
const int numDetections = zerosOut.rows;
ASSERT_NE(numDetections, 0);
for (int i = 0; i < numDetections; ++i)
{
float confidence = out.ptr<float>(i)[2];
float confidence = zerosOut.ptr<float>(i)[2];
ASSERT_EQ(confidence, 0);
}

// There is something wrong with Reshape layer in Myriad plugin and
// regression with DLIE/OCL_FP16 target.
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if ((targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2) ||
targetId == DNN_TARGET_OPENCL_FP16)
return;
}

// Check batching mode.
ref = ref.reshape(1, numDetections);
inp = blobFromImages(std::vector<Mat>(2, sample), 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
net.setInput(inp);
Mat outBatch = net.forward();

// Output blob has a shape 1x1x2Nx7 where N is a number of detection for
// a single sample in batch. The first numbers of detection vectors are batch id.
outBatch = outBatch.reshape(1, outBatch.total() / 7);
EXPECT_EQ(outBatch.rows, 2 * numDetections);
normAssert(outBatch.rowRange(0, numDetections), ref, "", l1, lInf);
normAssert(outBatch.rowRange(numDetections, 2 * numDetections).colRange(1, 7), ref.colRange(1, 7),
"", l1, lInf);
// For Inference Engine backend there is -1 delimiter which points the end of detections.
const int numRealDetections = ref.size[2];
EXPECT_EQ(outBatch.size[2], 2 * numDetections);
out = out.reshape(1, numDetections).rowRange(0, numRealDetections);
outBatch = outBatch.reshape(1, 2 * numDetections);
for (int i = 0; i < 2; ++i)
{
Mat pred = outBatch.rowRange(i * numRealDetections, (i + 1) * numRealDetections);
EXPECT_EQ(countNonZero(pred.col(0) != i), 0);
normAssert(pred.colRange(1, 7), out.colRange(1, 7));
}
}
INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD,
Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD, dnnBackendsAndTargets());

typedef testing::TestWithParam<Target> Reproducibility_ResNet50;
TEST_P(Reproducibility_ResNet50, Accuracy)
@ -405,6 +417,7 @@ TEST_P(Test_Caffe_nets, Colorization)
l1 = 0.6; lInf = 15;
}
normAssert(out, ref, "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_Caffe_nets, DenseNet_121)
@ -436,6 +449,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121)
l1 = 0.11; lInf = 0.5;
}
normAssert(out, ref, "", l1, lInf);
expectNoFallbacksFromIE(net);
}

TEST(Test_Caffe, multiple_inputs)

@ -111,6 +111,31 @@ public:
}
}

void expectNoFallbacks(Net& net)
{
// Check if all the layers are supported with current backend and target.
// Some layers might be fused so their timings equal to zero.
std::vector<double> timings;
net.getPerfProfile(timings);
std::vector<String> names = net.getLayerNames();
CV_Assert(names.size() == timings.size());

for (int i = 0; i < names.size(); ++i)
{
Ptr<dnn::Layer> l = net.getLayer(net.getLayerId(names[i]));
bool fused = !timings[i];
if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
CV_Error(Error::StsNotImplemented, "Layer [" + l->name + "] of type [" +
l->type + "] is expected to has backend implementation");
}
}

void expectNoFallbacksFromIE(Net& net)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE)
expectNoFallbacks(net);
}

protected:
void checkBackend(Mat* inp = 0, Mat* ref = 0)
{

@ -28,7 +28,8 @@ public:
};

void testONNXModels(const String& basename, const Extension ext = npy,
const double l1 = 0, const float lInf = 0, const bool useSoftmax = false)
const double l1 = 0, const float lInf = 0, const bool useSoftmax = false,
bool checkNoFallbacks = true)
{
String onnxmodel = _tf("models/" + basename + ".onnx");
Mat inp, ref;
@ -67,6 +68,8 @@ public:
ref = netSoftmax.forward();
}
normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
if (checkNoFallbacks)
expectNoFallbacksFromIE(net);
}
};

@ -81,6 +84,13 @@ TEST_P(Test_ONNX_layers, Convolution)
testONNXModels("convolution");
}

TEST_P(Test_ONNX_layers, Convolution3D)
{
if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
throw SkipTestException("Only DLIE backend on CPU is supported");
testONNXModels("conv3d");
testONNXModels("conv3d_bias");
}

TEST_P(Test_ONNX_layers, Two_convolution)
{
@ -96,10 +106,11 @@ TEST_P(Test_ONNX_layers, Two_convolution)

TEST_P(Test_ONNX_layers, Deconvolution)
{
testONNXModels("deconvolution");
testONNXModels("two_deconvolution");
testONNXModels("deconvolution_group");
testONNXModels("deconvolution_output_shape");
testONNXModels("deconvolution", npy, 0, 0, false, false);
testONNXModels("two_deconvolution", npy, 0, 0, false, false);
testONNXModels("deconvolution_group", npy, 0, 0, false, false);
testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false);
testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
}

TEST_P(Test_ONNX_layers, Dropout)
@ -137,6 +148,20 @@ TEST_P(Test_ONNX_layers, AveragePooling)
testONNXModels("average_pooling");
}

TEST_P(Test_ONNX_layers, MaxPooling3D)
{
if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
throw SkipTestException("Only DLIE backend on CPU is supported");
testONNXModels("max_pool3d");
}

TEST_P(Test_ONNX_layers, AvePooling3D)
{
if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
throw SkipTestException("Only DLIE backend on CPU is supported");
testONNXModels("ave_pool3d");
}

TEST_P(Test_ONNX_layers, BatchNormalization)
{
testONNXModels("batch_norm");
@ -198,6 +223,7 @@ TEST_P(Test_ONNX_layers, MultyInputs)
Mat out = net.forward();

normAssert(ref, out, "", default_l1, default_lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_ONNX_layers, DynamicReshape)
@ -235,6 +261,7 @@ TEST_P(Test_ONNX_nets, Alexnet)
Mat out = net.forward();

normAssert(out, ref, "", default_l1, default_lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_ONNX_nets, Squeezenet)
@ -267,6 +294,7 @@ TEST_P(Test_ONNX_nets, Googlenet)
Mat out = net.forward();

normAssert(ref, out, "", default_l1, default_lInf);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_ONNX_nets, CaffeNet)

@ -131,6 +131,13 @@ TEST_P(Test_TensorFlow_layers, conv)
runTensorFlowNet("conv_pool_nchw");
}

TEST_P(Test_TensorFlow_layers, Convolution3D)
{
if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
throw SkipTestException("Only DLIE backend on CPU is supported");
runTensorFlowNet("conv3d");
}

TEST_P(Test_TensorFlow_layers, padding)
{
runTensorFlowNet("padding_valid");
@ -212,6 +219,20 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same)
runTensorFlowNet("ave_pool_same");
}

TEST_P(Test_TensorFlow_layers, MaxPooling3D)
{
if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
throw SkipTestException("Only DLIE backend on CPU is supported");
runTensorFlowNet("max_pool3d");
}

TEST_P(Test_TensorFlow_layers, AvePooling3D)
{
if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
throw SkipTestException("Only DLIE backend on CPU is supported");
runTensorFlowNet("ave_pool3d");
}

TEST_P(Test_TensorFlow_layers, deconvolution)
{
runTensorFlowNet("deconvolution");
@ -335,6 +356,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0043 : default_l1;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.037 : default_lInf;
normAssertDetections(ref, out, "", 0.2, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
@ -372,6 +394,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0097 : default_l1;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
@ -402,6 +425,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1.5e-5;
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 1e-3;
normAssertDetections(ref, out, "", 0.3, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_TensorFlow_nets, Faster_RCNN)
@ -460,6 +484,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.048 : 1.1e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.058 : default_lInf;
normAssertDetections(ref, out, "", 0.45, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}

TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
@ -489,6 +514,7 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 3.4e-3;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.024 : 1e-2;
normAssertDetections(ref, out, "", 0.9, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}

// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
@ -553,6 +579,7 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection)
}
normAssert(scores, blobFromNPY(refScoresPath), "scores", l1_scores, lInf_scores);
normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", l1_geometry, lInf_geometry);
expectNoFallbacksFromIE(net);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets());
@ -654,6 +681,13 @@ TEST_P(Test_TensorFlow_layers, relu6)
runTensorFlowNet("keras_relu6", /*hasText*/ true);
}

TEST_P(Test_TensorFlow_layers, subpixel)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE)
throw SkipTestException("");
runTensorFlowNet("subpixel");
}

TEST_P(Test_TensorFlow_layers, keras_mobilenet_head)
{
runTensorFlowNet("keras_mobilenet_head");

@ -1355,6 +1355,8 @@ static void GetCircleCenters(const std::vector<int> &centers, std::vector<Vec4f>
template<typename T>
static void RemoveOverlaps(std::vector<T>& circles, float minDist)
{
if (circles.size() <= 1u)
return;
float minDist2 = minDist * minDist;
size_t endIdx = 1;
for (size_t i = 1; i < circles.size(); ++i)

@ -181,17 +181,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
assert (planes.length == 3);
assert (image.getFormat() == mPreviewFormat);

// see also https://developer.android.com/reference/android/graphics/ImageFormat.html#YUV_420_888
// Y plane (0) non-interleaved => stride == 1; U/V plane interleaved => stride == 2
assert (planes[0].getPixelStride() == 1);
assert (planes[1].getPixelStride() == 2);
assert (planes[2].getPixelStride() == 2);

ByteBuffer y_plane = planes[0].getBuffer();
ByteBuffer uv_plane = planes[1].getBuffer();
Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane);
Mat uv_mat = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane);
JavaCamera2Frame tempFrame = new JavaCamera2Frame(y_mat, uv_mat, w, h);
JavaCamera2Frame tempFrame = new JavaCamera2Frame(image);
deliverAndDrawFrame(tempFrame);
tempFrame.release();
image.close();
@ -334,50 +324,87 @@ public class JavaCamera2View extends CameraBridgeViewBase {
private class JavaCamera2Frame implements CvCameraViewFrame {
@Override
public Mat gray() {
return mYuvFrameData.submat(0, mHeight, 0, mWidth);
Image.Plane[] planes = mImage.getPlanes();
int w = mImage.getWidth();
int h = mImage.getHeight();
ByteBuffer y_plane = planes[0].getBuffer();
mGray = new Mat(h, w, CvType.CV_8UC1, y_plane);
return mGray;
}

@Override
public Mat rgba() {
if (mPreviewFormat == ImageFormat.NV21)
Imgproc.cvtColor(mYuvFrameData, mRgba, Imgproc.COLOR_YUV2RGBA_NV21, 4);
else if (mPreviewFormat == ImageFormat.YV12)
Imgproc.cvtColor(mYuvFrameData, mRgba, Imgproc.COLOR_YUV2RGB_I420, 4); // COLOR_YUV2RGBA_YV12 produces inverted colors
else if (mPreviewFormat == ImageFormat.YUV_420_888) {
assert (mUVFrameData != null);
Imgproc.cvtColorTwoPlane(mYuvFrameData, mUVFrameData, mRgba, Imgproc.COLOR_YUV2RGBA_NV21);
} else
throw new IllegalArgumentException("Preview Format can be NV21 or YV12");
Image.Plane[] planes = mImage.getPlanes();
int w = mImage.getWidth();
int h = mImage.getHeight();
int chromaPixelStride = planes[1].getPixelStride();


if (chromaPixelStride == 2) { // Chroma channels are interleaved
ByteBuffer y_plane = planes[0].getBuffer();
ByteBuffer uv_plane = planes[1].getBuffer();
Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane);
Mat uv_mat = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane);
Imgproc.cvtColorTwoPlane(y_mat, uv_mat, mRgba, Imgproc.COLOR_YUV2RGBA_NV21);
return mRgba;
} else { // Chroma channels are not interleaved
byte[] yuv_bytes = new byte[w*(h+h/2)];
ByteBuffer y_plane = planes[0].getBuffer();
ByteBuffer u_plane = planes[1].getBuffer();
ByteBuffer v_plane = planes[2].getBuffer();

y_plane.get(yuv_bytes, 0, w*h);

int chromaRowStride = planes[1].getRowStride();
int chromaRowPadding = chromaRowStride - w/2;

int offset = w*h;
if (chromaRowPadding == 0){
// When the row stride of the chroma channels equals their width, we can copy
// the entire channels in one go
u_plane.get(yuv_bytes, offset, w*h/4);
offset += w*h/4;
v_plane.get(yuv_bytes, offset, w*h/4);
} else {
// When not equal, we need to copy the channels row by row
for (int i = 0; i < h/2; i++){
u_plane.get(yuv_bytes, offset, w/2);
offset += w/2;
if (i < h/2-1){
u_plane.position(u_plane.position() + chromaRowPadding);
}
}
for (int i = 0; i < h/2; i++){
v_plane.get(yuv_bytes, offset, w/2);
offset += w/2;
if (i < h/2-1){
v_plane.position(v_plane.position() + chromaRowPadding);
}
}
}

Mat yuv_mat = new Mat(h+h/2, w, CvType.CV_8UC1);
yuv_mat.put(0, 0, yuv_bytes);
Imgproc.cvtColor(yuv_mat, mRgba, Imgproc.COLOR_YUV2RGBA_I420, 4);
return mRgba;
}

public JavaCamera2Frame(Mat Yuv420sp, int width, int height) {
super();
mWidth = width;
mHeight = height;
mYuvFrameData = Yuv420sp;
mUVFrameData = null;
mRgba = new Mat();
}

public JavaCamera2Frame(Mat Y, Mat UV, int width, int height) {

public JavaCamera2Frame(Image image) {
super();
mWidth = width;
mHeight = height;
mYuvFrameData = Y;
mUVFrameData = UV;
mImage = image;
mRgba = new Mat();
mGray = new Mat();
}

public void release() {
mRgba.release();
mGray.release();
}

private Mat mYuvFrameData;
private Mat mUVFrameData;
private Image mImage;
private Mat mRgba;
private int mWidth;
private int mHeight;
private Mat mGray;
};
}

@ -40,6 +40,8 @@
//M*/

#include "precomp.hpp"
#include <opencv2/core/utils/configuration.private.hpp>

#include "opencv2/core/core_c.h"

#include <ctype.h>
@ -741,7 +743,7 @@ void checkIppStatus()
}
}

static bool checkTestData = false;
static bool checkTestData = cv::utils::getConfigurationParameterBool("OPENCV_TEST_REQUIRE_DATA", false);
bool skipUnstableTests = false;
bool runBigDataTests = false;
int testThreads = 0;
@ -788,8 +790,11 @@ void testTearDown()
{
size_t peak = malloc_peak();
memory_usage = peak - memory_usage_base;
if (peak > 0)
{
CV_LOG_INFO(NULL, "Memory_usage (malloc): " << memory_usage << " (base=" << memory_usage_base << ")");
}
}
{
// core/src/alloc.cpp: #define OPENCV_ALLOC_ENABLE_STATISTICS
// handle large buffers via fastAlloc()
@ -797,7 +802,10 @@ void testTearDown()
cv::utils::AllocatorStatisticsInterface& ocv_stats = cv::getAllocatorStatistics();
ocv_peak = ocv_stats.getPeakUsage();
ocv_memory_usage = ocv_peak - memory_usage_base_opencv;
if (ocv_peak)
{
CV_LOG_INFO(NULL, "Memory_usage (OpenCV): " << ocv_memory_usage << " (base=" << memory_usage_base_opencv << " current=" << ocv_stats.getCurrentUsage() << ")");
}
if (memory_usage == 0) // external profiler has higher priority (and accuracy)
memory_usage = ocv_memory_usage;
}
@ -807,7 +815,10 @@ void testTearDown()
cv::utils::AllocatorStatisticsInterface& ocl_stats = cv::ocl::getOpenCLAllocatorStatistics();
ocl_peak = ocl_stats.getPeakUsage();
ocl_memory_usage = ocl_peak - memory_usage_base_opencl;
if (ocl_memory_usage > 0)
{
CV_LOG_INFO(NULL, "Memory_usage (OpenCL): " << ocl_memory_usage << " (base=" << memory_usage_base_opencl << " current=" << ocl_stats.getCurrentUsage() << ")");
}
::testing::Test::RecordProperty("ocl_memory_usage",
cv::format("%llu", (unsigned long long)ocl_memory_usage));
}
@ -828,16 +839,16 @@ void testTearDown()

void parseCustomOptions(int argc, char **argv)
{
const char * const command_line_keys =
const string command_line_keys = string(
"{ ipp test_ipp_check |false |check whether IPP works without failures }"
"{ test_seed |809564 |seed for random numbers generator }"
"{ test_threads |-1 |the number of worker threads, if parallel execution is enabled}"
"{ skip_unstable |false |skip unstable tests }"
"{ test_bigdata |false |run BigData tests (>=2Gb) }"
"{ test_require_data |false |fail on missing non-required test data instead of skip}"
"{ test_require_data |") + (checkTestData ? "true" : "false") + string("|fail on missing non-required test data instead of skip (env:OPENCV_TEST_REQUIRE_DATA)}"
CV_TEST_TAGS_PARAMS
"{ h help |false |print help info }"
;
);

cv::CommandLineParser parser(argc, argv, command_line_keys);
if (parser.get<bool>("help"))
@ -860,6 +871,7 @@ void parseCustomOptions(int argc, char **argv)

skipUnstableTests = parser.get<bool>("skip_unstable");
runBigDataTests = parser.get<bool>("test_bigdata");
if (parser.has("test_require_data"))
checkTestData = parser.get<bool>("test_require_data");

activateTestTags(parser);

@ -25,7 +25,7 @@ using namespace cvtest;
using namespace perf;

int64 TestBase::timeLimitDefault = 0;
unsigned int TestBase::iterationsLimitDefault = (unsigned int)(-1);
unsigned int TestBase::iterationsLimitDefault = UINT_MAX;
int64 TestBase::_timeadjustment = 0;

// Item [0] will be considered the default implementation.
@ -1158,7 +1158,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
}

timeLimitDefault = param_time_limit == 0.0 ? 1 : (int64)(param_time_limit * cv::getTickFrequency());
iterationsLimitDefault = param_force_samples == 0 ? (unsigned)(-1) : param_force_samples;
iterationsLimitDefault = param_force_samples == 0 ? UINT_MAX : param_force_samples;
_timeadjustment = _calibrate();
}

@ -1197,9 +1197,13 @@ enum PERF_STRATEGY TestBase::getCurrentModulePerformanceStrategy()
int64 TestBase::_calibrate()
{
CV_TRACE_FUNCTION();
if (iterationsLimitDefault <= 1)
return 0;

class _helper : public ::perf::TestBase
{
public:
_helper() { testStrategy = PERF_STRATEGY_BASE; }
performance_metrics& getMetrics() { return calcMetrics(); }
virtual void TestBody() {}
virtual void PerfTestBody()
@ -1210,13 +1214,17 @@ int64 TestBase::_calibrate()
cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2));
declare.time(30);
double s = 0;
for(declare.iterations(20); next() && startTimer(); stopTimer())
declare.iterations(20);
minIters = nIters = 20;
for(; next() && startTimer(); stopTimer())
s+=a.dot(b);
declare.time(s);

//self calibration
SetUp();
for(declare.iterations(1000); next() && startTimer(); stopTimer()){}
declare.iterations(1000);
minIters = nIters = 1000;
for(int iters = 0; next() && startTimer(); iters++, stopTimer()) { /*std::cout << iters << nIters << std::endl;*/ }
}
};

@ -251,6 +251,13 @@ make & enjoy!
#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH (V4L2_CID_MPEG_BASE+365)
#endif

#ifndef V4L2_CID_ROTATE
#define V4L2_CID_ROTATE (V4L2_CID_BASE+34)
#endif
#ifndef V4L2_CID_IRIS_ABSOLUTE
#define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17)
#endif

/* Defaults - If your board can do better, set it here. Set for the most common type inputs. */
#define DEFAULT_V4L_WIDTH 640
#define DEFAULT_V4L_HEIGHT 480

@ -36,11 +36,11 @@ using namespace cv;

static void help()
{
cout << "\n This program demonstrates how to use MSER to detect extremal regions \n"
"Usage: \n"
" ./detect_mser <image1(without parameter a syntehtic image is used as default)>\n"
cout << "\nThis program demonstrates how to use MSER to detect extremal regions\n"
"Usage:\n"
" ./detect_mser <image1(without parameter a synthetic image is used as default)>\n"
"Press esc key when image window is active to change descriptor parameter\n"
"Press 2, 8, 4, 6, +,- or 5 keys in openGL windows to change view or use mouse\n";
"Press 2, 8, 4, 6, +, -, or 5 keys in openGL windows to change view or use mouse\n";
}

struct MSERParams

375 samples/cpp/digits.cpp Normal file
@ -0,0 +1,375 @@
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/ml.hpp"

#include <algorithm>
#include <iostream>
#include <vector>

using namespace cv;
using namespace std;

const int SZ = 20; // size of each digit is SZ x SZ
const int CLASS_N = 10;
const char* DIGITS_FN = "digits.png";

static void help()
{
cout <<
"\n"
"SVM and KNearest digit recognition.\n"
"\n"
"Sample loads a dataset of handwritten digits from 'digits.png'.\n"
"Then it trains a SVM and KNearest classifiers on it and evaluates\n"
"their accuracy.\n"
"\n"
"Following preprocessing is applied to the dataset:\n"
" - Moment-based image deskew (see deskew())\n"
" - Digit images are split into 4 10x10 cells and 16-bin\n"
" histogram of oriented gradients is computed for each\n"
" cell\n"
" - Transform histograms to space with Hellinger metric (see [1] (RootSIFT))\n"
"\n"
"\n"
"[1] R. Arandjelovic, A. Zisserman\n"
" \"Three things everyone should know to improve object retrieval\"\n"
" http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf\n"
"\n"
"Usage:\n"
" ./digits\n" << endl;
}

static void split2d(const Mat& image, const Size cell_size, vector<Mat>& cells)
{
int height = image.rows;
int width = image.cols;

int sx = cell_size.width;
int sy = cell_size.height;

cells.clear();

for (int i = 0; i < height; i += sy)
{
for (int j = 0; j < width; j += sx)
{
cells.push_back(image(Rect(j, i, sx, sy)));
}
}
}

static void load_digits(const char* fn, vector<Mat>& digits, vector<int>& labels)
{
digits.clear();
labels.clear();

String filename = samples::findFile(fn);

cout << "Loading " << filename << " ..." << endl;

Mat digits_img = imread(filename, IMREAD_GRAYSCALE);
split2d(digits_img, Size(SZ, SZ), digits);

for (int i = 0; i < CLASS_N; i++)
{
for (size_t j = 0; j < digits.size() / CLASS_N; j++)
{
labels.push_back(i);
}
}
}

static void deskew(const Mat& img, Mat& deskewed_img)
{
Moments m = moments(img);

if (abs(m.mu02) < 0.01)
{
deskewed_img = img.clone();
return;
}

float skew = (float)(m.mu11 / m.mu02);
float M_vals[2][3] = {{1, skew, -0.5f * SZ * skew}, {0, 1, 0}};
Mat M(Size(3, 2), CV_32F);

for (int i = 0; i < M.rows; i++)
{
for (int j = 0; j < M.cols; j++)
{
M.at<float>(i, j) = M_vals[i][j];
}
}

warpAffine(img, deskewed_img, M, Size(SZ, SZ), WARP_INVERSE_MAP | INTER_LINEAR);
}

static void mosaic(const int width, const vector<Mat>& images, Mat& grid)
{
int mat_width = SZ * width;
int mat_height = SZ * (int)ceil((double)images.size() / width);

if (!images.empty())
{
grid = Mat(Size(mat_width, mat_height), images[0].type());

for (size_t i = 0; i < images.size(); i++)
{
Mat location_on_grid = grid(Rect(SZ * ((int)i % width), SZ * ((int)i / width), SZ, SZ));
images[i].copyTo(location_on_grid);
}
}
}

static void evaluate_model(const vector<float>& predictions, const vector<Mat>& digits, const vector<int>& labels, Mat& mos)
{
double err = 0;

for (size_t i = 0; i < predictions.size(); i++)
{
if ((int)predictions[i] != labels[i])
{
err++;
}
}

err /= predictions.size();

cout << format("error: %.2f %%", err * 100) << endl;

int confusion[10][10] = {};

for (size_t i = 0; i < labels.size(); i++)
{
confusion[labels[i]][(int)predictions[i]]++;
}

cout << "confusion matrix:" << endl;
for (int i = 0; i < 10; i++)
{
for (int j = 0; j < 10; j++)
{
cout << format("%2d ", confusion[i][j]);
}
cout << endl;
}

cout << endl;

vector<Mat> vis;

for (size_t i = 0; i < digits.size(); i++)
{
Mat img;
cvtColor(digits[i], img, COLOR_GRAY2BGR);

if ((int)predictions[i] != labels[i])
{
for (int j = 0; j < img.rows; j++)
{
for (int k = 0; k < img.cols; k++)
{
img.at<Vec3b>(j, k)[0] = 0;
img.at<Vec3b>(j, k)[1] = 0;
}
}
}

vis.push_back(img);
}

mosaic(25, vis, mos);
}

static void bincount(const Mat& x, const Mat& weights, const int min_length, vector<double>& bins)
{
double max_x_val = 0;
minMaxLoc(x, NULL, &max_x_val);

bins = vector<double>(max((int)max_x_val, min_length));

for (int i = 0; i < x.rows; i++)
{
for (int j = 0; j < x.cols; j++)
{
bins[x.at<int>(i, j)] += weights.at<float>(i, j);
}
}
}

static void preprocess_hog(const vector<Mat>& digits, Mat& hog)
{
int bin_n = 16;
int half_cell = SZ / 2;
double eps = 1e-7;

hog = Mat(Size(4 * bin_n, (int)digits.size()), CV_32F);

for (size_t img_index = 0; img_index < digits.size(); img_index++)
{
Mat gx;
Sobel(digits[img_index], gx, CV_32F, 1, 0);

Mat gy;
Sobel(digits[img_index], gy, CV_32F, 0, 1);

Mat mag;
Mat ang;
cartToPolar(gx, gy, mag, ang);

Mat bin(ang.size(), CV_32S);

for (int i = 0; i < ang.rows; i++)
{
for (int j = 0; j < ang.cols; j++)
{
bin.at<int>(i, j) = (int)(bin_n * ang.at<float>(i, j) / (2 * CV_PI));
}
}

Mat bin_cells[] = {
bin(Rect(0, 0, half_cell, half_cell)),
bin(Rect(half_cell, 0, half_cell, half_cell)),
bin(Rect(0, half_cell, half_cell, half_cell)),
bin(Rect(half_cell, half_cell, half_cell, half_cell))
};
Mat mag_cells[] = {
mag(Rect(0, 0, half_cell, half_cell)),
mag(Rect(half_cell, 0, half_cell, half_cell)),
mag(Rect(0, half_cell, half_cell, half_cell)),
mag(Rect(half_cell, half_cell, half_cell, half_cell))
};

vector<double> hist;
hist.reserve(4 * bin_n);

for (int i = 0; i < 4; i++)
{
vector<double> partial_hist;
bincount(bin_cells[i], mag_cells[i], bin_n, partial_hist);
hist.insert(hist.end(), partial_hist.begin(), partial_hist.end());
}

// transform to Hellinger kernel
double sum = 0;

for (size_t i = 0; i < hist.size(); i++)
{
sum += hist[i];
}

for (size_t i = 0; i < hist.size(); i++)
{
hist[i] /= sum + eps;
hist[i] = sqrt(hist[i]);
}

double hist_norm = norm(hist);

for (size_t i = 0; i < hist.size(); i++)
{
hog.at<float>((int)img_index, (int)i) = (float)(hist[i] / (hist_norm + eps));
}
}
}

static void shuffle(vector<Mat>& digits, vector<int>& labels)
{
vector<int> shuffled_indexes(digits.size());

for (size_t i = 0; i < digits.size(); i++)
{
shuffled_indexes[i] = (int)i;
}

randShuffle(shuffled_indexes);

vector<Mat> shuffled_digits(digits.size());
vector<int> shuffled_labels(labels.size());

for (size_t i = 0; i < shuffled_indexes.size(); i++)
{
shuffled_digits[shuffled_indexes[i]] = digits[i];
shuffled_labels[shuffled_indexes[i]] = labels[i];
}

digits = shuffled_digits;
labels = shuffled_labels;
}

int main()
{
help();

vector<Mat> digits;
vector<int> labels;

load_digits(DIGITS_FN, digits, labels);

cout << "preprocessing..." << endl;

// shuffle digits
shuffle(digits, labels);

vector<Mat> digits2;

for (size_t i = 0; i < digits.size(); i++)
{
Mat deskewed_digit;
deskew(digits[i], deskewed_digit);
digits2.push_back(deskewed_digit);
}

Mat samples;

preprocess_hog(digits2, samples);

int train_n = (int)(0.9 * samples.rows);
Mat test_set;

vector<Mat> digits_test(digits2.begin() + train_n, digits2.end());
mosaic(25, digits_test, test_set);
imshow("test set", test_set);

Mat samples_train = samples(Rect(0, 0, samples.cols, train_n));
Mat samples_test = samples(Rect(0, train_n, samples.cols, samples.rows - train_n));
vector<int> labels_train(labels.begin(), labels.begin() + train_n);
vector<int> labels_test(labels.begin() + train_n, labels.end());

Ptr<ml::KNearest> k_nearest;
Ptr<ml::SVM> svm;
vector<float> predictions;
Mat vis;

cout << "training KNearest..." << endl;
k_nearest = ml::KNearest::create();
k_nearest->train(samples_train, ml::ROW_SAMPLE, labels_train);

// predict digits with KNearest
k_nearest->findNearest(samples_test, 4, predictions);
evaluate_model(predictions, digits_test, labels_test, vis);
imshow("KNearest test", vis);
k_nearest.release();

cout << "training SVM..." << endl;
svm = ml::SVM::create();
svm->setGamma(5.383);
svm->setC(2.67);
svm->setKernel(ml::SVM::RBF);
svm->setType(ml::SVM::C_SVC);
svm->train(samples_train, ml::ROW_SAMPLE, labels_train);

// predict digits with SVM
svm->predict(samples_test, predictions);
evaluate_model(predictions, digits_test, labels_test, vis);
imshow("SVM test", vis);
cout << "Saving SVM as \"digits_svm.yml\"..." << endl;
svm->save("digits_svm.yml");
svm.release();

waitKey();

return 0;
}