Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Alexander Alekhin 2018-08-29 16:24:56 +03:00
commit c1db75e0c7
44 changed files with 1180 additions and 292 deletions

View File

@@ -275,7 +275,7 @@ OCV_OPTION(WITH_VA "Include VA support" OFF
 OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF IF ((UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)) )
 OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
-OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" OFF IF (UNIX AND NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_LAPACK "Include Lapack library support" (NOT CV_DISABLE_OPTIMIZATION) IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_ITT "Include Intel ITT support" ON IF (NOT APPLE_FRAMEWORK) )
 OCV_OPTION(WITH_PROTOBUF "Enable libprotobuf" ON )

View File

@@ -78,9 +78,9 @@ endif()
 if(INF_ENGINE_TARGET)
   if(NOT INF_ENGINE_RELEASE)
-    message(WARNING "InferenceEngine version have not been set, 2018R2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+    message(WARNING "InferenceEngine version have not been set, 2018R3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
   endif()
-  set(INF_ENGINE_RELEASE "2018020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
+  set(INF_ENGINE_RELEASE "2018030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
     INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )

View File

@@ -12,7 +12,9 @@ endif()
 if(VA_INCLUDE_DIR)
   set(HAVE_VA TRUE)
-  set(VA_LIBRARIES "-lva" "-lva-drm")
+  if(NOT DEFINED VA_LIBRARIES)
+    set(VA_LIBRARIES "va" "va-drm")
+  endif()
 else()
   set(HAVE_VA FALSE)
   message(WARNING "libva installation is not found.")

View File

@@ -12,7 +12,7 @@ Tutorial was written for the following versions of corresponding software:
 - Download and install Android Studio from https://developer.android.com/studio.
-- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.2-android-sdk.zip`).
+- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.3-android-sdk.zip`).
 - Download MobileNet object detection model from https://github.com/chuanqi305/MobileNet-SSD. We need a configuration file `MobileNetSSD_deploy.prototxt` and weights `MobileNetSSD_deploy.caffemodel`.
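For orientation, a minimal sketch of how the two downloaded MobileNet-SSD files might be loaded and run through the OpenCV Java DNN bindings follows. The file paths, the 300x300 input size and the 1/127.5 scaling are assumptions based on the usual MobileNet-SSD deployment settings, not values taken from this tutorial page.

import org.opencv.core.Mat;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Net;

public class MobileNetSsdSketch {
    // Assumed on-device paths; in a real app the prototxt/caffemodel are usually
    // copied out of the APK assets to a readable location first.
    static Net loadNet(String protoPath, String modelPath) {
        return Dnn.readNetFromCaffe(protoPath, modelPath);
    }

    static Mat detect(Net net, Mat bgrFrame) {
        // MobileNet-SSD conventionally expects 300x300 input, mean 127.5, scale 1/127.5.
        Mat blob = Dnn.blobFromImage(bgrFrame, 1.0 / 127.5, new Size(300, 300),
                new Scalar(127.5, 127.5, 127.5), /*swapRB=*/false, /*crop=*/false);
        net.setInput(blob);
        // Returns a 1x1xNx7 matrix: [imageId, classId, confidence, left, top, right, bottom].
        return net.forward();
    }
}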

View File

@@ -36,14 +36,14 @@ Open your Doxyfile using your favorite text editor and search for the key
 `TAGFILES`. Change it as follows:
 @code
-TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2
+TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.3
 @endcode
 If you had other definitions already, you can append the line using a `\`:
 @code
 TAGFILES = ./docs/doxygen-tags/libstdc++.tag=https://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen \
-           ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2
+           ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.3
 @endcode
 Doxygen can now use the information from the tag file to link to the OpenCV

View File

@ -0,0 +1,86 @@
package org.opencv.core;
import java.util.Arrays;
import java.util.List;
import org.opencv.core.RotatedRect;
public class MatOfRotatedRect extends Mat {
// 32FC5
private static final int _depth = CvType.CV_32F;
private static final int _channels = 5;
public MatOfRotatedRect() {
super();
}
protected MatOfRotatedRect(long addr) {
super(addr);
if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incompatible Mat");
//FIXME: do we need release() here?
}
public static MatOfRotatedRect fromNativeAddr(long addr) {
return new MatOfRotatedRect(addr);
}
public MatOfRotatedRect(Mat m) {
super(m, Range.all());
if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incompatible Mat");
//FIXME: do we need release() here?
}
public MatOfRotatedRect(RotatedRect...a) {
super();
fromArray(a);
}
public void alloc(int elemNumber) {
if(elemNumber>0)
super.create(elemNumber, 1, CvType.makeType(_depth, _channels));
}
public void fromArray(RotatedRect...a) {
if(a==null || a.length==0)
return;
int num = a.length;
alloc(num);
float buff[] = new float[num * _channels];
for(int i=0; i<num; i++) {
RotatedRect r = a[i];
buff[_channels*i+0] = (float) r.center.x;
buff[_channels*i+1] = (float) r.center.y;
buff[_channels*i+2] = (float) r.size.width;
buff[_channels*i+3] = (float) r.size.height;
buff[_channels*i+4] = (float) r.angle;
}
put(0, 0, buff); //TODO: check ret val!
}
public RotatedRect[] toArray() {
int num = (int) total();
RotatedRect[] a = new RotatedRect[num];
if(num == 0)
return a;
float buff[] = new float[_channels];
for(int i=0; i<num; i++) {
get(i, 0, buff); //TODO: check ret val!
a[i] = new RotatedRect(new Point(buff[0],buff[1]),new Size(buff[2],buff[3]),buff[4]);
}
return a;
}
public void fromList(List<RotatedRect> lr) {
RotatedRect ap[] = lr.toArray(new RotatedRect[0]);
fromArray(ap);
}
public List<RotatedRect> toList() {
RotatedRect[] ar = toArray();
return Arrays.asList(ar);
}
}
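The new class above follows the other MatOfXxx wrappers: one row per element, packed as 32FC5 (center.x, center.y, width, height, angle). A hedged usage sketch is shown below, pairing it with the NMSBoxesRotated Java binding that the CV_EXPORTS_AS(NMSBoxesRotated) change further down in this merge is expected to generate; the exact generated signature is an assumption, not something stated in this diff.

import java.util.Arrays;
import org.opencv.core.MatOfFloat;
import org.opencv.core.MatOfInt;
import org.opencv.core.MatOfRotatedRect;
import org.opencv.core.Point;
import org.opencv.core.RotatedRect;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;

public class RotatedNmsSketch {
    public static void main(String[] args) {
        // Two heavily overlapping rotated boxes; the second one has the higher score.
        MatOfRotatedRect boxes = new MatOfRotatedRect(
                new RotatedRect(new Point(50, 50), new Size(40, 20), 30),
                new RotatedRect(new Point(52, 51), new Size(40, 20), 32));
        MatOfFloat scores = new MatOfFloat(0.8f, 0.9f);
        MatOfInt indices = new MatOfInt();

        // Assumed binding generated from CV_EXPORTS_AS(NMSBoxesRotated) in dnn.hpp.
        Dnn.NMSBoxesRotated(boxes, scores, 0.5f, 0.4f, indices);

        // Expected to keep only the index of the higher-scoring box.
        System.out.println(Arrays.toString(indices.toArray()));
    }
}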

View File

@@ -1,11 +1,16 @@
 package org.opencv.test.core;
+import org.opencv.core.CvType;
 import org.opencv.core.Point;
 import org.opencv.core.Rect;
 import org.opencv.core.RotatedRect;
+import org.opencv.core.MatOfRotatedRect;
 import org.opencv.core.Size;
 import org.opencv.test.OpenCVTestCase;
+import java.util.Arrays;
+import java.util.List;
 public class RotatedRectTest extends OpenCVTestCase {
     private double angle;
@@ -188,4 +193,21 @@ public class RotatedRectTest extends OpenCVTestCase {
         assertEquals(expected, actual);
     }
public void testMatOfRotatedRect() {
RotatedRect a = new RotatedRect(new Point(1,2),new Size(3,4),5.678);
RotatedRect b = new RotatedRect(new Point(9,8),new Size(7,6),5.432);
MatOfRotatedRect m = new MatOfRotatedRect(a,b,a,b,a,b,a,b);
assertEquals(m.rows(), 8);
assertEquals(m.cols(), 1);
assertEquals(m.type(), CvType.CV_32FC(5));
RotatedRect[] arr = m.toArray();
assertEquals(arr[2].angle, a.angle, EPS);
assertEquals(arr[3].center.x, b.center.x);
assertEquals(arr[3].size.width, b.size.width);
List<RotatedRect> li = m.toList();
assertEquals(li.size(), 8);
RotatedRect rr = li.get(7);
assertEquals(rr.angle, b.angle, EPS);
assertEquals(rr.center.y, b.center.y);
}
}

View File

@@ -324,6 +324,163 @@ static void copy_convert_bgr_to_nv12(const VAImage& image, const Mat& bgr, unsig
         dstUV += dstStepUV;
     }
 }
static void copy_convert_yv12_to_bgr(const VAImage& image, const unsigned char* buffer, Mat& bgr)
{
const float d1 = 16.0f;
const float d2 = 128.0f;
static const float coeffs[5] =
{
1.163999557f,
2.017999649f,
-0.390999794f,
-0.812999725f,
1.5959997177f
};
CV_CheckEQ(image.format.fourcc, VA_FOURCC_YV12, "Unexpected image format");
CV_CheckEQ(image.num_planes, 3, "");
const size_t srcOffsetY = image.offsets[0];
const size_t srcOffsetV = image.offsets[1];
const size_t srcOffsetU = image.offsets[2];
const size_t srcStepY = image.pitches[0];
const size_t srcStepU = image.pitches[1];
const size_t srcStepV = image.pitches[2];
const size_t dstStep = bgr.step;
const unsigned char* srcY_ = buffer + srcOffsetY;
const unsigned char* srcV_ = buffer + srcOffsetV;
const unsigned char* srcU_ = buffer + srcOffsetU;
for (int y = 0; y < bgr.rows; y += 2)
{
const unsigned char* srcY0 = srcY_ + (srcStepY) * y;
const unsigned char* srcY1 = srcY0 + srcStepY;
const unsigned char* srcV = srcV_ + (srcStepV) * y / 2;
const unsigned char* srcU = srcU_ + (srcStepU) * y / 2;
unsigned char* dst0 = bgr.data + (dstStep) * y;
unsigned char* dst1 = dst0 + dstStep;
for (int x = 0; x < bgr.cols; x += 2)
{
float Y0 = float(srcY0[x+0]);
float Y1 = float(srcY0[x+1]);
float Y2 = float(srcY1[x+0]);
float Y3 = float(srcY1[x+1]);
float U = float(srcU[x/2]) - d2;
float V = float(srcV[x/2]) - d2;
Y0 = std::max(0.0f, Y0 - d1) * coeffs[0];
Y1 = std::max(0.0f, Y1 - d1) * coeffs[0];
Y2 = std::max(0.0f, Y2 - d1) * coeffs[0];
Y3 = std::max(0.0f, Y3 - d1) * coeffs[0];
float ruv = coeffs[4]*V;
float guv = coeffs[3]*V + coeffs[2]*U;
float buv = coeffs[1]*U;
dst0[(x+0)*NCHANNELS+0] = saturate_cast<unsigned char>(Y0 + buv);
dst0[(x+0)*NCHANNELS+1] = saturate_cast<unsigned char>(Y0 + guv);
dst0[(x+0)*NCHANNELS+2] = saturate_cast<unsigned char>(Y0 + ruv);
dst0[(x+1)*NCHANNELS+0] = saturate_cast<unsigned char>(Y1 + buv);
dst0[(x+1)*NCHANNELS+1] = saturate_cast<unsigned char>(Y1 + guv);
dst0[(x+1)*NCHANNELS+2] = saturate_cast<unsigned char>(Y1 + ruv);
dst1[(x+0)*NCHANNELS+0] = saturate_cast<unsigned char>(Y2 + buv);
dst1[(x+0)*NCHANNELS+1] = saturate_cast<unsigned char>(Y2 + guv);
dst1[(x+0)*NCHANNELS+2] = saturate_cast<unsigned char>(Y2 + ruv);
dst1[(x+1)*NCHANNELS+0] = saturate_cast<unsigned char>(Y3 + buv);
dst1[(x+1)*NCHANNELS+1] = saturate_cast<unsigned char>(Y3 + guv);
dst1[(x+1)*NCHANNELS+2] = saturate_cast<unsigned char>(Y3 + ruv);
}
}
}
static void copy_convert_bgr_to_yv12(const VAImage& image, const Mat& bgr, unsigned char* buffer)
{
const float d1 = 16.0f;
const float d2 = 128.0f;
static const float coeffs[8] =
{
0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f,
-0.2909994125f, 0.438999176f, -0.3679990768f, -0.0709991455f
};
CV_CheckEQ(image.format.fourcc, VA_FOURCC_YV12, "Unexpected image format");
CV_CheckEQ(image.num_planes, 3, "");
const size_t dstOffsetY = image.offsets[0];
const size_t dstOffsetV = image.offsets[1];
const size_t dstOffsetU = image.offsets[2];
const size_t dstStepY = image.pitches[0];
const size_t dstStepU = image.pitches[1];
const size_t dstStepV = image.pitches[2];
unsigned char* dstY_ = buffer + dstOffsetY;
unsigned char* dstV_ = buffer + dstOffsetV;
unsigned char* dstU_ = buffer + dstOffsetU;
const size_t srcStep = bgr.step;
for (int y = 0; y < bgr.rows; y += 2)
{
unsigned char* dstY0 = dstY_ + (dstStepY) * y;
unsigned char* dstY1 = dstY0 + dstStepY;
unsigned char* dstV = dstV_ + (dstStepV) * y / 2;
unsigned char* dstU = dstU_ + (dstStepU) * y / 2;
const unsigned char* src0 = bgr.data + (srcStep) * y;
const unsigned char* src1 = src0 + srcStep;
for (int x = 0; x < bgr.cols; x += 2)
{
float B0 = float(src0[(x+0)*NCHANNELS+0]);
float G0 = float(src0[(x+0)*NCHANNELS+1]);
float R0 = float(src0[(x+0)*NCHANNELS+2]);
float B1 = float(src0[(x+1)*NCHANNELS+0]);
float G1 = float(src0[(x+1)*NCHANNELS+1]);
float R1 = float(src0[(x+1)*NCHANNELS+2]);
float B2 = float(src1[(x+0)*NCHANNELS+0]);
float G2 = float(src1[(x+0)*NCHANNELS+1]);
float R2 = float(src1[(x+0)*NCHANNELS+2]);
float B3 = float(src1[(x+1)*NCHANNELS+0]);
float G3 = float(src1[(x+1)*NCHANNELS+1]);
float R3 = float(src1[(x+1)*NCHANNELS+2]);
float Y0 = coeffs[0]*R0 + coeffs[1]*G0 + coeffs[2]*B0 + d1;
float Y1 = coeffs[0]*R1 + coeffs[1]*G1 + coeffs[2]*B1 + d1;
float Y2 = coeffs[0]*R2 + coeffs[1]*G2 + coeffs[2]*B2 + d1;
float Y3 = coeffs[0]*R3 + coeffs[1]*G3 + coeffs[2]*B3 + d1;
float U = coeffs[3]*R0 + coeffs[4]*G0 + coeffs[5]*B0 + d2;
float V = coeffs[5]*R0 + coeffs[6]*G0 + coeffs[7]*B0 + d2;
dstY0[x+0] = saturate_cast<unsigned char>(Y0);
dstY0[x+1] = saturate_cast<unsigned char>(Y1);
dstY1[x+0] = saturate_cast<unsigned char>(Y2);
dstY1[x+1] = saturate_cast<unsigned char>(Y3);
dstU[x/2] = saturate_cast<unsigned char>(U);
dstV[x/2] = saturate_cast<unsigned char>(V);
}
}
}
 #endif // HAVE_VA
 void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size)
@@ -412,9 +569,12 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
     if (status != VA_STATUS_SUCCESS)
         CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
-    CV_Assert(image.format.fourcc == VA_FOURCC_NV12);
-    copy_convert_bgr_to_nv12(image, m, buffer);
+    if (image.format.fourcc == VA_FOURCC_NV12)
+        copy_convert_bgr_to_nv12(image, m, buffer);
+    if (image.format.fourcc == VA_FOURCC_YV12)
+        copy_convert_bgr_to_yv12(image, m, buffer);
+    else
+        CV_Check((int)image.format.fourcc, image.format.fourcc == VA_FOURCC_NV12 || image.format.fourcc == VA_FOURCC_YV12, "Unexpected image format");
     status = vaUnmapBuffer(display, image.buf);
     if (status != VA_STATUS_SUCCESS)
@@ -510,9 +670,12 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
     if (status != VA_STATUS_SUCCESS)
         CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
-    CV_Assert(image.format.fourcc == VA_FOURCC_NV12);
-    copy_convert_nv12_to_bgr(image, buffer, m);
+    if (image.format.fourcc == VA_FOURCC_NV12)
+        copy_convert_nv12_to_bgr(image, buffer, m);
+    if (image.format.fourcc == VA_FOURCC_YV12)
+        copy_convert_yv12_to_bgr(image, buffer, m);
+    else
+        CV_Check((int)image.format.fourcc, image.format.fourcc == VA_FOURCC_NV12 || image.format.fourcc == VA_FOURCC_YV12, "Unexpected image format");
     status = vaUnmapBuffer(display, image.buf);
     if (status != VA_STATUS_SUCCESS)

View File

@@ -2158,4 +2158,71 @@ TEST(Core_Norm, IPP_regression_NORM_L1_16UC3_small)
     EXPECT_EQ((double)20*cn, cv::norm(a, b, NORM_L1, mask));
 }
TEST(Core_ConvertTo, regression_12121)
{
{
Mat src(4, 64, CV_32SC1, Scalar(-1));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32767));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32768));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(32768));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(255, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(0, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32767));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(0, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32768));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(0, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(65536));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(65535, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
}
}} // namespace

View File

@ -278,20 +278,12 @@ namespace
{ {
template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T> template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
{ {
float power; typedef typename LargerType<T, float>::type LargerType;
LargerType power;
__device__ __forceinline__ T operator()(T e) const __device__ __forceinline__ T operator()(T e) const
{ {
return cudev::saturate_cast<T>(__powf((float)e, power)); T res = cudev::saturate_cast<T>(__powf(e < 0 ? -e : e, power));
}
};
template<typename T> struct PowOp<T, true> : unary_function<T, T>
{
float power;
__device__ __forceinline__ T operator()(T e) const
{
T res = cudev::saturate_cast<T>(__powf((float)e, power));
if ((e < 0) && (1 & static_cast<int>(power))) if ((e < 0) && (1 & static_cast<int>(power)))
res *= -1; res *= -1;
@ -299,22 +291,15 @@ namespace
return res; return res;
} }
}; };
template<> struct PowOp<float> : unary_function<float, float>
{
float power;
__device__ __forceinline__ float operator()(float e) const template<typename T> struct PowOp<T, false> : unary_function<T, T>
{
return __powf(::fabs(e), power);
}
};
template<> struct PowOp<double> : unary_function<double, double>
{ {
double power; typedef typename LargerType<T, float>::type LargerType;
LargerType power;
__device__ __forceinline__ double operator()(double e) const __device__ __forceinline__ T operator()(T e) const
{ {
return ::pow(::fabs(e), power); return cudev::saturate_cast<T>(__powf(e, power));
} }
}; };

View File

@@ -46,9 +46,9 @@
 #include <opencv2/core.hpp>
 #if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
-#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v6 {
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v7 {
 #define CV__DNN_EXPERIMENTAL_NS_END }
-namespace cv { namespace dnn { namespace experimental_dnn_v6 { } using namespace experimental_dnn_v6; }}
+namespace cv { namespace dnn { namespace experimental_dnn_34_v7 { } using namespace experimental_dnn_34_v7; }}
 #else
 #define CV__DNN_EXPERIMENTAL_NS_BEGIN
 #define CV__DNN_EXPERIMENTAL_NS_END
@@ -900,7 +900,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
                          CV_OUT std::vector<int>& indices,
                          const float eta = 1.f, const int top_k = 0);
-CV_EXPORTS void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
+CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
                          const float score_threshold, const float nms_threshold,
                          CV_OUT std::vector<int>& indices,
                          const float eta = 1.f, const int top_k = 0);

View File

@@ -699,9 +699,9 @@ public:
         }
     }
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
     {
-        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate)
+        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
         {
             Mat bestBlob;
             LayerPin bestBlobPin;
@@ -747,7 +747,7 @@ public:
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                                std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool forceCreate = false, bool use_half = false)
+                               bool use_half = false)
     {
         CV_TRACE_FUNCTION();
@@ -818,7 +818,7 @@ public:
                     reuse(ld.inputBlobsId[0], blobPin);
                 }
                 else
-                    reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half);
+                    reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
             }
         }
     }
@@ -1607,7 +1607,6 @@ struct Net::Impl
         std::vector<LayerPin> pinsForInternalBlobs;
         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
-                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE,
                                           preferableBackend == DNN_BACKEND_OPENCV &&
                                           preferableTarget == DNN_TARGET_OPENCL_FP16);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());

View File

@@ -81,6 +81,7 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         {
             if (type == "Convolution")
@@ -91,13 +92,19 @@ public:
                 const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW layout
                 const int group = numOutput / outGroupCn;
                 if (group != 1)
+                {
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R3)
+                    return preferableTarget == DNN_TARGET_CPU;
+#endif
                     return false;
+                }
                 if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
                     return dilation.width == 1 && dilation.height == 1;
                 return true;
             }
         }
         else
+#endif  // HAVE_INF_ENGINE
             return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
     }

View File

@@ -99,6 +99,13 @@ public:
             }
         }
     }
if (boxes.rows < out.size[0])
{
// left = top = right = bottom = 0
std::vector<cv::Range> dstRanges(4, Range::all());
dstRanges[0] = Range(boxes.rows, out.size[0]);
out(dstRanges).setTo(inp.ptr<float>(0, 0, 0)[0]);
}
     }
 private:

View File

@ -115,6 +115,7 @@ public:
// It's true whenever predicted bounding boxes and proposals are normalized to [0, 1]. // It's true whenever predicted bounding boxes and proposals are normalized to [0, 1].
bool _bboxesNormalized; bool _bboxesNormalized;
bool _clip; bool _clip;
bool _groupByClasses;
enum { _numAxes = 4 }; enum { _numAxes = 4 };
static const std::string _layerName; static const std::string _layerName;
@ -183,6 +184,7 @@ public:
_locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false); _locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false);
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true); _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
_clip = getParameter<bool>(params, "clip", 0, false, false); _clip = getParameter<bool>(params, "clip", 0, false, false);
_groupByClasses = getParameter<bool>(params, "group_by_classes", 0, false, true);
getCodeType(params); getCodeType(params);
@ -381,7 +383,7 @@ public:
{ {
count += outputDetections_(i, &outputsData[count * 7], count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i], allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]); allIndices[i], _groupByClasses);
} }
CV_Assert(count == numKept); CV_Assert(count == numKept);
} }
@ -497,7 +499,7 @@ public:
{ {
count += outputDetections_(i, &outputsData[count * 7], count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i], allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]); allIndices[i], _groupByClasses);
} }
CV_Assert(count == numKept); CV_Assert(count == numKept);
} }
@ -505,9 +507,36 @@ public:
size_t outputDetections_( size_t outputDetections_(
const int i, float* outputsData, const int i, float* outputsData,
const LabelBBox& decodeBBoxes, Mat& confidenceScores, const LabelBBox& decodeBBoxes, Mat& confidenceScores,
const std::map<int, std::vector<int> >& indicesMap const std::map<int, std::vector<int> >& indicesMap,
bool groupByClasses
) )
{ {
std::vector<int> dstIndices;
std::vector<std::pair<float, int> > allScores;
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
{
int label = it->first;
if (confidenceScores.rows <= label)
CV_Error_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
const std::vector<float>& scores = confidenceScores.row(label);
const std::vector<int>& indices = it->second;
const int numAllScores = allScores.size();
allScores.reserve(numAllScores + indices.size());
for (size_t j = 0; j < indices.size(); ++j)
{
allScores.push_back(std::make_pair(scores[indices[j]], numAllScores + j));
}
}
if (!groupByClasses)
std::sort(allScores.begin(), allScores.end(), util::SortScorePairDescend<int>);
dstIndices.resize(allScores.size());
for (size_t j = 0; j < dstIndices.size(); ++j)
{
dstIndices[allScores[j].second] = j;
}
size_t count = 0; size_t count = 0;
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it) for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
{ {
@ -524,14 +553,15 @@ public:
for (size_t j = 0; j < indices.size(); ++j, ++count) for (size_t j = 0; j < indices.size(); ++j, ++count)
{ {
int idx = indices[j]; int idx = indices[j];
int dstIdx = dstIndices[count];
const util::NormalizedBBox& decode_bbox = label_bboxes->second[idx]; const util::NormalizedBBox& decode_bbox = label_bboxes->second[idx];
outputsData[count * 7] = i; outputsData[dstIdx * 7] = i;
outputsData[count * 7 + 1] = label; outputsData[dstIdx * 7 + 1] = label;
outputsData[count * 7 + 2] = scores[idx]; outputsData[dstIdx * 7 + 2] = scores[idx];
outputsData[count * 7 + 3] = decode_bbox.xmin; outputsData[dstIdx * 7 + 3] = decode_bbox.xmin;
outputsData[count * 7 + 4] = decode_bbox.ymin; outputsData[dstIdx * 7 + 4] = decode_bbox.ymin;
outputsData[count * 7 + 5] = decode_bbox.xmax; outputsData[dstIdx * 7 + 5] = decode_bbox.xmax;
outputsData[count * 7 + 6] = decode_bbox.ymax; outputsData[dstIdx * 7 + 6] = decode_bbox.ymax;
} }
} }
return count; return count;

View File

@@ -599,7 +599,8 @@ struct ELUFunctor
     bool supportBackend(int backendId, int)
     {
-        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE;
     }
     void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@@ -653,8 +654,8 @@ struct ELUFunctor
 #ifdef HAVE_INF_ENGINE
     InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
     {
-        CV_Error(Error::StsNotImplemented, "ELU");
-        return InferenceEngine::CNNLayerPtr();
+        lp.type = "ELU";
+        return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp));
     }
 #endif  // HAVE_INF_ENGINE

View File

@@ -91,8 +91,8 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
         return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_HALIDE && haveHalide() ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE && (preferableTarget != DNN_TARGET_MYRIAD || type == CHANNEL_NRM);
     }
 #ifdef HAVE_OPENCL

View File

@@ -33,9 +33,7 @@ public:
         interpolation = params.get<String>("interpolation");
         CV_Assert(interpolation == "nearest" || interpolation == "bilinear");
-        bool alignCorners = params.get<bool>("align_corners", false);
-        if (alignCorners)
-            CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented");
+        alignCorners = params.get<bool>("align_corners", false);
     }
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -66,8 +64,15 @@ public:
             outHeight = outputs[0].size[2];
             outWidth = outputs[0].size[3];
         }
-        scaleHeight = static_cast<float>(inputs[0]->size[2]) / outHeight;
-        scaleWidth = static_cast<float>(inputs[0]->size[3]) / outWidth;
+        if (alignCorners && outHeight > 1)
+            scaleHeight = static_cast<float>(inputs[0]->size[2] - 1) / (outHeight - 1);
+        else
+            scaleHeight = static_cast<float>(inputs[0]->size[2]) / outHeight;
+        if (alignCorners && outWidth > 1)
+            scaleWidth = static_cast<float>(inputs[0]->size[3] - 1) / (outWidth - 1);
+        else
+            scaleWidth = static_cast<float>(inputs[0]->size[3]) / outWidth;
     }
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
@@ -166,6 +171,7 @@ protected:
     int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight;
     String interpolation;
     float scaleWidth, scaleHeight;
+    bool alignCorners;
 };

View File

@@ -24,6 +24,7 @@
 #define INF_ENGINE_RELEASE_2018R1 2018010000
 #define INF_ENGINE_RELEASE_2018R2 2018020000
+#define INF_ENGINE_RELEASE_2018R3 2018030000
 #ifndef INF_ENGINE_RELEASE
 #warning("IE version have not been provided via command-line. Using 2018R2 by default")
@@ -31,6 +32,7 @@
 #endif
 #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
+#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
 #endif  // HAVE_INF_ENGINE

View File

@ -737,11 +737,18 @@ void TFImporter::populateNet(Net dstNet)
int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
data_layouts[name] = predictedLayout; data_layouts[name] = predictedLayout;
if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative") if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad")
{ {
// The first node of dilated convolution subgraph. // The first node of dilated convolution subgraph.
// Extract input node, dilation rate and paddings. // Extract input node, dilation rate and paddings.
std::string input = layer.input(0); std::string input = layer.input(0);
StrIntVector next_layers;
if (type == "SpaceToBatchND" || type == "Pad")
{
next_layers = getNextLayers(net, name, "Conv2D");
if (next_layers.empty())
next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
}
if (type == "SpaceToBatchND") if (type == "SpaceToBatchND")
{ {
// op: "SpaceToBatchND" // op: "SpaceToBatchND"
@ -762,17 +769,57 @@ void TFImporter::populateNet(Net dstNet)
layerParams.set("pad_h", paddings.at<float>(0)); layerParams.set("pad_h", paddings.at<float>(0));
layerParams.set("pad_w", paddings.at<float>(2)); layerParams.set("pad_w", paddings.at<float>(2));
StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
if (next_layers.empty())
{
next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
}
CV_Assert(next_layers.size() == 1); CV_Assert(next_layers.size() == 1);
layer = net.node(next_layers[0].second); layer = net.node(next_layers[0].second);
layers_to_ignore.insert(next_layers[0].first); layers_to_ignore.insert(next_layers[0].first);
name = layer.name(); name = layer.name();
type = layer.op(); type = layer.op();
} }
else if (type == "Pad")
{
Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
CV_Assert(paddings.type() == CV_32SC1);
if (paddings.total() == 8)
{
// Perhaps, we have NHWC padding dimensions order.
// N H W C
// 0 1 2 3 4 5 6 7
std::swap(paddings.at<int32_t>(2), paddings.at<int32_t>(6));
std::swap(paddings.at<int32_t>(3), paddings.at<int32_t>(7));
// N C W H
// 0 1 2 3 4 5 6 7
std::swap(paddings.at<int32_t>(4), paddings.at<int32_t>(6));
std::swap(paddings.at<int32_t>(5), paddings.at<int32_t>(7));
// N C H W
// 0 1 2 3 4 5 6 7
}
if (next_layers.empty() || paddings.total() != 8 ||
paddings.at<int32_t>(4) != paddings.at<int32_t>(5) ||
paddings.at<int32_t>(6) != paddings.at<int32_t>(7))
{
// Just a single padding layer.
layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
int id = dstNet.addLayer(name, "Padding", layerParams);
layer_id[name] = id;
connect(layer_id, dstNet, parsePin(input), id, 0);
continue;
}
else
{
// Merge with subsequent convolutional layer.
CV_Assert(next_layers.size() == 1);
layerParams.set("pad_h", paddings.at<int32_t>(4));
layerParams.set("pad_w", paddings.at<int32_t>(6));
layer = net.node(next_layers[0].second);
layers_to_ignore.insert(next_layers[0].first);
name = layer.name();
type = layer.op();
}
}
// For the object detection networks, TensorFlow Object Detection API // For the object detection networks, TensorFlow Object Detection API
// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
@ -784,7 +831,7 @@ void TFImporter::populateNet(Net dstNet)
layerParams.set("bias_term", false); layerParams.set("bias_term", false);
layerParams.blobs.resize(1); layerParams.blobs.resize(1);
StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); next_layers = getNextLayers(net, name, "BiasAdd");
if (next_layers.size() == 1) { if (next_layers.size() == 1) {
layerParams.set("bias_term", true); layerParams.set("bias_term", true);
layerParams.blobs.resize(2); layerParams.blobs.resize(2);
@ -1416,31 +1463,6 @@ void TFImporter::populateNet(Net dstNet)
} }
} }
} }
else if (type == "Pad")
{
Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
CV_Assert(paddings.type() == CV_32SC1);
if (paddings.total() == 8)
{
// Perhaps, we have NHWC padding dimensions order.
// N H W C
// 0 1 2 3 4 5 6 7
std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
// N C W H
// 0 1 2 3 4 5 6 7
std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
// N C H W
// 0 1 2 3 4 5 6 7
}
layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
int id = dstNet.addLayer(name, "Padding", layerParams);
layer_id[name] = id;
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
else if (type == "FusedBatchNorm") else if (type == "FusedBatchNorm")
{ {
// op: "FusedBatchNorm" // op: "FusedBatchNorm"

View File

@ -222,9 +222,12 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
TEST_P(DNNTestNetwork, OpenFace) TEST_P(DNNTestNetwork, OpenFace)
{ {
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
if (backend == DNN_BACKEND_HALIDE || if (backend == DNN_BACKEND_HALIDE ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16))
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
throw SkipTestException(""); throw SkipTestException("");
processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), ""); processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), "");
} }
@ -253,12 +256,19 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
TEST_P(DNNTestNetwork, DenseNet_121) TEST_P(DNNTestNetwork, DenseNet_121)
{ {
if ((backend == DNN_BACKEND_HALIDE) || if (backend == DNN_BACKEND_HALIDE)
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL_FP16 ||
target == DNN_TARGET_MYRIAD)))
throw SkipTestException(""); throw SkipTestException("");
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "caffe");
float l1 = 0.0, lInf = 0.0;
if (target == DNN_TARGET_OPENCL_FP16)
{
l1 = 9e-3; lInf = 5e-2;
}
else if (target == DNN_TARGET_MYRIAD)
{
l1 = 6e-2; lInf = 0.27;
}
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
} }
TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)

View File

@ -374,14 +374,6 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
TEST_P(Test_Caffe_nets, Colorization) TEST_P(Test_Caffe_nets, Colorization)
{ {
checkBackend(); checkBackend();
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
const float l1 = 4e-4;
const float lInf = 3e-3;
Mat inp = blobFromNPY(_tf("colorization_inp.npy")); Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
Mat ref = blobFromNPY(_tf("colorization_out.npy")); Mat ref = blobFromNPY(_tf("colorization_out.npy"));
Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy")); Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy"));
@ -398,11 +390,15 @@ TEST_P(Test_Caffe_nets, Colorization)
net.setInput(inp); net.setInput(inp);
Mat out = net.forward(); Mat out = net.forward();
// Reference output values are in range [-29.1, 69.5]
const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.21 : 4e-4;
const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5.3 : 3e-3;
normAssert(out, ref, "", l1, lInf); normAssert(out, ref, "", l1, lInf);
} }
TEST(Reproducibility_DenseNet_121, Accuracy) TEST_P(Test_Caffe_nets, DenseNet_121)
{ {
checkBackend();
const string proto = findDataFile("dnn/DenseNet_121.prototxt", false); const string proto = findDataFile("dnn/DenseNet_121.prototxt", false);
const string model = findDataFile("dnn/DenseNet_121.caffemodel", false); const string model = findDataFile("dnn/DenseNet_121.caffemodel", false);
@ -411,12 +407,23 @@ TEST(Reproducibility_DenseNet_121, Accuracy)
Mat ref = blobFromNPY(_tf("densenet_121_output.npy")); Mat ref = blobFromNPY(_tf("densenet_121_output.npy"));
Net net = readNetFromCaffe(proto, model); Net net = readNetFromCaffe(proto, model);
net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(inp); net.setInput(inp);
Mat out = net.forward(); Mat out = net.forward();
normAssert(out, ref); // Reference is an array of 1000 values from a range [-6.16, 7.9]
float l1 = default_l1, lInf = default_lInf;
if (target == DNN_TARGET_OPENCL_FP16)
{
l1 = 0.017; lInf = 0.067;
}
else if (target == DNN_TARGET_MYRIAD)
{
l1 = 0.097; lInf = 0.52;
}
normAssert(out, ref, "", l1, lInf);
} }
TEST(Test_Caffe, multiple_inputs) TEST(Test_Caffe, multiple_inputs)

View File

@@ -177,7 +177,8 @@ TEST_P(DNNTestOpenVINO, models)
     Target target = (dnn::Target)(int)get<0>(GetParam());
     std::string modelName = get<1>(GetParam());
-    if (modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16)
+    if ((modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16) ||
+        (modelName == "vehicle-license-plate-detection-barrier-0106"))
         throw SkipTestException("");
     std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "FP16" : "FP32";

View File

@@ -127,15 +127,9 @@ TEST_P(Test_Caffe_layers, Softmax)
     testLayerUsingCaffeModels("layer_softmax");
 }
-TEST_P(Test_Caffe_layers, LRN_spatial)
+TEST_P(Test_Caffe_layers, LRN)
 {
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
-        throw SkipTestException("");
     testLayerUsingCaffeModels("layer_lrn_spatial");
-}
-TEST_P(Test_Caffe_layers, LRN_channels)
-{
     testLayerUsingCaffeModels("layer_lrn_channels");
 }

View File

@ -399,8 +399,10 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
TEST_P(Test_TensorFlow_nets, EAST_text_detection) TEST_P(Test_TensorFlow_nets, EAST_text_detection)
{ {
checkBackend(); checkBackend();
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
throw SkipTestException(""); if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
@ -425,8 +427,25 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection)
Mat scores = outs[0]; Mat scores = outs[0];
Mat geometry = outs[1]; Mat geometry = outs[1];
normAssert(scores, blobFromNPY(refScoresPath), "scores"); // Scores are in range [0, 1]. Geometry values are in range [-0.23, 290]
normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3); double l1_scores = default_l1, lInf_scores = default_lInf;
double l1_geometry = default_l1, lInf_geometry = default_lInf;
if (target == DNN_TARGET_OPENCL_FP16)
{
lInf_scores = 0.11;
l1_geometry = 0.28; lInf_geometry = 5.94;
}
else if (target == DNN_TARGET_MYRIAD)
{
lInf_scores = 0.214;
l1_geometry = 0.47; lInf_geometry = 15.34;
}
else
{
l1_geometry = 1e-4, lInf_geometry = 3e-3;
}
normAssert(scores, blobFromNPY(refScoresPath), "scores", l1_scores, lInf_scores);
normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", l1_geometry, lInf_geometry);
} }
INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets()); INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets());
@ -537,4 +556,56 @@ TEST(Test_TensorFlow, two_inputs)
normAssert(out, firstInput + secondInput); normAssert(out, firstInput + secondInput);
} }
TEST(Test_TensorFlow, Mask_RCNN)
{
std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt", false);
std::string model = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pb", false);
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/street.png", false));
Mat refDetections = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy"));
Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setInput(blob);
// Mask-RCNN predicts bounding boxes and segmentation masks.
std::vector<String> outNames(2);
outNames[0] = "detection_out_final";
outNames[1] = "detection_masks";
std::vector<Mat> outs;
net.forward(outs, outNames);
Mat outDetections = outs[0];
Mat outMasks = outs[1];
normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5);
// Output size of masks is NxCxHxW where
// N - number of detected boxes
// C - number of classes (excluding background)
// HxW - segmentation shape
const int numDetections = outDetections.size[2];
int masksSize[] = {1, numDetections, outMasks.size[2], outMasks.size[3]};
Mat masks(4, &masksSize[0], CV_32F);
std::vector<cv::Range> srcRanges(4, cv::Range::all());
std::vector<cv::Range> dstRanges(4, cv::Range::all());
outDetections = outDetections.reshape(1, outDetections.total() / 7);
for (int i = 0; i < numDetections; ++i)
{
// Get a class id for this bounding box and copy mask only for that class.
int classId = static_cast<int>(outDetections.at<float>(i, 1));
srcRanges[0] = dstRanges[1] = cv::Range(i, i + 1);
srcRanges[1] = cv::Range(classId, classId + 1);
outMasks(srcRanges).copyTo(masks(dstRanges));
}
cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()};
normAssert(masks, refMasks(&topRefMasks[0]));
}
} }

View File

@ -242,15 +242,23 @@ TEST_P(Test_Torch_layers, net_residual)
runTorchNet("net_residual", "", false, true); runTorchNet("net_residual", "", false, true);
} }
typedef testing::TestWithParam<Target> Test_Torch_nets; class Test_Torch_nets : public DNNTestLayer {};
TEST_P(Test_Torch_nets, OpenFace_accuracy) TEST_P(Test_Torch_nets, OpenFace_accuracy)
{ {
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
checkBackend();
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");
const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false); const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
Net net = readNetFromTorch(model); Net net = readNetFromTorch(model);
net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableBackend(backend);
net.setPreferableTarget(GetParam()); net.setPreferableTarget(target);
Mat sample = imread(findDataFile("cv/shared/lena.png", false)); Mat sample = imread(findDataFile("cv/shared/lena.png", false));
Mat sampleF32(sample.size(), CV_32FC3); Mat sampleF32(sample.size(), CV_32FC3);
@ -264,11 +272,16 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy)
Mat out = net.forward(); Mat out = net.forward();
Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true); Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
normAssert(out, outRef); normAssert(out, outRef, "", default_l1, default_lInf);
} }
TEST_P(Test_Torch_nets, ENet_accuracy) TEST_P(Test_Torch_nets, ENet_accuracy)
{ {
checkBackend();
if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
Net net; Net net;
{ {
const string model = findDataFile("dnn/Enet-model-best.net", false); const string model = findDataFile("dnn/Enet-model-best.net", false);
@ -276,8 +289,8 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
ASSERT_TRUE(!net.empty()); ASSERT_TRUE(!net.empty());
} }
net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableBackend(backend);
net.setPreferableTarget(GetParam()); net.setPreferableTarget(target);
Mat sample = imread(_tf("street.png", false)); Mat sample = imread(_tf("street.png", false));
Mat inputBlob = blobFromImage(sample, 1./255); Mat inputBlob = blobFromImage(sample, 1./255);
@ -314,6 +327,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
// -model models/instance_norm/feathers.t7 // -model models/instance_norm/feathers.t7
TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
{ {
checkBackend();
std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7", std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
"dnn/fast_neural_style_instance_norm_feathers.t7"}; "dnn/fast_neural_style_instance_norm_feathers.t7"};
std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"}; std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
@ -323,8 +337,8 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
const string model = findDataFile(models[i], false); const string model = findDataFile(models[i], false);
Net net = readNetFromTorch(model); Net net = readNetFromTorch(model);
net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableBackend(backend);
net.setPreferableTarget(GetParam()); net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/googlenet_1.png", false)); Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false); Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
@ -341,12 +355,20 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
Mat ref = imread(findDataFile(targets[i])); Mat ref = imread(findDataFile(targets[i]));
Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false); Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
normAssert(out, refBlob, "", 0.5, 1.1); if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
{
double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();
if (target == DNN_TARGET_MYRIAD)
EXPECT_LE(normL1, 4.0f);
else
EXPECT_LE(normL1, 0.6f);
}
else
normAssert(out, refBlob, "", 0.5, 1.1);
} }
} }
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets()); INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());
// Test a custom layer // Test a custom layer
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest

View File

@@ -199,6 +199,7 @@ typedef std::vector<Vec6f> vector_Vec6f;
 typedef std::vector<Vec4i> vector_Vec4i;
 typedef std::vector<Rect> vector_Rect;
 typedef std::vector<Rect2d> vector_Rect2d;
+typedef std::vector<RotatedRect> vector_RotatedRect;
 typedef std::vector<KeyPoint> vector_KeyPoint;
 typedef std::vector<Mat> vector_Mat;
 typedef std::vector<std::vector<Mat> > vector_vector_Mat;
@@ -1643,6 +1644,18 @@ template<> struct pyopencvVecConverter<String>
     }
 };
template<> struct pyopencvVecConverter<RotatedRect>
{
static bool to(PyObject* obj, std::vector<RotatedRect>& value, const ArgInfo info)
{
return pyopencv_to_generic_vec(obj, value, info);
}
static PyObject* from(const std::vector<RotatedRect>& value)
{
return pyopencv_from_generic_vec(value);
}
};
 template<>
 bool pyopencv_to(PyObject *obj, TermCriteria& dst, const char *name)
 {

View File

@@ -165,8 +165,8 @@ static Mat _localAffineEstimate(const std::vector<Point2f>& shape1, const std::v
         }
         else
         {
-            therow.at<float>(0,0)=-shape1[contPt].y;
-            therow.at<float>(0,1)=shape1[contPt].x;
+            therow.at<float>(0,0)=shape1[contPt].y;
+            therow.at<float>(0,1)=-shape1[contPt].x;
             therow.at<float>(0,3)=1;
             therow.row(0).copyTo(matM.row(ii));
             matP.at<float>(ii,0) = shape2[contPt].y;

View File

@@ -921,6 +921,14 @@ inline static void recordPropertyVerbose(const std::string & property,
     }
 }
inline static void recordPropertyVerbose(const std::string& property, const std::string& msg,
const char* value, const char* build_value = NULL)
{
return recordPropertyVerbose(property, msg,
value ? std::string(value) : std::string(),
build_value ? std::string(build_value) : std::string());
}
 #ifdef _DEBUG
 #define CV_TEST_BUILD_CONFIG "Debug"
 #else

View File

@@ -107,7 +107,7 @@ class XINECapture : public IVideoCapture
     bool open(const char *filename)
     {
-        CV_Assert(!xine, !stream, !vo_port);
+        CV_Assert_N(!xine, !stream, !vo_port);
         char configfile[2048] = {0};
         xine = xine_new();
@@ -207,7 +207,7 @@ class XINECapture : public IVideoCapture
     double getProperty(int property_id) const CV_OVERRIDE
     {
-        CV_Assert(xine, vo_port, stream);
+        CV_Assert_N(xine, vo_port, stream);
         int pos_t, pos_l, length;
         bool res = (bool)xine_get_pos_length(stream, &pos_l, &pos_t, &length);
@@ -240,7 +240,7 @@ class XINECapture : public IVideoCapture
 protected:
     bool oldSeekFrame(int f)
     {
-        CV_Assert(xine, vo_port, stream);
+        CV_Assert_N(xine, vo_port, stream);
         // no need to seek if we are already there...
         if (f == frame_number)
         {
@@ -290,7 +290,7 @@ protected:
     bool seekFrame(int f)
     {
-        CV_Assert(xine, vo_port, stream);
+        CV_Assert_N(xine, vo_port, stream);
         if (seekable)
         {
             int new_time = (int)((f + 1) * (float)frame_duration);
@@ -309,7 +309,7 @@ protected:
     bool seekTime(int t)
    {
-        CV_Assert(xine, vo_port, stream);
+        CV_Assert_N(xine, vo_port, stream);
         if (seekable)
         {
             if (xine_play(stream, 0, t))
@@ -328,7 +328,7 @@ protected:
     bool seekRatio(double ratio)
     {
-        CV_Assert(xine, vo_port, stream);
+        CV_Assert_N(xine, vo_port, stream);
         if (ratio > 1 || ratio < 0)
             return false;
         if (seekable)

View File

@@ -301,6 +301,7 @@ void cv::viz::Widget3D::applyTransform(const Affine3d &transform)
     vtkSmartPointer<vtkPolyDataMapper> mapper = vtkPolyDataMapper::SafeDownCast(actor->GetMapper());
     CV_Assert("Widget doesn't have a polydata mapper" && mapper);
+    mapper->Update();  // #10945
     VtkUtils::SetInputData(mapper, VtkUtils::TransformPolydata(mapper->GetInput(), transform));
     mapper->Update();
 }

View File

@@ -106,9 +106,13 @@ class ABI:
         self.cmake_vars = dict(
             ANDROID_STL="gnustl_static",
             ANDROID_ABI=self.name,
-            ANDROID_TOOLCHAIN_NAME=toolchain,
             ANDROID_PLATFORM_ID=platform_id,
         )
+        if toolchain is not None:
+            self.cmake_vars['ANDROID_TOOLCHAIN_NAME'] = toolchain
+        else:
+            self.cmake_vars['ANDROID_TOOLCHAIN'] = 'clang'
+            self.cmake_vars['ANDROID_STL'] = 'c++_static'
         if ndk_api_level:
             self.cmake_vars['ANDROID_NATIVE_API_LEVEL'] = ndk_api_level
         self.cmake_vars.update(cmake_vars)
@@ -206,7 +210,7 @@ class Builder:
         # Add extra data
         apkxmldest = check_dir(os.path.join(apkdest, "res", "xml"), create=True)
         apklibdest = check_dir(os.path.join(apkdest, "libs", abi.name), create=True)
-        for ver, d in self.extra_packs + [("3.4.2", os.path.join(self.libdest, "lib"))]:
+        for ver, d in self.extra_packs + [("3.4.3", os.path.join(self.libdest, "lib"))]:
             r = ET.Element("library", attrib={"version": ver})
             log.info("Adding libraries from %s", d)

View File

@ -0,0 +1,6 @@
ABIs = [
    ABI("2", "armeabi-v7a", None, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),
    ABI("3", "arm64-v8a", None),
    ABI("5", "x86_64", None),
    ABI("4", "x86", None),
]
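The new ABI list above passes None for the toolchain argument, so with the constructor change shown earlier each ABI falls back to the NDK's built-in clang toolchain with the c++_static STL instead of a named GCC toolchain. A standalone sketch of that selection logic (it mirrors the constructor rather than importing the real build script):

def make_cmake_vars(name, toolchain, ndk_api_level=None):
    # Mirrors ABI.__init__ above: keep gnustl_static and a named toolchain for
    # old configs, switch to clang + c++_static when no toolchain name is given.
    cmake_vars = dict(ANDROID_STL="gnustl_static", ANDROID_ABI=name)
    if toolchain is not None:
        cmake_vars['ANDROID_TOOLCHAIN_NAME'] = toolchain
    else:
        cmake_vars['ANDROID_TOOLCHAIN'] = 'clang'
        cmake_vars['ANDROID_STL'] = 'c++_static'
    if ndk_api_level:
        cmake_vars['ANDROID_NATIVE_API_LEVEL'] = ndk_api_level
    return cmake_vars

print(make_cmake_vars('arm64-v8a', None))
# {'ANDROID_STL': 'c++_static', 'ANDROID_ABI': 'arm64-v8a', 'ANDROID_TOOLCHAIN': 'clang'}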

View File

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="org.opencv.engine"
-    android:versionCode="342@ANDROID_PLATFORM_ID@"
-    android:versionName="3.42">
+    android:versionCode="343@ANDROID_PLATFORM_ID@"
+    android:versionName="3.43">
    <uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" android:targetSdkVersion="22"/>
    <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>

View File

@ -137,7 +137,7 @@ public class OpenCVEngineService extends Service {
    @Override
    public int getEngineVersion() throws RemoteException {
-        int version = 3420;
+        int version = 3430;
        try {
            version = getPackageManager().getPackageInfo(getPackageName(), 0).versionCode;
        } catch (NameNotFoundException e) {

View File

@ -12,7 +12,7 @@ manually using adb tool:
adb install <path-to-OpenCV-sdk>/apk/OpenCV_<version>_Manager_<app_version>_<platform>.apk

-Example: OpenCV_3.4.2-dev_Manager_3.42_armeabi-v7a.apk
+Example: OpenCV_3.4.3-dev_Manager_3.43_armeabi-v7a.apk

Use the list of platforms below to determine proper OpenCV Manager package for your device:

View File

@ -4,7 +4,7 @@
  <parent>
    <groupId>org.opencv</groupId>
    <artifactId>opencv-parent</artifactId>
-    <version>3.4.2</version>
+    <version>3.4.3</version>
  </parent>
  <groupId>org.opencv</groupId>
  <artifactId>opencv-it</artifactId>

View File

@ -4,7 +4,7 @@
  <parent>
    <groupId>org.opencv</groupId>
    <artifactId>opencv-parent</artifactId>
-    <version>3.4.2</version>
+    <version>3.4.3</version>
  </parent>
  <groupId>org.opencv</groupId>
  <artifactId>opencv</artifactId>

View File

@ -3,7 +3,7 @@
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.opencv</groupId>
  <artifactId>opencv-parent</artifactId>
-  <version>3.4.2</version>
+  <version>3.4.3</version>
  <packaging>pom</packaging>
  <name>OpenCV Parent POM</name>
  <licenses>

samples/dnn/mask_rcnn.py Normal file (143 lines)
View File

@ -0,0 +1,143 @@
import cv2 as cv
import argparse
import numpy as np

parser = argparse.ArgumentParser(description=
    'Use this script to run Mask-RCNN object detection and semantic '
    'segmentation network from TensorFlow Object Detection API.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', required=True, help='Path to a .pb file with weights.')
parser.add_argument('--config', required=True, help='Path to a .pbtxt file that contains network configuration.')
parser.add_argument('--classes', help='Optional path to a text file with names of classes.')
parser.add_argument('--colors', help='Optional path to a text file with colors for every class. '
                                     'Every color is represented with three values from 0 to 255 in BGR channels order.')
parser.add_argument('--width', type=int, default=800,
                    help='Preprocess input image by resizing to a specific width.')
parser.add_argument('--height', type=int, default=800,
                    help='Preprocess input image by resizing to a specific height.')
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
args = parser.parse_args()

np.random.seed(324)

# Load names of classes
classes = None
if args.classes:
    with open(args.classes, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')

# Load colors
colors = None
if args.colors:
    with open(args.colors, 'rt') as f:
        colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')]

legend = None
def showLegend(classes):
    global legend
    if not classes is None and legend is None:
        blockHeight = 30
        assert(len(classes) == len(colors))

        legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8)
        for i in range(len(classes)):
            block = legend[i * blockHeight:(i + 1) * blockHeight]
            block[:,:] = colors[i]
            cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))

        cv.namedWindow('Legend', cv.WINDOW_NORMAL)
        cv.imshow('Legend', legend)
        classes = None

def drawBox(frame, classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

    label = '%.2f' % conf

    # Print a label of class.
    if classes:
        assert(classId < len(classes))
        label = '%s: %s' % (classes[classId], label)

    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

# Load a network
net = cv.dnn.readNet(args.model, args.config)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)

winName = 'Mask-RCNN in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

cap = cv.VideoCapture(args.input if args.input else 0)
legend = None
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        cv.waitKey()
        break

    frameH = frame.shape[0]
    frameW = frame.shape[1]

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, size=(args.width, args.height), swapRB=True, crop=False)

    # Run a model
    net.setInput(blob)

    boxes, masks = net.forward(['detection_out_final', 'detection_masks'])

    numClasses = masks.shape[1]
    numDetections = boxes.shape[2]

    # Draw segmentation
    if not colors:
        # Generate colors
        colors = [np.array([0, 0, 0], np.uint8)]
        for i in range(1, numClasses + 1):
            colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2)
        del colors[0]

    boxesToDraw = []
    for i in range(numDetections):
        box = boxes[0, 0, i]
        mask = masks[i]
        score = box[2]
        if score > args.thr:
            classId = int(box[1])
            left = int(frameW * box[3])
            top = int(frameH * box[4])
            right = int(frameW * box[5])
            bottom = int(frameH * box[6])

            left = max(0, min(left, frameW - 1))
            top = max(0, min(top, frameH - 1))
            right = max(0, min(right, frameW - 1))
            bottom = max(0, min(bottom, frameH - 1))

            boxesToDraw.append([frame, classId, score, left, top, right, bottom])

            classMask = mask[classId]
            classMask = cv.resize(classMask, (right - left + 1, bottom - top + 1))
            mask = (classMask > 0.5)

            roi = frame[top:bottom+1, left:right+1][mask]
            frame[top:bottom+1, left:right+1][mask] = (0.7 * colors[classId] + 0.3 * roi).astype(np.uint8)

    for box in boxesToDraw:
        drawBox(*box)

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    showLegend(classes)

    cv.imshow(winName, frame)
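For reference (not part of the sample): detection_out_final comes back as a 1x1xNx7 blob whose rows are [imageId, classId, score, left, top, right, bottom] in normalized coordinates, and detection_masks holds one low-resolution mask per detection and class, which is why the sample resizes it to the box size before thresholding. A self-contained sketch of that blending step, with synthetic data in place of real network outputs:

import numpy as np
import cv2 as cv

frame = np.full((120, 160, 3), 180, np.uint8)            # stand-in image
left, top, right, bottom = 40, 30, 99, 89                # one detected box (pixels)
classMask = np.random.rand(15, 15).astype(np.float32)    # fake low-resolution mask scores
color = np.array([0, 0, 255], np.uint8)                  # per-class color (BGR)

classMask = cv.resize(classMask, (right - left + 1, bottom - top + 1))
mask = classMask > 0.5                                    # same threshold as the sample
roi = frame[top:bottom+1, left:right+1][mask]
frame[top:bottom+1, left:right+1][mask] = (0.7 * color + 0.3 * roi).astype(np.uint8)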

View File

@ -23,3 +23,98 @@ def addConstNode(name, values, graph_def):
    node.op = 'Const'
    text_format.Merge(tensorMsg(values), node.attr["value"])
    graph_def.node.extend([node])
+def addSlice(inp, out, begins, sizes, graph_def):
+    beginsNode = NodeDef()
+    beginsNode.name = out + '/begins'
+    beginsNode.op = 'Const'
+    text_format.Merge(tensorMsg(begins), beginsNode.attr["value"])
+    graph_def.node.extend([beginsNode])
+
+    sizesNode = NodeDef()
+    sizesNode.name = out + '/sizes'
+    sizesNode.op = 'Const'
+    text_format.Merge(tensorMsg(sizes), sizesNode.attr["value"])
+    graph_def.node.extend([sizesNode])
+
+    sliced = NodeDef()
+    sliced.name = out
+    sliced.op = 'Slice'
+    sliced.input.append(inp)
+    sliced.input.append(beginsNode.name)
+    sliced.input.append(sizesNode.name)
+    graph_def.node.extend([sliced])
+
+def addReshape(inp, out, shape, graph_def):
+    shapeNode = NodeDef()
+    shapeNode.name = out + '/shape'
+    shapeNode.op = 'Const'
+    text_format.Merge(tensorMsg(shape), shapeNode.attr["value"])
+    graph_def.node.extend([shapeNode])
+
+    reshape = NodeDef()
+    reshape.name = out
+    reshape.op = 'Reshape'
+    reshape.input.append(inp)
+    reshape.input.append(shapeNode.name)
+    graph_def.node.extend([reshape])
+
+def addSoftMax(inp, out, graph_def):
+    softmax = NodeDef()
+    softmax.name = out
+    softmax.op = 'Softmax'
+    text_format.Merge('i: -1', softmax.attr['axis'])
+    softmax.input.append(inp)
+    graph_def.node.extend([softmax])
+
+def addFlatten(inp, out, graph_def):
+    flatten = NodeDef()
+    flatten.name = out
+    flatten.op = 'Flatten'
+    flatten.input.append(inp)
+    graph_def.node.extend([flatten])
+
+# Removes Identity nodes
+def removeIdentity(graph_def):
+    identities = {}
+    for node in graph_def.node:
+        if node.op == 'Identity':
+            identities[node.name] = node.input[0]
+            graph_def.node.remove(node)
+
+    for node in graph_def.node:
+        for i in range(len(node.input)):
+            if node.input[i] in identities:
+                node.input[i] = identities[node.input[i]]
+
+def removeUnusedNodesAndAttrs(to_remove, graph_def):
+    unusedAttrs = ['T', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu',
+                   'Index', 'Tperm', 'is_training', 'Tpaddings']
+
+    removedNodes = []
+
+    for i in reversed(range(len(graph_def.node))):
+        op = graph_def.node[i].op
+        name = graph_def.node[i].name
+
+        if op == 'Const' or to_remove(name, op):
+            if op != 'Const':
+                removedNodes.append(name)
+            del graph_def.node[i]
+        else:
+            for attr in unusedAttrs:
+                if attr in graph_def.node[i].attr:
+                    del graph_def.node[i].attr[attr]
+
+    # Remove references to removed nodes except Const nodes.
+    for node in graph_def.node:
+        for i in reversed(range(len(node.input))):
+            if node.input[i] in removedNodes:
+                del node.input[i]
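A minimal sketch of how these shared helpers compose (assuming TensorFlow 1.x and that the module above is importable as tf_text_graph_common; the node names are made up for illustration):

import tensorflow as tf
from tf_text_graph_common import addSoftMax, addFlatten

graph_def = tf.GraphDef()
placeholder = graph_def.node.add()               # protobuf repeated-field add()
placeholder.name = 'logits'
placeholder.op = 'Placeholder'

addSoftMax('logits', 'probs', graph_def)         # appends a Softmax node reading 'logits'
addFlatten('probs', 'probs/flatten', graph_def)  # appends a Flatten node reading 'probs'

print([(n.name, n.op) for n in graph_def.node])
# [('logits', 'Placeholder'), ('probs', 'Softmax'), ('probs/flatten', 'Flatten')]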

View File

@ -6,7 +6,7 @@
from tensorflow.core.framework.node_def_pb2 import NodeDef
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
-from tf_text_graph_common import tensorMsg, addConstNode
+from tf_text_graph_common import *

parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
                                             'SSD model from TensorFlow Object Detection API. '
@ -37,50 +37,17 @@ scopesToIgnore = ('FirstStageFeatureExtractor/Assert',
                  'FirstStageFeatureExtractor/GreaterEqual',
                  'FirstStageFeatureExtractor/LogicalAnd')

-unusedAttrs = ['T', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu',
-               'Index', 'Tperm', 'is_training', 'Tpaddings']

# Read the graph.
with tf.gfile.FastGFile(args.input, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

-# Removes Identity nodes
-def removeIdentity():
-    identities = {}
-    for node in graph_def.node:
-        if node.op == 'Identity':
-            identities[node.name] = node.input[0]
-            graph_def.node.remove(node)
-
-    for node in graph_def.node:
-        for i in range(len(node.input)):
-            if node.input[i] in identities:
-                node.input[i] = identities[node.input[i]]
-
-removeIdentity()
-
-removedNodes = []
-
-for i in reversed(range(len(graph_def.node))):
-    op = graph_def.node[i].op
-    name = graph_def.node[i].name
-
-    if op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep):
-        if op != 'Const':
-            removedNodes.append(name)
-        del graph_def.node[i]
-    else:
-        for attr in unusedAttrs:
-            if attr in graph_def.node[i].attr:
-                del graph_def.node[i].attr[attr]
-
-# Remove references to removed nodes except Const nodes.
-for node in graph_def.node:
-    for i in reversed(range(len(node.input))):
-        if node.input[i] in removedNodes:
-            del node.input[i]
+removeIdentity(graph_def)
+
+def to_remove(name, op):
+    return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
+
+removeUnusedNodesAndAttrs(to_remove, graph_def)

# Connect input node to the first layer
@ -95,68 +62,18 @@ while True:
    if node.op == 'CropAndResize':
        break

-def addSlice(inp, out, begins, sizes):
-    beginsNode = NodeDef()
-    beginsNode.name = out + '/begins'
-    beginsNode.op = 'Const'
-    text_format.Merge(tensorMsg(begins), beginsNode.attr["value"])
-    graph_def.node.extend([beginsNode])
-
-    sizesNode = NodeDef()
-    sizesNode.name = out + '/sizes'
-    sizesNode.op = 'Const'
-    text_format.Merge(tensorMsg(sizes), sizesNode.attr["value"])
-    graph_def.node.extend([sizesNode])
-
-    sliced = NodeDef()
-    sliced.name = out
-    sliced.op = 'Slice'
-    sliced.input.append(inp)
-    sliced.input.append(beginsNode.name)
-    sliced.input.append(sizesNode.name)
-    graph_def.node.extend([sliced])
-
-def addReshape(inp, out, shape):
-    shapeNode = NodeDef()
-    shapeNode.name = out + '/shape'
-    shapeNode.op = 'Const'
-    text_format.Merge(tensorMsg(shape), shapeNode.attr["value"])
-    graph_def.node.extend([shapeNode])
-
-    reshape = NodeDef()
-    reshape.name = out
-    reshape.op = 'Reshape'
-    reshape.input.append(inp)
-    reshape.input.append(shapeNode.name)
-    graph_def.node.extend([reshape])
-
-def addSoftMax(inp, out):
-    softmax = NodeDef()
-    softmax.name = out
-    softmax.op = 'Softmax'
-    text_format.Merge('i: -1', softmax.attr['axis'])
-    softmax.input.append(inp)
-    graph_def.node.extend([softmax])
-
-def addFlatten(inp, out):
-    flatten = NodeDef()
-    flatten.name = out
-    flatten.op = 'Flatten'
-    flatten.input.append(inp)
-    graph_def.node.extend([flatten])

addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd',
-           'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2])
+           'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2], graph_def)
addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1',
-           'FirstStageBoxPredictor/ClassPredictor/softmax') # Compare with Reshape_4
+           'FirstStageBoxPredictor/ClassPredictor/softmax', graph_def) # Compare with Reshape_4
addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax',
-           'FirstStageBoxPredictor/ClassPredictor/softmax/flatten')
+           'FirstStageBoxPredictor/ClassPredictor/softmax/flatten', graph_def)

# Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd
addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd',
-           'FirstStageBoxPredictor/BoxEncodingPredictor/flatten')
+           'FirstStageBoxPredictor/BoxEncodingPredictor/flatten', graph_def)

proposals = NodeDef()
proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized)
@ -218,14 +135,14 @@ graph_def.node.extend([clipByValueNode])
for node in reversed(topNodes):
    graph_def.node.extend([node])

-addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax')
+addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax', graph_def)
addSlice('SecondStageBoxPredictor/Reshape_1/softmax',
         'SecondStageBoxPredictor/Reshape_1/slice',
-         [0, 0, 1], [-1, -1, -1])
+         [0, 0, 1], [-1, -1, -1], graph_def)
addReshape('SecondStageBoxPredictor/Reshape_1/slice',
-           'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1])
+           'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)

# Replace Flatten subgraph onto a single node.
for i in reversed(range(len(graph_def.node))):
@ -255,7 +172,7 @@ for node in graph_def.node:
################################################################################
### Postprocessing
################################################################################
-addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4])
+addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4], graph_def)

variance = NodeDef()
variance.name = 'proposals/variance'
@ -271,8 +188,8 @@ varianceEncoder.input.append(variance.name)
text_format.Merge('i: 2', varianceEncoder.attr["axis"])
graph_def.node.extend([varianceEncoder])

-addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1])
-addFlatten('variance_encoded', 'variance_encoded/flatten')
+addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1], graph_def)
+addFlatten('variance_encoded', 'variance_encoded/flatten', graph_def)

detectionOut = NodeDef()
detectionOut.name = 'detection_out_final'

View File

@ -0,0 +1,230 @@
import argparse
import numpy as np
import tensorflow as tf
from tensorflow.core.framework.node_def_pb2 import NodeDef
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
from tf_text_graph_common import *
parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
'Mask-RCNN model from TensorFlow Object Detection API. '
'Then pass it with .pb file to cv::dnn::readNetFromTensorflow function.')
parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.')
parser.add_argument('--output', required=True, help='Path to output text graph.')
parser.add_argument('--num_classes', default=90, type=int, help='Number of trained classes.')
parser.add_argument('--scales', default=[0.25, 0.5, 1.0, 2.0], type=float, nargs='+',
help='Hyper-parameter of grid_anchor_generator from a config file.')
parser.add_argument('--aspect_ratios', default=[0.5, 1.0, 2.0], type=float, nargs='+',
help='Hyper-parameter of grid_anchor_generator from a config file.')
parser.add_argument('--features_stride', default=16, type=float, nargs='+',
help='Hyper-parameter from a config file.')
args = parser.parse_args()
scopesToKeep = ('FirstStageFeatureExtractor', 'Conv',
'FirstStageBoxPredictor/BoxEncodingPredictor',
'FirstStageBoxPredictor/ClassPredictor',
'CropAndResize',
'MaxPool2D',
'SecondStageFeatureExtractor',
'SecondStageBoxPredictor',
'Preprocessor/sub',
'Preprocessor/mul',
'image_tensor')
scopesToIgnore = ('FirstStageFeatureExtractor/Assert',
'FirstStageFeatureExtractor/Shape',
'FirstStageFeatureExtractor/strided_slice',
'FirstStageFeatureExtractor/GreaterEqual',
'FirstStageFeatureExtractor/LogicalAnd')
# Read the graph.
with tf.gfile.FastGFile(args.input, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
removeIdentity(graph_def)
def to_remove(name, op):
    return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
removeUnusedNodesAndAttrs(to_remove, graph_def)
# Connect input node to the first layer
assert(graph_def.node[0].op == 'Placeholder')
graph_def.node[1].input.insert(0, graph_def.node[0].name)
# Temporarily remove top nodes.
topNodes = []
numCropAndResize = 0
while True:
    node = graph_def.node.pop()
    topNodes.append(node)
    if node.op == 'CropAndResize':
        numCropAndResize += 1
        if numCropAndResize == 2:
            break
addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd',
'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2], graph_def)
addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1',
'FirstStageBoxPredictor/ClassPredictor/softmax', graph_def) # Compare with Reshape_4
addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax',
'FirstStageBoxPredictor/ClassPredictor/softmax/flatten', graph_def)
# Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd
addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd',
'FirstStageBoxPredictor/BoxEncodingPredictor/flatten', graph_def)
proposals = NodeDef()
proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized)
proposals.op = 'PriorBox'
proposals.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd')
proposals.input.append(graph_def.node[0].name) # image_tensor
text_format.Merge('b: false', proposals.attr["flip"])
text_format.Merge('b: true', proposals.attr["clip"])
text_format.Merge('f: %f' % args.features_stride, proposals.attr["step"])
text_format.Merge('f: 0.0', proposals.attr["offset"])
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), proposals.attr["variance"])
widths = []
heights = []
for a in args.aspect_ratios:
    for s in args.scales:
        ar = np.sqrt(a)
        heights.append((args.features_stride**2) * s / ar)
        widths.append((args.features_stride**2) * s * ar)
text_format.Merge(tensorMsg(widths), proposals.attr["width"])
text_format.Merge(tensorMsg(heights), proposals.attr["height"])
graph_def.node.extend([proposals])
# Compare with Reshape_5
detectionOut = NodeDef()
detectionOut.name = 'detection_out'
detectionOut.op = 'DetectionOutput'
detectionOut.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/flatten')
detectionOut.input.append('FirstStageBoxPredictor/ClassPredictor/softmax/flatten')
detectionOut.input.append('proposals')
text_format.Merge('i: 2', detectionOut.attr['num_classes'])
text_format.Merge('b: true', detectionOut.attr['share_location'])
text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
text_format.Merge('f: 0.7', detectionOut.attr['nms_threshold'])
text_format.Merge('i: 6000', detectionOut.attr['top_k'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 100', detectionOut.attr['keep_top_k'])
text_format.Merge('b: true', detectionOut.attr['clip'])
graph_def.node.extend([detectionOut])
# Save as text.
for node in reversed(topNodes):
    if node.op != 'CropAndResize':
        graph_def.node.extend([node])
        topNodes.pop()
    else:
        if numCropAndResize == 1:
            break
        else:
            graph_def.node.extend([node])
            topNodes.pop()
            numCropAndResize -= 1
addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax', graph_def)
addSlice('SecondStageBoxPredictor/Reshape_1/softmax',
'SecondStageBoxPredictor/Reshape_1/slice',
[0, 0, 1], [-1, -1, -1], graph_def)
addReshape('SecondStageBoxPredictor/Reshape_1/slice',
'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)
# Replace Flatten subgraph onto a single node.
for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].op == 'CropAndResize':
        graph_def.node[i].input.insert(1, 'detection_out')

    if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape':
        addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def)
        graph_def.node[i].input.pop()
        graph_def.node[i].input.append('SecondStageBoxPredictor/Reshape/shape2')

    if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape',
                                  'SecondStageBoxPredictor/Flatten/flatten/strided_slice',
                                  'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape']:
        del graph_def.node[i]

for node in graph_def.node:
    if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape':
        node.op = 'Flatten'
        node.input.pop()

    if node.name in ['FirstStageBoxPredictor/BoxEncodingPredictor/Conv2D',
                     'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']:
        text_format.Merge('b: true', node.attr["loc_pred_transposed"])
################################################################################
### Postprocessing
################################################################################
addSlice('detection_out', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4], graph_def)
variance = NodeDef()
variance.name = 'proposals/variance'
variance.op = 'Const'
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), variance.attr["value"])
graph_def.node.extend([variance])
varianceEncoder = NodeDef()
varianceEncoder.name = 'variance_encoded'
varianceEncoder.op = 'Mul'
varianceEncoder.input.append('SecondStageBoxPredictor/Reshape')
varianceEncoder.input.append(variance.name)
text_format.Merge('i: 2', varianceEncoder.attr["axis"])
graph_def.node.extend([varianceEncoder])
addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1], graph_def)
addFlatten('variance_encoded', 'variance_encoded/flatten', graph_def)
detectionOut = NodeDef()
detectionOut.name = 'detection_out_final'
detectionOut.op = 'DetectionOutput'
detectionOut.input.append('variance_encoded/flatten')
detectionOut.input.append('SecondStageBoxPredictor/Reshape_1/Reshape')
detectionOut.input.append('detection_out/slice/reshape')
text_format.Merge('i: %d' % args.num_classes, detectionOut.attr['num_classes'])
text_format.Merge('b: false', detectionOut.attr['share_location'])
text_format.Merge('i: %d' % (args.num_classes + 1), detectionOut.attr['background_label_id'])
text_format.Merge('f: 0.6', detectionOut.attr['nms_threshold'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 100', detectionOut.attr['keep_top_k'])
text_format.Merge('b: true', detectionOut.attr['clip'])
text_format.Merge('b: true', detectionOut.attr['variance_encoded_in_target'])
text_format.Merge('f: 0.3', detectionOut.attr['confidence_threshold'])
text_format.Merge('b: false', detectionOut.attr['group_by_classes'])
graph_def.node.extend([detectionOut])
for node in reversed(topNodes):
    graph_def.node.extend([node])

for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].op == 'CropAndResize':
        graph_def.node[i].input.insert(1, 'detection_out_final')
        break
graph_def.node[-1].name = 'detection_masks'
graph_def.node[-1].op = 'Sigmoid'
graph_def.node[-1].input.pop()
tf.train.write_graph(graph_def, "", args.output, as_text=True)
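The generated text graph is meant to be loaded together with the frozen weights through cv.dnn.readNetFromTensorflow, the same pair the mask_rcnn.py sample expects. A minimal sketch (the file names are placeholders, not files shipped with OpenCV):

import numpy as np
import cv2 as cv

# frozen_inference_graph.pb: weights from the TF Object Detection API model zoo.
# mask_rcnn.pbtxt: text graph produced by the script above.
net = cv.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'mask_rcnn.pbtxt')

img = np.zeros((800, 800, 3), np.uint8)  # stand-in for a real image
blob = cv.dnn.blobFromImage(img, size=(800, 800), swapRB=True, crop=False)
net.setInput(blob)
boxes, masks = net.forward(['detection_out_final', 'detection_masks'])
print(boxes.shape, masks.shape)          # detections and per-detection class masks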

View File

@ -15,7 +15,7 @@ from math import sqrt
from tensorflow.core.framework.node_def_pb2 import NodeDef
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
-from tf_text_graph_common import tensorMsg, addConstNode
+from tf_text_graph_common import *

parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
                                             'SSD model from TensorFlow Object Detection API. '
@ -41,10 +41,6 @@ args = parser.parse_args()
keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu6', 'Placeholder', 'FusedBatchNorm',
           'DepthwiseConv2dNative', 'ConcatV2', 'Mul', 'MaxPool', 'AvgPool', 'Identity']

-# Nodes attributes that could be removed because they are not used during import.
-unusedAttrs = ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu',
-               'Index', 'Tperm', 'is_training', 'Tpaddings']

# Node with which prefixes should be removed
prefixesToRemove = ('MultipleGridAnchorGenerator/', 'Postprocessor/', 'Preprocessor/')
@ -66,7 +62,6 @@ def getUnconnectedNodes():
                unconnected.remove(inp)
    return unconnected

-removedNodes = []

# Detect unfused batch normalization nodes and fuse them.
def fuse_batch_normalization():
@ -118,41 +113,13 @@ def fuse_batch_normalization():
fuse_batch_normalization()

-# Removes Identity nodes
-def removeIdentity():
-    identities = {}
-    for node in graph_def.node:
-        if node.op == 'Identity':
-            identities[node.name] = node.input[0]
-            graph_def.node.remove(node)
-
-    for node in graph_def.node:
-        for i in range(len(node.input)):
-            if node.input[i] in identities:
-                node.input[i] = identities[node.input[i]]
-
-removeIdentity()
-
-# Remove extra nodes and attributes.
-for i in reversed(range(len(graph_def.node))):
-    op = graph_def.node[i].op
-    name = graph_def.node[i].name
-
-    if (not op in keepOps) or name.startswith(prefixesToRemove):
-        if op != 'Const':
-            removedNodes.append(name)
-        del graph_def.node[i]
-    else:
-        for attr in unusedAttrs:
-            if attr in graph_def.node[i].attr:
-                del graph_def.node[i].attr[attr]
-
-# Remove references to removed nodes except Const nodes.
-for node in graph_def.node:
-    for i in reversed(range(len(node.input))):
-        if node.input[i] in removedNodes:
-            del node.input[i]
+removeIdentity(graph_def)
+
+def to_remove(name, op):
+    return (not op in keepOps) or name.startswith(prefixesToRemove)
+
+removeUnusedNodesAndAttrs(to_remove, graph_def)

# Connect input node to the first layer
assert(graph_def.node[0].op == 'Placeholder')
@ -175,8 +142,8 @@ def addConcatNode(name, inputs, axisNodeName):
    concat.input.append(axisNodeName)
    graph_def.node.extend([concat])

-addConstNode('concat/axis_flatten', [-1])
-addConstNode('PriorBox/concat/axis', [-2])
+addConstNode('concat/axis_flatten', [-1], graph_def)
+addConstNode('PriorBox/concat/axis', [-2], graph_def)

for label in ['ClassPredictor', 'BoxEncodingPredictor' if args.box_predictor is 'convolutional' else 'BoxPredictor']:
    concatInputs = []