Mirror of https://github.com/opencv/opencv.git

commit 631b246881
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
@@ -577,7 +577,7 @@ else()
 # Note: layout differs from OpenCV 3.4
 include(GNUInstallDirs)
 ocv_update(OPENCV_INCLUDE_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/opencv4")
-ocv_update(OPENCV_LIB_INSTALL_PATH "${CMAKE_INSTALL_LIBDIR}${LIB_SUFFIX}")
+ocv_update(OPENCV_LIB_INSTALL_PATH "${CMAKE_INSTALL_LIBDIR}")
 ocv_update(OPENCV_CONFIG_INSTALL_PATH "${OPENCV_LIB_INSTALL_PATH}/cmake/opencv4")
 ocv_update(OPENCV_3P_LIB_INSTALL_PATH "${OPENCV_LIB_INSTALL_PATH}/opencv4/3rdparty")
 ocv_update(OPENCV_SAMPLES_SRC_INSTALL_PATH "${CMAKE_INSTALL_DATAROOTDIR}/opencv4/samples")
@@ -144,6 +144,7 @@ if(DOXYGEN_FOUND)
 string(REPLACE ";" " " CMAKE_DOXYGEN_ENABLED_SECTIONS "${CMAKE_DOXYGEN_ENABLED_SECTIONS}")
 # TODO: remove paths_doc from EXAMPLE_PATH after face module tutorials/samples moved to separate folders
 string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${example_path} ; ${paths_doc} ; ${paths_sample}")
+string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INCLUDE_ROOTS "${paths_include}")
 set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_BINARY_DIR}/DoxygenLayout.xml")
 set(CMAKE_DOXYGEN_OUTPUT_PATH "doxygen")
 set(CMAKE_DOXYGEN_MAIN_REFERENCE "${refs_main}")
@@ -22,8 +22,8 @@ ABBREVIATE_BRIEF = "The $name class" \
 ALWAYS_DETAILED_SEC = NO
 INLINE_INHERITED_MEMB = NO
 FULL_PATH_NAMES = YES
-STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@/modules
-STRIP_FROM_INC_PATH =
+STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@/modules @CMAKE_DOXYGEN_INCLUDE_ROOTS@
+STRIP_FROM_INC_PATH = @CMAKE_DOXYGEN_INCLUDE_ROOTS@
 SHORT_NAMES = NO
 JAVADOC_AUTOBRIEF = NO
 QT_AUTOBRIEF = NO
@@ -72,8 +72,8 @@ INTERNAL_DOCS = NO
 CASE_SENSE_NAMES = YES
 HIDE_SCOPE_NAMES = NO
 SHOW_INCLUDE_FILES = YES
-SHOW_GROUPED_MEMB_INC = NO
-FORCE_LOCAL_INCLUDES = YES
+SHOW_GROUPED_MEMB_INC = YES
+FORCE_LOCAL_INCLUDES = NO
 INLINE_INFO = YES
 SORT_MEMBER_DOCS = YES
 SORT_BRIEF_DOCS = YES
@@ -53,8 +53,8 @@ import numpy as np
 import cv2 as cv
 import matplotlib.pyplot as plt

-img1 = cv.imread('box.png',0) # queryImage
-img2 = cv.imread('box_in_scene.png',0) # trainImage
+img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
+img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage

 # Initiate ORB detector
 orb = cv.ORB_create()
@@ -79,7 +79,7 @@ matches = bf.match(des1,des2)
 matches = sorted(matches, key = lambda x:x.distance)

 # Draw first 10 matches.
-img3 = cv.drawMatches(img1,kp1,img2,kp2,matches[:10], flags=2)
+img3 = cv.drawMatches(img1,kp1,img2,kp2,matches[:10],None,flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

 plt.imshow(img3),plt.show()
 @endcode
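For reference, the same updated ORB pipeline in C++ — a minimal sketch (file names are placeholders) illustrating the two fixes the diff makes on the Python side: the named IMREAD_GRAYSCALE flag instead of a bare 0, and an explicit output image plus a named flag for drawMatches:

@code{.cpp}
#include <opencv2/core.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <algorithm>
#include <vector>

int main()
{
    // Named flag instead of the magic 0.
    cv::Mat img1 = cv::imread("box.png", cv::IMREAD_GRAYSCALE);          // queryImage
    cv::Mat img2 = cv::imread("box_in_scene.png", cv::IMREAD_GRAYSCALE); // trainImage

    // ORB keypoints and descriptors.
    cv::Ptr<cv::ORB> orb = cv::ORB::create();
    std::vector<cv::KeyPoint> kp1, kp2;
    cv::Mat des1, des2;
    orb->detectAndCompute(img1, cv::noArray(), kp1, des1);
    orb->detectAndCompute(img2, cv::noArray(), kp2, des2);

    // Brute-force Hamming matching with cross-check, sorted by distance.
    cv::BFMatcher bf(cv::NORM_HAMMING, true);
    std::vector<cv::DMatch> matches;
    bf.match(des1, des2, matches);
    std::sort(matches.begin(), matches.end()); // DMatch orders by distance
    matches.resize(std::min<size_t>(matches.size(), 10));

    // The output image is an explicit argument; the flag replaces the magic 2.
    cv::Mat img3;
    cv::drawMatches(img1, kp1, img2, kp2, matches, img3,
                    cv::Scalar::all(-1), cv::Scalar::all(-1), std::vector<char>(),
                    cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
    cv::imshow("matches", img3);
    cv::waitKey();
    return 0;
}
@endcode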
@@ -104,13 +104,13 @@ so that we can apply ratio test explained by D.Lowe in his paper.
 @code{.py}
 import numpy as np
 import cv2 as cv
-from matplotlib import pyplot as plt
+import matplotlib.pyplot as plt

-img1 = cv.imread('box.png',0) # queryImage
-img2 = cv.imread('box_in_scene.png',0) # trainImage
+img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
+img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage

 # Initiate SIFT detector
-sift = cv.SIFT()
+sift = cv.xfeatures2d.SIFT_create()

 # find the keypoints and descriptors with SIFT
 kp1, des1 = sift.detectAndCompute(img1,None)
@@ -118,7 +118,7 @@ kp2, des2 = sift.detectAndCompute(img2,None)

 # BFMatcher with default params
 bf = cv.BFMatcher()
-matches = bf.knnMatch(des1,des2, k=2)
+matches = bf.knnMatch(des1,des2,k=2)

 # Apply ratio test
 good = []
@@ -127,7 +127,7 @@ for m,n in matches:
 good.append([m])

 # cv.drawMatchesKnn expects list of lists as matches.
-img3 = cv.drawMatchesKnn(img1,kp1,img2,kp2,good,flags=2)
+img3 = cv.drawMatchesKnn(img1,kp1,img2,kp2,good,None,flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

 plt.imshow(img3),plt.show()
 @endcode
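The same ratio test sketched in C++ (des1/des2 stand for the float SIFT descriptors produced above): keep a match only when its best distance is clearly below the second-best, per D. Lowe's paper.

@code{.cpp}
#include <opencv2/features2d.hpp>
#include <vector>

// des1/des2: float (SIFT-style) descriptor matrices, one row per keypoint.
std::vector<cv::DMatch> ratioTest(const cv::Mat& des1, const cv::Mat& des2)
{
    cv::BFMatcher bf; // NORM_L2 by default, suitable for SIFT
    std::vector<std::vector<cv::DMatch> > knn;
    bf.knnMatch(des1, des2, knn, 2);

    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knn.size(); i++)
    {
        if (knn[i].size() == 2 && knn[i][0].distance < 0.75f * knn[i][1].distance)
            good.push_back(knn[i][0]);
    }
    return good;
}
@endcode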
@@ -168,13 +168,13 @@ With this information, we are good to go.
 @code{.py}
 import numpy as np
 import cv2 as cv
-from matplotlib import pyplot as plt
+import matplotlib.pyplot as plt

-img1 = cv.imread('box.png',0) # queryImage
-img2 = cv.imread('box_in_scene.png',0) # trainImage
+img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
+img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage

 # Initiate SIFT detector
-sift = cv.SIFT()
+sift = cv.xfeatures2d.SIFT_create()

 # find the keypoints and descriptors with SIFT
 kp1, des1 = sift.detectAndCompute(img1,None)
@@ -190,7 +190,7 @@ flann = cv.FlannBasedMatcher(index_params,search_params)
 matches = flann.knnMatch(des1,des2,k=2)

 # Need to draw only good matches, so create a mask
-matchesMask = [[0,0] for i in xrange(len(matches))]
+matchesMask = [[0,0] for i in range(len(matches))]

 # ratio test as per Lowe's paper
 for i,(m,n) in enumerate(matches):
@@ -200,7 +200,7 @@ for i,(m,n) in enumerate(matches):
 draw_params = dict(matchColor = (0,255,0),
 singlePointColor = (255,0,0),
 matchesMask = matchesMask,
-flags = 0)
+flags = cv.DrawMatchesFlags_DEFAULT)

 img3 = cv.drawMatchesKnn(img1,kp1,img2,kp2,matches,None,**draw_params)

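A hedged C++ counterpart of the FLANN path above: a KD-tree index for float descriptors, a bounded number of search checks, and the ratio test filling a per-pair mask equivalent to matchesMask.

@code{.cpp}
#include <opencv2/features2d.hpp>
#include <vector>

// des1/des2: float descriptor matrices from the SIFT sketch above.
std::vector<std::vector<char> > flannRatioMask(const cv::Mat& des1, const cv::Mat& des2)
{
    // KD-tree index with 5 trees and 50 search checks, as in the Python tutorial.
    cv::FlannBasedMatcher flann(cv::makePtr<cv::flann::KDTreeIndexParams>(5),
                                cv::makePtr<cv::flann::SearchParams>(50));
    std::vector<std::vector<cv::DMatch> > knn;
    flann.knnMatch(des1, des2, knn, 2);

    // Mask mirroring matchesMask: mark only the best match of pairs that pass.
    std::vector<std::vector<char> > matchesMask(knn.size(), std::vector<char>(2, 0));
    for (size_t i = 0; i < knn.size(); i++)
    {
        if (knn[i].size() == 2 && knn[i][0].distance < 0.7f * knn[i][1].distance)
            matchesMask[i][0] = 1;
    }
    return matchesMask;
}
@endcode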
@@ -156,7 +156,7 @@ void CirclesGridClusterFinder::findGrid(const std::vector<cv::Point2f> &points,
 #endif

 std::vector<Point2f> hull2f;
-convexHull(Mat(patternPoints), hull2f, false);
+convexHull(patternPoints, hull2f, false);
 const size_t cornersCount = isAsymmetricGrid ? 6 : 4;
 if(hull2f.size() < cornersCount)
 return;
@@ -407,7 +407,7 @@ void CirclesGridClusterFinder::rectifyPatternPoints(const std::vector<cv::Point2
 }
 }

-Mat homography = findHomography(Mat(sortedCorners), Mat(idealPoints), 0);
+Mat homography = findHomography(sortedCorners, idealPoints, 0);
 Mat rectifiedPointsMat;
 transform(patternPoints, rectifiedPointsMat, homography);
 rectifiedPatternPoints.clear();
@@ -863,8 +863,8 @@ Mat CirclesGridFinder::rectifyGrid(Size detectedGridSize, const std::vector<Poin
 }
 }

-Mat H = findHomography(Mat(centers), Mat(dstPoints), RANSAC);
-//Mat H = findHomography( Mat( corners ), Mat( dstPoints ) );
+Mat H = findHomography(centers, dstPoints, RANSAC);
+//Mat H = findHomography(corners, dstPoints);

 if (H.empty())
 {
@@ -880,7 +880,7 @@ Mat CirclesGridFinder::rectifyGrid(Size detectedGridSize, const std::vector<Poin
 }

 Mat dstKeypointsMat;
-transform(Mat(srcKeypoints), dstKeypointsMat, H);
+transform(srcKeypoints, dstKeypointsMat, H);
 std::vector<Point2f> dstKeypoints;
 convertPointsFromHomogeneous(dstKeypointsMat, dstKeypoints);

@@ -1168,7 +1168,7 @@ void CirclesGridFinder::findBasis(const std::vector<Point2f> &samples, std::vect
 }
 for (size_t i = 0; i < basis.size(); i++)
 {
-convexHull(Mat(clusters[i]), hulls[i]);
+convexHull(clusters[i], hulls[i]);
 }

 basisGraphs.resize(basis.size(), Graph(keypoints.size()));
@@ -1183,7 +1183,7 @@ void CirclesGridFinder::findBasis(const std::vector<Point2f> &samples, std::vect

 for (size_t k = 0; k < hulls.size(); k++)
 {
-if (pointPolygonTest(Mat(hulls[k]), vec, false) >= 0)
+if (pointPolygonTest(hulls[k], vec, false) >= 0)
 {
 basisGraphs[k].addEdge(i, j);
 }
@@ -1414,7 +1414,6 @@ void CirclesGridFinder::drawHoles(const Mat &srcImage, Mat &drawImage) const
 if (i != holes.size() - 1)
 line(drawImage, keypoints[holes[i][j]], keypoints[holes[i + 1][j]], Scalar(255, 0, 0), 2);

-//circle(drawImage, keypoints[holes[i][j]], holeRadius, holeColor, holeThickness);
 circle(drawImage, keypoints[holes[i][j]], holeRadius, holeColor, holeThickness);
 }
 }
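All of these calib3d edits make the same point: the functions take cv::InputArray, which binds std::vector<cv::Point2f> (and similar containers) directly, so the temporary Mat(...) wrappers were never needed. A minimal sketch:

@code{.cpp}
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <vector>

void fitPattern(const std::vector<cv::Point2f>& src, const std::vector<cv::Point2f>& dst)
{
    // InputArray adapts the vectors directly; no Mat(src) temporary.
    std::vector<cv::Point2f> hull;
    cv::convexHull(src, hull, /*clockwise=*/false);

    cv::Mat H = cv::findHomography(src, dst, 0); // 0 selects the plain least-squares method

    std::vector<cv::Point2f> mapped;
    cv::perspectiveTransform(src, mapped, H);
}
@endcode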
@@ -185,6 +185,10 @@ bool HomographyDecompZhang::findMotionFrom_tstar_n(const cv::Vec3d& tstar, const
 temp(1, 1) += 1.0;
 temp(2, 2) += 1.0;
 motion.R = getHnorm() * temp.inv();
+if (cv::determinant(motion.R) < 0)
+{
+motion.R *= -1;
+}
 motion.t = motion.R * tstar;
 motion.n = n;
 return passesSameSideOfPlaneConstraint(motion);
@@ -312,6 +316,10 @@ void HomographyDecompInria::findRmatFrom_tstar_n(const cv::Vec3d& tstar, const c
 0.0, 0.0, 1.0);

 R = getHnorm() * (I - (2/v) * tstar_m * n_m.t() );
+if (cv::determinant(R) < 0)
+{
+R *= -1;
+}
 }

 void HomographyDecompInria::decompose(std::vector<CameraMotion>& camMotions)
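Both added guards enforce the same invariant: a valid rotation matrix must have determinant +1, and a decomposition that yields det(R) = -1 (a reflection) is repaired by negating the matrix. The same normalization as a standalone sketch:

@code{.cpp}
#include <opencv2/core.hpp>

// If the candidate is a reflection (det = -1), flip its sign so det(R) = +1.
cv::Matx33d normalizeRotation(cv::Matx33d R)
{
    if (cv::determinant(R) < 0)
        R *= -1.0;
    return R;
}
@endcode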
@@ -194,9 +194,8 @@ bool cv::find4QuadCornerSubpix(InputArray _img, InputOutputArray _corners, Size
 erode(white_comp, white_comp, Mat(), Point(-1, -1), erode_count);

 std::vector<std::vector<Point> > white_contours, black_contours;
-std::vector<Vec4i> white_hierarchy, black_hierarchy;
-findContours(black_comp, black_contours, black_hierarchy, RETR_LIST, CHAIN_APPROX_SIMPLE);
-findContours(white_comp, white_contours, white_hierarchy, RETR_LIST, CHAIN_APPROX_SIMPLE);
+findContours(black_comp, black_contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
+findContours(white_comp, white_contours, RETR_LIST, CHAIN_APPROX_SIMPLE);

 if(black_contours.size() < 5 || white_contours.size() < 5) continue;

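The rewritten calls use the findContours overload without the hierarchy output, so the throwaway std::vector<cv::Vec4i> buffers can go away whenever nesting information is unused (RETR_LIST discards it anyway). Sketch:

@code{.cpp}
#include <opencv2/imgproc.hpp>
#include <vector>

std::vector<std::vector<cv::Point> > contoursOf(cv::Mat binaryImage)
{
    std::vector<std::vector<cv::Point> > contours;
    // Overload without the hierarchy argument.
    cv::findContours(binaryImage, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE);
    return contours;
}
@endcode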
@@ -1408,7 +1408,7 @@ bool CV_StereoCalibrationTest::checkPandROI( int test_case_idx, const Mat& M, co
 for( x = 0; x < N; x++ )
 pts.push_back(Point2f((float)x*imgsize.width/(N-1), (float)y*imgsize.height/(N-1)));

-undistortPoints(Mat(pts), upts, M, D, R, P );
+undistortPoints(pts, upts, M, D, R, P );
 for( k = 0; k < N*N; k++ )
 if( upts[k].x < -imgsize.width*eps || upts[k].x > imgsize.width*(1+eps) ||
 upts[k].y < -imgsize.height*eps || upts[k].y > imgsize.height*(1+eps) )
@@ -1717,8 +1717,8 @@ void CV_StereoCalibrationTest::run( int )
 for( int i = 0, k = 0; i < nframes; i++ )
 {
 vector<Point2f> temp[2];
-undistortPoints(Mat(imgpt1[i]), temp[0], M1, D1, R1, P1);
-undistortPoints(Mat(imgpt2[i]), temp[1], M2, D2, R2, P2);
+undistortPoints(imgpt1[i], temp[0], M1, D1, R1, P1);
+undistortPoints(imgpt2[i], temp[1], M2, D2, R2, P2);

 for( int j = 0; j < npoints; j++, k++ )
 {
@@ -353,7 +353,7 @@ protected:
 rvecs_spnp.resize(brdsNum);
 tvecs_spnp.resize(brdsNum);
 for(size_t i = 0; i < brdsNum; ++i)
-solvePnP(Mat(objectPoints[i]), Mat(imagePoints[i]), camMat, distCoeffs, rvecs_spnp[i], tvecs_spnp[i]);
+solvePnP(objectPoints[i], imagePoints[i], camMat, distCoeffs, rvecs_spnp[i], tvecs_spnp[i]);

 compareShiftVecs(tvecs_exp, tvecs_spnp);
 compareRotationVecs(rvecs_exp, rvecs_spnp);
@@ -126,10 +126,10 @@ Mat ChessBoardGenerator::generateChessBoard(const Mat& bg, const Mat& camMat, co
 generateEdge(p3, p4, pts_square3d);
 generateEdge(p4, p1, pts_square3d);

-projectPoints(Mat(pts_square3d), rvec, tvec, camMat, distCoeffs, pts_square2d);
+projectPoints(pts_square3d, rvec, tvec, camMat, distCoeffs, pts_square2d);
 squares_black.resize(squares_black.size() + 1);
 vector<Point2f> temp;
-approxPolyDP(Mat(pts_square2d), temp, 1.0, true);
+approxPolyDP(pts_square2d, temp, 1.0, true);
 transform(temp.begin(), temp.end(), back_inserter(squares_black.back()), Mult(rendererResolutionMultiplier));
 }

@@ -139,7 +139,7 @@ Mat ChessBoardGenerator::generateChessBoard(const Mat& bg, const Mat& camMat, co
 for(int i = 0; i < patternSize.width - 1; ++i)
 corners3d.push_back(zero + (i + 1) * sqWidth * pb1 + (j + 1) * sqHeight * pb2);
 corners.clear();
-projectPoints(Mat(corners3d), rvec, tvec, camMat, distCoeffs, corners);
+projectPoints(corners3d, rvec, tvec, camMat, distCoeffs, corners);

 vector<Point3f> whole3d;
 vector<Point2f> whole2d;
@@ -147,9 +147,9 @@ Mat ChessBoardGenerator::generateChessBoard(const Mat& bg, const Mat& camMat, co
 generateEdge(whole[1], whole[2], whole3d);
 generateEdge(whole[2], whole[3], whole3d);
 generateEdge(whole[3], whole[0], whole3d);
-projectPoints(Mat(whole3d), rvec, tvec, camMat, distCoeffs, whole2d);
+projectPoints(whole3d, rvec, tvec, camMat, distCoeffs, whole2d);
 vector<Point2f> temp_whole2d;
-approxPolyDP(Mat(whole2d), temp_whole2d, 1.0, true);
+approxPolyDP(whole2d, temp_whole2d, 1.0, true);

 vector< vector<Point > > whole_contour(1);
 transform(temp_whole2d.begin(), temp_whole2d.end(),
@@ -213,7 +213,7 @@ Mat ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const Mat
 pts3d[3] = p - pb1 * cbHalfWidthEx + cbHalfHeightEx * pb2;

 /* can remake with better perf */
-projectPoints(Mat(pts3d), rvec, tvec, camMat, distCoeffs, pts2d);
+projectPoints(pts3d, rvec, tvec, camMat, distCoeffs, pts2d);

 bool inrect1 = pts2d[0].x < bg.cols && pts2d[0].y < bg.rows && pts2d[0].x > 0 && pts2d[0].y > 0;
 bool inrect2 = pts2d[1].x < bg.cols && pts2d[1].y < bg.rows && pts2d[1].x > 0 && pts2d[1].y > 0;
@@ -278,7 +278,7 @@ Mat ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const Mat
 pts3d[3] = p - pb1 * cbHalfWidthEx + cbHalfHeightEx * pb2;

 /* can remake with better perf */
-projectPoints(Mat(pts3d), rvec, tvec, camMat, distCoeffs, pts2d);
+projectPoints(pts3d, rvec, tvec, camMat, distCoeffs, pts2d);

 bool inrect1 = pts2d[0].x < bg.cols && pts2d[0].y < bg.rows && pts2d[0].x > 0 && pts2d[0].y > 0;
 bool inrect2 = pts2d[1].x < bg.cols && pts2d[1].y < bg.rows && pts2d[1].x > 0 && pts2d[1].y > 0;
@@ -320,7 +320,7 @@ Mat ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const Mat
 pts3d[3] = p - pb1 * cbHalfWidthEx + cbHalfHeightEx * pb2;

 /* can remake with better perf */
-projectPoints(Mat(pts3d), rvec, tvec, camMat, distCoeffs, pts2d);
+projectPoints(pts3d, rvec, tvec, camMat, distCoeffs, pts2d);

 Point3f zero = p - pb1 * cbHalfWidth - cbHalfHeight * pb2;

@@ -134,4 +134,36 @@ private:

 TEST(Calib3d_DecomposeHomography, regression) { CV_HomographyDecompTest test; test.safe_run(); }

+TEST(Calib3d_DecomposeHomography, issue_4978)
+{
+Matx33d K(
+1.0, 0.0, 0.0,
+0.0, 1.0, 0.0,
+0.0, 0.0, 1.0
+);
+
+Matx33d H(
+-0.102896, 0.270191, -0.0031153,
+0.0406387, 1.19569, -0.0120456,
+0.445351, 0.0410889, 1
+);
+
+vector<Mat> rotations;
+vector<Mat> translations;
+vector<Mat> normals;
+
+decomposeHomographyMat(H, K, rotations, translations, normals);
+
+ASSERT_GT(rotations.size(), (size_t)0u);
+for (size_t i = 0; i < rotations.size(); i++)
+{
+// check: det(R) = 1
+EXPECT_TRUE(std::fabs(cv::determinant(rotations[i]) - 1.0) < 0.01)
+<< "R: det=" << cv::determinant(rotations[0]) << std::endl << rotations[i] << std::endl
+<< "T:" << std::endl << translations[i] << std::endl;
+}
+}
+
 }} // namespace
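Condensing what the new regression test checks into a reusable sketch: every rotation candidate returned by cv::decomposeHomographyMat should be a proper rotation with det(R) close to 1.

@code{.cpp}
#include <opencv2/calib3d.hpp>
#include <cmath>
#include <vector>

bool allRotationsProper(const cv::Matx33d& H, const cv::Matx33d& K)
{
    std::vector<cv::Mat> rotations, translations, normals;
    cv::decomposeHomographyMat(H, K, rotations, translations, normals);
    for (size_t i = 0; i < rotations.size(); i++)
        if (std::fabs(cv::determinant(rotations[i]) - 1.0) >= 0.01)
            return false; // reflection or degenerate candidate
    return !rotations.empty();
}
@endcode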
@@ -124,7 +124,7 @@ protected:

 vector<Point2f> projectedPoints;
 projectedPoints.resize(points.size());
-projectPoints(Mat(points), trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
+projectPoints(points, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
 for (size_t i = 0; i < projectedPoints.size(); i++)
 {
 if (i % 20 == 0)
@@ -241,7 +241,7 @@ protected:

 vector<Point2f> projectedPoints;
 projectedPoints.resize(opoints.size());
-projectPoints(Mat(opoints), trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
+projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);

 bool isEstimateSuccess = solvePnP(opoints, projectedPoints, intrinsics, distCoeffs, rvec, tvec, false, method);
 if (isEstimateSuccess == false)
@@ -291,7 +291,7 @@ class CV_solveP3P_Test : public CV_solvePnPRansac_Test

 vector<Point2f> projectedPoints;
 projectedPoints.resize(opoints.size());
-projectPoints(Mat(opoints), trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
+projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);

 int num_of_solutions = solveP3P(opoints, projectedPoints, intrinsics, distCoeffs, rvecs, tvecs, method);
 if (num_of_solutions != (int) rvecs.size() || num_of_solutions != (int) tvecs.size() || num_of_solutions == 0)
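These solvePnP/solveP3P tests share one round-trip pattern, sketched here: project known 3D points through a ground-truth pose, then recover the pose from the projections and compare.

@code{.cpp}
#include <opencv2/calib3d.hpp>
#include <vector>

void poseRoundTrip(const std::vector<cv::Point3f>& objectPoints,
                   const cv::Mat& rvecTrue, const cv::Mat& tvecTrue,
                   const cv::Mat& K, const cv::Mat& distCoeffs)
{
    std::vector<cv::Point2f> imagePoints;
    cv::projectPoints(objectPoints, rvecTrue, tvecTrue, K, distCoeffs, imagePoints);

    cv::Mat rvec, tvec;
    cv::solvePnP(objectPoints, imagePoints, K, distCoeffs, rvec, tvec);
    // rvec/tvec should match rvecTrue/tvecTrue up to numerical noise.
}
@endcode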
@@ -186,6 +186,16 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
 # endif
 #endif

+#ifndef CV_ALWAYS_INLINE
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define CV_ALWAYS_INLINE __forceinline
+#else
+#define CV_ALWAYS_INLINE inline
+#endif
+#endif
+
 #if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
 # define CV_ENABLE_UNROLLED 0
 #else
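The new CV_ALWAYS_INLINE macro picks the strongest inlining hint available (GCC's always_inline attribute, MSVC's __forceinline) and degrades to plain inline elsewhere. Typical use on a small hot-path helper:

@code{.cpp}
#include <opencv2/core/cvdef.h>

// Strong inlining hint on supported compilers; plain inline otherwise.
CV_ALWAYS_INLINE int clampPixel(int v)
{
    return v < 0 ? 0 : (v > 255 ? 255 : v);
}
@endcode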
@@ -6,7 +6,7 @@
 #define OPENCV_DNN_VERSION_HPP

 /// Use with major OpenCV version only.
-#define OPENCV_DNN_API_VERSION 20181221
+#define OPENCV_DNN_API_VERSION 20190122

 #if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_INLINE_NS
 #define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
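Bumping OPENCV_DNN_API_VERSION renames the inline namespace (dnn4_v20181221 becomes dnn4_v20190122), so every exported symbol changes its mangled name and binaries built against the old ABI fail at link time instead of misbehaving at run time. A generic sketch of the pattern — the names below are illustrative, not OpenCV's:

@code{.cpp}
#define MYLIB_API_VERSION 20190122
#define MYLIB_CAT_(a, b) a##b
#define MYLIB_CAT(a, b) MYLIB_CAT_(a, b)
#define MYLIB_INLINE_NS MYLIB_CAT(v, MYLIB_API_VERSION)

namespace mylib {
inline namespace MYLIB_INLINE_NS {
// Mangles as mylib::v20190122::answer; callers still write mylib::answer().
int answer() { return 42; }
}
}
@endcode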
@@ -157,8 +157,7 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)

 PERF_TEST_P_(DNNTestNetwork, DenseNet_121)
 {
-if (backend == DNN_BACKEND_HALIDE ||
-(backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)))
+if (backend == DNN_BACKEND_HALIDE)
 throw SkipTestException("");
 processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", "",
 Mat(cv::Size(224, 224), CV_32FC3));
@@ -211,8 +210,7 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)

 PERF_TEST_P_(DNNTestNetwork, YOLOv3)
 {
-if (backend == DNN_BACKEND_HALIDE ||
-(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
+if (backend == DNN_BACKEND_HALIDE)
 throw SkipTestException("");
 Mat sample = imread(findDataFile("dnn/dog416.png", false));
 Mat inp;
@@ -222,8 +220,11 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)

 PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
 {
-if (backend == DNN_BACKEND_HALIDE ||
-(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
+if (backend == DNN_BACKEND_HALIDE
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
+|| (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+#endif
+)
 throw SkipTestException("");
 processNet("dnn/frozen_east_text_detection.pb", "", "", Mat(cv::Size(320, 320), CV_32FC3));
 }
@@ -707,12 +707,6 @@ struct DataLayer : public Layer
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
-InferenceEngine::LayerParams lp;
-lp.name = name;
-lp.type = "ScaleShift";
-lp.precision = InferenceEngine::Precision::FP32;
-std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
-
 CV_CheckEQ(inputsData.size(), (size_t)1, "");
 CV_CheckEQ(inputsData[0].dims, 4, "");
 const size_t numChannels = inputsData[0].size[1];
@@ -723,7 +717,6 @@ struct DataLayer : public Layer
 {numChannels});
 weights->allocate();
 weights->set(std::vector<float>(numChannels, scaleFactors[0]));
-ieLayer->_weights = weights;

 // Mean subtraction
 auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
@@ -735,8 +728,21 @@ struct DataLayer : public Layer
 biasesVec[i] = -means[0][i] * scaleFactors[0];
 }
 biases->set(biasesVec);
-ieLayer->_biases = biases;

+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::ScaleShiftLayer ieLayer(name);
+ieLayer.setWeights(weights);
+ieLayer.setBiases(biases);
+#else
+InferenceEngine::LayerParams lp;
+lp.name = name;
+lp.type = "ScaleShift";
+lp.precision = InferenceEngine::Precision::FP32;
+std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
+
+ieLayer->_weights = weights;
+ieLayer->_biases = biases;
+#endif
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
@@ -1480,7 +1486,11 @@ struct Net::Impl
 if (layerNet != ieInpNode->net)
 {
 // layerNet is empty or nodes are from different graphs.
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+ieInpNode->net->addOutput(ieInpNode->layer.getName());
+#else
 ieInpNode->net->addOutput(ieInpNode->layer->name);
+#endif
 }
 }
 }
@@ -1590,7 +1600,7 @@ struct Net::Impl

 // Build Inference Engine networks from sets of layers that support this
 // backend. Split a whole model on several Inference Engine networks if
-// some of layers is not implemented.
+// some of layers are not implemented.

 // Set of all input and output blobs wrappers for current network.
 std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
@@ -1606,7 +1616,7 @@ struct Net::Impl
 {
 addInfEngineNetOutputs(ld);
 net = Ptr<InfEngineBackendNet>();
-netBlobsWrappers.clear();
+netBlobsWrappers.clear(); // Is not used for R5 release but we don't wrap it to #ifdef.
 layer->preferableTarget = DNN_TARGET_CPU;
 continue;
 }
@@ -1624,12 +1634,13 @@ struct Net::Impl
 if (ieInpNode->net != net)
 {
 net = Ptr<InfEngineBackendNet>();
-netBlobsWrappers.clear();
+netBlobsWrappers.clear(); // Is not used for R5 release but we don't wrap it to #ifdef.
 break;
 }
 }
 }

+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
 // The same blobs wrappers cannot be shared between two Inference Engine
 // networks because of explicit references between layers and blobs.
 // So we need to rewrap all the external blobs.
@@ -1646,6 +1657,7 @@ struct Net::Impl
 ld.inputBlobsWrappers[i] = it->second;
 }
 netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];
+#endif // IE < R5

 Ptr<BackendNode> node;
 if (!net.empty())
@@ -1676,6 +1688,40 @@ struct Net::Impl
 CV_Assert(!ieNode.empty());
 ieNode->net = net;

+// Convert weights in FP16 for specific targets.
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
+preferableTarget == DNN_TARGET_MYRIAD ||
+preferableTarget == DNN_TARGET_FPGA) && !fused)
+{
+auto& blobs = ieNode->layer.getConstantData();
+if (blobs.empty())
+{
+// In case of non weightable layer we have to specify
+// it's precision adding dummy blob.
+auto blob = InferenceEngine::make_shared_blob<int16_t>(
+InferenceEngine::Precision::FP16,
+InferenceEngine::Layout::C, {1});
+blob->allocate();
+blobs[""] = blob;
+}
+else
+{
+for (auto& it : blobs)
+it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
+}
+}
+
+if (!fused)
+net->addLayer(ieNode->layer);
+
+net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
+net->addBlobs(ld.inputBlobsWrappers);
+net->addBlobs(ld.outputBlobsWrappers);
+addInfEngineNetOutputs(ld);
+
+#else // IE >= R5
+
 auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
 if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
 preferableTarget == DNN_TARGET_MYRIAD ||
@@ -1713,10 +1759,10 @@ struct Net::Impl
 if (!fused)
 net->addLayer(ieNode->layer);
 addInfEngineNetOutputs(ld);
+#endif // IE >= R5
 }

 // Initialize all networks.
 std::set<InfEngineBackendNet> initializedNets;
 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
 {
 LayerData &ld = it->second;
@@ -2622,7 +2668,11 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
 Net cvNet;
 cvNet.setInputsNames(inputsNames);

+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
+#else
 Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(0));
+#endif
 backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
 for (auto& it : ieNet.getOutputsInfo())
 {
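The recurring INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5) guards select the new Builder-based API on Inference Engine 2018 R5 and later while keeping the legacy CNNLayer path for older releases; INF_ENGINE_RELEASE packs the release as YYYYRR0000 (2018050000 is 2018 R5, as the deconvolution check below shows). A hedged sketch of how such a comparison macro can be structured — OpenCV's actual definition lives elsewhere in the tree and may differ:

@code{.cpp}
// Release packed as YYYY RR 0000, e.g. 2018 R5 -> 2018050000 (assumed layout).
#define INF_ENGINE_RELEASE_2018R5 2018050000

#ifndef INF_ENGINE_RELEASE
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2018R5
#endif

// Compare on the year+release digits only, ignoring the trailing patch digits.
#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LT(ver) (((INF_ENGINE_RELEASE) / 10000) <  ((ver) / 10000))

#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
// new Builder-based code path
#else
// legacy CNNLayer-based code path
#endif
@endcode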
@@ -349,6 +349,14 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::ScaleShiftLayer ieLayer(name);
+
+const size_t numChannels = weights_.total();
+ieLayer.setWeights(wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C));
+ieLayer.setBiases(wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C));
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "ScaleShift";
@@ -360,6 +368,7 @@ public:
 ieLayer->_biases = wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C);

 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -110,6 +110,11 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::SplitLayer ieLayer(name);
+ieLayer.setOutputPorts({InferenceEngine::Port()});
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
 CV_Assert(!input->dims.empty());

@@ -123,6 +128,7 @@ public:
 ieLayer->params["out_sizes"] = format("%d", (int)input->dims[0]);
 #endif
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -313,6 +313,14 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
+
+InferenceEngine::Builder::ConcatLayer ieLayer(name);
+ieLayer.setAxis(clamp(axis, input->dims.size()));
+ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
 InferenceEngine::LayerParams lp;
 lp.name = name;
@@ -321,6 +329,7 @@ public:
 std::shared_ptr<InferenceEngine::ConcatLayer> ieLayer(new InferenceEngine::ConcatLayer(lp));
 ieLayer->_axis = clamp(axis, input->dims.size());
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -521,6 +521,54 @@ public:
 const int inpGroupCn = blobs[0].size[1];
 const int group = inpCn / inpGroupCn;

+auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
+if (newWeightAndBias)
+{
+if (weightsMat.isContinuous())
+{
+Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
+ieWeights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
+}
+else
+{
+ieWeights = InferenceEngine::make_shared_blob<float>(
+InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
+ieWeights->dims());
+ieWeights->allocate();
+
+Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, outCn);
+Mat fusedWeights = weightsMat.colRange(0, newWeights.cols);
+fusedWeights.copyTo(newWeights);
+}
+}
+InferenceEngine::Blob::Ptr ieBiases;
+if (hasBias() || fusedBias)
+{
+Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
+ieBiases = wrapToInfEngineBlob(biasesMat, {(size_t)outCn}, InferenceEngine::Layout::C);
+}
+
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::ConvolutionLayer ieLayer(name);
+
+ieLayer.setKernel({kernel.height, kernel.width});
+ieLayer.setStrides({stride.height, stride.width});
+ieLayer.setDilation({dilation.height, dilation.width});
+ieLayer.setPaddingsBegin({pad.height, pad.width});
+ieLayer.setPaddingsEnd({pad.height, pad.width});
+ieLayer.setGroup(group);
+ieLayer.setOutDepth(outCn);
+
+ieLayer.setWeights(ieWeights);
+if (ieBiases)
+ieLayer.setBiases(ieBiases);
+
+InferenceEngine::Builder::Layer l = ieLayer;
+if (!padMode.empty())
+l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper");
+
+return Ptr<BackendNode>(new InfEngineBackendNode(l));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "Convolution";
@@ -557,32 +605,11 @@ public:
 ieLayer->_out_depth = outCn;
 ieLayer->_group = group;

-ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
-if (newWeightAndBias)
-{
-if (weightsMat.isContinuous())
-{
-Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
-ieLayer->_weights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
-}
-else
-{
-ieLayer->_weights = InferenceEngine::make_shared_blob<float>(
-InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
-ieLayer->_weights->dims());
-ieLayer->_weights->allocate();
-
-Mat newWeights = infEngineBlobToMat(ieLayer->_weights).reshape(1, outCn);
-Mat fusedWeights = weightsMat.colRange(0, newWeights.cols);
-fusedWeights.copyTo(newWeights);
-}
-}
-if (hasBias() || fusedBias)
-{
-Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
-ieLayer->_biases = wrapToInfEngineBlob(biasesMat, {(size_t)outCn}, InferenceEngine::Layout::C);
-}
+ieLayer->_weights = ieWeights;
+if (ieBiases)
+ieLayer->_biases = ieBiases;
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -1193,6 +1220,9 @@ public:
 #ifdef HAVE_INF_ENGINE
 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
 {
+if (INF_ENGINE_RELEASE == 2018050000 && (adjustPad.height || adjustPad.width))
+return false;
+
 const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
 const int group = numOutput / outGroupCn;
 if (group != 1)
@@ -1747,6 +1777,27 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
+const int group = numOutput / outGroupCn;
+
+InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
+
+ieLayer.setKernel({kernel.height, kernel.width});
+ieLayer.setStrides({stride.height, stride.width});
+ieLayer.setDilation({dilation.height, dilation.width});
+ieLayer.setPaddingsBegin({pad.height, pad.width});
+ieLayer.setPaddingsEnd({pad.height, pad.width});
+ieLayer.setGroup(group);
+ieLayer.setOutDepth(numOutput);
+
+ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW));
+if (hasBias())
+{
+ieLayer.setBiases(wrapToInfEngineBlob(blobs[1], {(size_t)numOutput}, InferenceEngine::Layout::C));
+}
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
 const int group = numOutput / outGroupCn;

@@ -1786,6 +1837,7 @@ public:
 ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {(size_t)numOutput}, InferenceEngine::Layout::C);
 }
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -67,8 +67,12 @@ public:

 virtual bool supportBackend(int backendId) CV_OVERRIDE
 {
-return backendId == DNN_BACKEND_OPENCV ||
-(backendId == DNN_BACKEND_INFERENCE_ENGINE && crop_ranges.size() == 4);
+#ifdef HAVE_INF_ENGINE
+if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
+return INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5) && crop_ranges.size() == 4;
+else
+#endif
+return backendId == DNN_BACKEND_OPENCV;
 }

 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -145,9 +149,10 @@ public:
 input(&crop_ranges[0]).copyTo(outputs[0]);
 }

+#ifdef HAVE_INF_ENGINE
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
-#ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "Crop";
@@ -181,9 +186,11 @@ public:
 ieLayer->dim.push_back(crop_ranges[3].end - crop_ranges[3].start);
 #endif
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-#endif // HAVE_INF_ENGINE
+#else
 return Ptr<BackendNode>();
+#endif // IE < R5
 }
+#endif

 std::vector<Range> crop_ranges;
 };
@@ -939,6 +939,25 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::DetectionOutputLayer ieLayer(name);
+
+ieLayer.setNumClasses(_numClasses);
+ieLayer.setShareLocation(_shareLocation);
+ieLayer.setBackgroudLabelId(_backgroundLabelId);
+ieLayer.setNMSThreshold(_nmsThreshold);
+ieLayer.setTopK(_topK);
+ieLayer.setKeepTopK(_keepTopK);
+ieLayer.setConfidenceThreshold(_confidenceThreshold);
+ieLayer.setVariantEncodedInTarget(_varianceEncodedInTarget);
+ieLayer.setCodeType("caffe.PriorBoxParameter." + _codeType);
+ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(3));
+
+InferenceEngine::Builder::Layer l = ieLayer;
+l.getParameters()["eta"] = std::string("1.0");
+
+return Ptr<BackendNode>(new InfEngineBackendNode(l));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "DetectionOutput";
@@ -956,6 +975,7 @@ public:
 ieLayer->params["variance_encoded_in_target"] = _varianceEncodedInTarget ? "1" : "0";
 ieLayer->params["code_type"] = "caffe.PriorBoxParameter." + _codeType;
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -153,10 +153,16 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer ieLayer = func.initInfEngineBuilderAPI();
+ieLayer.setName(this->name);
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = this->name;
 lp.precision = InferenceEngine::Precision::FP32;
 return Ptr<BackendNode>(new InfEngineBackendNode(func.initInfEngine(lp)));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -355,6 +361,12 @@ struct ReLUFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(slope);
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "ReLU";
@@ -363,6 +375,7 @@ struct ReLUFunctor
 ieLayer->params["negative_slope"] = format("%f", slope);
 return ieLayer;
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -472,6 +485,12 @@ struct ReLU6Functor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::ClampLayer("").setMinValue(minValue).setMaxValue(maxValue);
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "Clamp";
@@ -482,6 +501,7 @@ struct ReLU6Functor
 ieLayer->params["max"] = format("%f", maxValue);
 return ieLayer;
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -558,12 +578,19 @@ struct TanHFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::TanHLayer("");
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "TanH";
 std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
 return ieLayer;
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -640,12 +667,19 @@ struct SigmoidFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::SigmoidLayer("");
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "Sigmoid";
 std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
 return ieLayer;
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -724,11 +758,18 @@ struct ELUFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::ELULayer("");
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "ELU";
 return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp));
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -805,6 +846,12 @@ struct AbsValFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-1);
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "ReLU";
@@ -813,6 +860,7 @@ struct AbsValFunctor
 ieLayer->params["negative_slope"] = "-1.0";
 return ieLayer;
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -868,11 +916,18 @@ struct BNLLFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+CV_Error(Error::StsNotImplemented, "");
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 CV_Error(Error::StsNotImplemented, "BNLL");
 return InferenceEngine::CNNLayerPtr();
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -985,6 +1040,14 @@ struct PowerFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+return InferenceEngine::Builder::PowerLayer("").setPower(power)
+.setScale(scale)
+.setShift(shift);
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 if (power == 1.0f && scale == 1.0f && shift == 0.0f)
@@ -1004,6 +1067,7 @@ struct PowerFunctor
 return ieLayer;
 }
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -1143,6 +1207,15 @@ struct ChannelsPReLUFunctor
 #endif // HAVE_HALIDE

 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+{
+InferenceEngine::Builder::PReLULayer ieLayer("");
+const size_t numChannels = scale.total();
+ieLayer.setWeights(wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C));
+return ieLayer;
+}
+#else
 InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
 {
 lp.type = "PReLU";
@@ -1151,6 +1224,7 @@ struct ChannelsPReLUFunctor
 ieLayer->_weights = wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C);
 return ieLayer;
 }
+#endif
 #endif // HAVE_INF_ENGINE

 #ifdef HAVE_VULKAN
@@ -99,7 +99,7 @@ public:
 return backendId == DNN_BACKEND_OPENCV ||
 backendId == DNN_BACKEND_HALIDE ||
 (backendId == DNN_BACKEND_INFERENCE_ENGINE &&
-(preferableTarget != DNN_TARGET_MYRIAD || coeffs.empty()));
+(preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()));
 }

 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -420,9 +420,29 @@ public:
 return Ptr<BackendNode>();
 }

-virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
+virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::EltwiseLayer ieLayer(name);
+
+ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
+
+if (op == SUM)
+ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
+else if (op == PROD)
+ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
+else if (op == MAX)
+ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
+else
+CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
+
+InferenceEngine::Builder::Layer l = ieLayer;
+if (!coeffs.empty())
+l.getParameters()["coeff"] = coeffs;
+
+return Ptr<BackendNode>(new InfEngineBackendNode(l));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "Eltwise";
@@ -438,6 +458,7 @@ public:
 else
 CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -152,9 +152,19 @@ public:
 }
 }

-virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
+virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::Layer ieLayer(name);
+ieLayer.setName(name);
+ieLayer.setType("Flatten");
+ieLayer.getParameters()["axis"] = _startAxis;
+ieLayer.getParameters()["end_axis"] = _endAxis;
+ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
+ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "Flatten";
@@ -163,6 +173,7 @@ public:
 ieLayer->params["axis"] = format("%d", _startAxis);
 ieLayer->params["end_axis"] = format("%d", _endAxis);
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -442,6 +442,18 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::FullyConnectedLayer ieLayer(name);
+
+const int outNum = blobs[0].size[0];
+ieLayer.setOutputNum(outNum);
+
+ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW));
+if (blobs.size() > 1)
+ieLayer.setBiases(wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C));
+
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "FullyConnected";
@@ -456,6 +468,7 @@ public:
 if (blobs.size() > 1)
 ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {(size_t)ieLayer->_out_num}, InferenceEngine::Layout::C);
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -393,6 +393,17 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::NormLayer ieLayer(name);
+ieLayer.setSize(size);
+ieLayer.setAlpha(alpha);
+ieLayer.setBeta(beta);
+ieLayer.setAcrossMaps(type == CHANNEL_NRM);
+
+InferenceEngine::Builder::Layer l = ieLayer;
+l.getParameters()["k"] = bias;
+return Ptr<BackendNode>(new InfEngineBackendNode(l));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "Norm";
@@ -405,6 +416,7 @@ public:
 ieLayer->_alpha = alpha;
 ieLayer->_isAcrossMaps = (type == CHANNEL_NRM);
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
@@ -371,6 +371,13 @@ public:
 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
 {
 #ifdef HAVE_INF_ENGINE
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
+InferenceEngine::Builder::MVNLayer ieLayer(name);
+ieLayer.setAcrossChannels(acrossChannels);
+ieLayer.setNormalize(normVariance);
+ieLayer.setEpsilon(eps);
+return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#else
 InferenceEngine::LayerParams lp;
 lp.name = name;
 lp.type = "MVN";
@@ -380,6 +387,7 @@ public:
 ieLayer->params["normalize_variance"] = normVariance ? "1" : "0";
 ieLayer->params["eps"] = format("%f", eps);
 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif
 #endif // HAVE_INF_ENGINE
 return Ptr<BackendNode>();
 }
|
@ -264,6 +264,49 @@ public:
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
|
||||
if (input->dims.size() == 4)
|
||||
{
|
||||
InferenceEngine::Builder::NormalizeLayer ieLayer(name);
|
||||
|
||||
ieLayer.setChannelShared(false);
|
||||
ieLayer.setAcrossMaps(acrossSpatial);
|
||||
ieLayer.setEpsilon(epsilon);
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
const int numChannels = input->dims[2]; // NOTE: input->dims are reversed (whcn)
|
||||
if (blobs.empty())
|
||||
{
|
||||
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Layout::C,
|
||||
{(size_t)numChannels});
|
||||
weights->allocate();
|
||||
std::vector<float> ones(numChannels, 1);
|
||||
weights->set(ones);
|
||||
l.addConstantData("weights", weights);
|
||||
l.getParameters()["channel_shared"] = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(numChannels == blobs[0].total());
|
||||
l.addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C));
|
||||
l.getParameters()["channel_shared"] = blobs[0].total() == 1;
|
||||
}
|
||||
l.getParameters()["across_spatial"] = acrossSpatial;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
else
|
||||
{
|
||||
InferenceEngine::Builder::GRNLayer ieLayer(name);
|
||||
ieLayer.setBeta(epsilon);
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
l.getParameters()["bias"] = epsilon;
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
#else
|
||||
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
|
||||
|
||||
InferenceEngine::LayerParams lp;
|
||||
@ -307,6 +350,7 @@ public:
|
||||
ieLayer->params["bias"] = format("%f", epsilon);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif
|
||||
#endif // HAVE_INF_ENGINE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
@ -385,6 +385,11 @@ public:
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
InferenceEngine::Builder::PermuteLayer ieLayer(name);
|
||||
ieLayer.setOrder(_order);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#else
|
||||
InferenceEngine::LayerParams lp;
|
||||
lp.name = name;
|
||||
lp.type = "Permute";
|
||||
@ -397,6 +402,7 @@ public:
|
||||
ieLayer->params["order"] += format(",%zu", _order[i]);
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif
|
||||
#endif // HAVE_INF_ENGINE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
@ -295,6 +295,48 @@ public:
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
if (type == MAX || type == AVE)
|
||||
{
|
||||
InferenceEngine::Builder::PoolingLayer ieLayer(name);
|
||||
ieLayer.setKernel({kernel.height, kernel.width});
|
||||
ieLayer.setStrides({stride.height, stride.width});
|
||||
ieLayer.setPaddingsBegin({pad_t, pad_l});
|
||||
ieLayer.setPaddingsEnd({pad_b, pad_r});
|
||||
ieLayer.setPoolingType(type == MAX ?
|
||||
InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
|
||||
InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);
|
||||
ieLayer.setRoundingType(ceilMode ?
|
||||
InferenceEngine::Builder::PoolingLayer::RoundingType::CEIL :
|
||||
InferenceEngine::Builder::PoolingLayer::RoundingType::FLOOR);
|
||||
ieLayer.setExcludePad(type == AVE && padMode == "SAME");
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
if (!padMode.empty())
|
||||
l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper");
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
else if (type == ROI)
|
||||
{
|
||||
InferenceEngine::Builder::ROIPoolingLayer ieLayer(name);
|
||||
ieLayer.setSpatialScale(spatialScale);
|
||||
ieLayer.setPooled({pooledSize.height, pooledSize.width});
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
else if (type == PSROI)
|
||||
{
|
||||
InferenceEngine::Builder::PSROIPoolingLayer ieLayer(name);
|
||||
ieLayer.setSpatialScale(spatialScale);
|
||||
ieLayer.setOutputDim(psRoiOutChannels);
|
||||
ieLayer.setGroupSize(pooledSize.width);
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
|
||||
return Ptr<BackendNode>();
|
||||
#else
|
||||
InferenceEngine::LayerParams lp;
|
||||
lp.name = name;
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
@ -353,6 +395,7 @@ public:
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif
|
||||
#endif // HAVE_INF_ENGINE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
@ -498,6 +498,58 @@ public:
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
if (_explicitSizes)
|
||||
{
|
||||
InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
|
||||
|
||||
CV_Assert(_stepX == _stepY);
|
||||
ieLayer.setStep(_stepX);
|
||||
|
||||
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
|
||||
ieLayer.setOffset(_offsetsX[0]);
|
||||
|
||||
ieLayer.setClip(_clip);
|
||||
ieLayer.setFlip(false); // We already flipped aspect ratios.
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
|
||||
CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
|
||||
CV_Assert(_boxWidths.size() == _boxHeights.size());
|
||||
l.getParameters()["width"] = _boxWidths;
|
||||
l.getParameters()["height"] = _boxHeights;
|
||||
l.getParameters()["variance"] = _variance;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
else
|
||||
{
|
||||
InferenceEngine::Builder::PriorBoxLayer ieLayer(name);
|
||||
|
||||
CV_Assert(!_explicitSizes);
|
||||
|
||||
ieLayer.setMinSize(_minSize);
|
||||
if (_maxSize > 0)
|
||||
ieLayer.setMaxSize(_maxSize);
|
||||
|
||||
CV_Assert(_stepX == _stepY);
|
||||
ieLayer.setStep(_stepX);
|
||||
|
||||
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
|
||||
ieLayer.setOffset(_offsetsX[0]);
|
||||
|
||||
ieLayer.setClip(_clip);
|
||||
ieLayer.setFlip(false); // We already flipped aspect ratios.
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
if (!_aspectRatios.empty())
|
||||
{
|
||||
l.getParameters()["aspect_ratio"] = _aspectRatios;
|
||||
}
|
||||
CV_Assert(!_variance.empty());
|
||||
l.getParameters()["variance"] = _variance;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
#else
|
||||
InferenceEngine::LayerParams lp;
|
||||
lp.name = name;
|
||||
lp.type = _explicitSizes ? "PriorBoxClustered" : "PriorBox";
|
||||
@ -553,6 +605,7 @@ public:
|
||||
ieLayer->params["offset"] = format("%f", _offsetsX[0]);
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif
|
||||
#endif // HAVE_INF_ENGINE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
@ -328,6 +328,28 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ProposalLayer ieLayer(name);

ieLayer.setBaseSize(baseSize);
ieLayer.setFeatStride(featStride);
ieLayer.setMinSize(16);
ieLayer.setNMSThresh(nmsThreshold);
ieLayer.setPostNMSTopN(keepTopAfterNMS);
ieLayer.setPreNMSTopN(keepTopBeforeNMS);

std::vector<float> scalesVec(scales.size());
for (int i = 0; i < scales.size(); ++i)
scalesVec[i] = scales.get<float>(i);
ieLayer.setScale(scalesVec);

std::vector<float> ratiosVec(ratios.size());
for (int i = 0; i < ratios.size(); ++i)
ratiosVec[i] = ratios.get<float>(i);
ieLayer.setRatio(ratiosVec);

return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Proposal";
@ -353,6 +375,7 @@ public:
ieLayer->params["scale"] += format(",%f", scales.get<float>(i));
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
@ -181,6 +181,11 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ReorgYoloLayer ieLayer(name);
ieLayer.setStride(reorgStride);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ReorgYolo";
@ -188,6 +193,7 @@ public:
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
ieLayer->params["stride"] = format("%d", reorgStride);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
@ -203,6 +203,17 @@ public:
return true;
}

void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> outputs;
outputs_arr.getMatVector(outputs);

CV_Assert(!outputs.empty());
outShapes.resize(outputs.size());
for (int i = 0; i < outputs.size(); ++i)
outShapes[i] = shape(outputs[i]);
}

bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
@ -218,8 +229,7 @@ public:
void *dst_handle = outputs[i].handle(ACCESS_WRITE);
if (src_handle != dst_handle)
{
MatShape outShape = shape(outputs[i]);
UMat umat = srcBlob.reshape(1, (int)outShape.size(), &outShape[0]);
UMat umat = srcBlob.reshape(1, (int)outShapes[i].size(), &outShapes[i][0]);
umat.copyTo(outputs[i]);
}
}
@ -250,6 +260,12 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ReshapeLayer ieLayer(name);
CV_Assert(outShapes.size() == 1);
ieLayer.setDims(outShapes[0]);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Reshape";
@ -265,9 +281,13 @@ public:
ieLayer->shape = std::vector<int>(shapeSrc->dims.rbegin(), shapeSrc->dims.rend());
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

private:
std::vector<MatShape> outShapes;
};

Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
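
The new finalize() override caches shape(outputs[i]) once, after shape inference has run, so the OpenCL branch can reshape the source blob into the stored outShapes[i] instead of re-querying each destination UMat on every forward pass. A rough usage sketch under assumed conditions (hypothetical model file; any network containing a Reshape layer exercises this path):

cv::dnn::Net net = cv::dnn::readNet("model_with_reshape.onnx");  // assumed model
net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);             // routes through forward_ocl
net.setInput(blob);
cv::Mat out = net.forward();  // finalize() filled outShapes before the first run
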
@ -163,6 +163,33 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
if (interpolation == "nearest")
{
ieLayer.setType("Resample");
ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
ieLayer.getParameters()["antialias"] = false;
if (scaleWidth != scaleHeight)
CV_Error(Error::StsNotImplemented, "resample with sw != sh");
ieLayer.getParameters()["factor"] = 1.0 / scaleWidth;
}
else if (interpolation == "bilinear")
{
ieLayer.setType("Interp");
ieLayer.getParameters()["pad_beg"] = 0;
ieLayer.getParameters()["pad_end"] = 0;
ieLayer.getParameters()["align_corners"] = false;
}
else
CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
ieLayer.getParameters()["width"] = outWidth;
ieLayer.getParameters()["height"] = outHeight;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.precision = InferenceEngine::Precision::FP32;
@ -187,6 +214,7 @@ public:
ieLayer->params["width"] = cv::format("%d", outWidth);
ieLayer->params["height"] = cv::format("%d", outHeight);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
@ -247,6 +275,18 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Interp");
ieLayer.getParameters()["pad_beg"] = 0;
ieLayer.getParameters()["pad_end"] = 0;
ieLayer.getParameters()["width"] = outWidth;
ieLayer.getParameters()["height"] = outHeight;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Interp";
@ -256,6 +296,7 @@ public:
ieLayer->params["pad_beg"] = "0";
ieLayer->params["pad_end"] = "0";
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
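
A note on the "factor" parameter above, an inference from the conversion code rather than from Inference Engine documentation: the layer stores scaleWidth as an input/output ratio while the Resample primitive expects an upscale factor, hence the reciprocal:

// upscaling 32 px -> 64 px (assumed convention):
//   scaleWidth = 32.0 / 64.0 = 0.5
//   factor     = 1.0 / 0.5   = 2.0
// The "sw != sh" error exists because a single scalar factor cannot encode
// different horizontal and vertical scales.
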
@ -197,6 +197,29 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ScaleShiftLayer ieLayer(name);

CV_Assert(!blobs.empty());
const size_t numChannels = blobs[0].total();
if (hasWeights)
{
ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C));
}
else
{
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{numChannels});
weights->allocate();

std::vector<float> ones(numChannels, 1);
weights->set(ones);
ieLayer.setWeights(weights);
}
if (hasBias)
ieLayer.setBiases(wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
@ -223,6 +246,7 @@ public:
ieLayer->_biases = wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C);

return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
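
When the layer carries only a bias, the Builder's ScaleShiftLayer apparently still needs a weights blob, so the code above synthesizes an identity scale. Per channel c the layer computes:

// dst(c) = w(c) * src(c) + b(c)
// hasWeights == false  =>  w(c) = 1 for every c (the `ones` vector above),
// so only the bias term has any effect.
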
@ -110,8 +110,15 @@ public:

virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE && sliceRanges.size() == 1 && sliceRanges[0].size() == 4);
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
return INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5) &&
sliceRanges.size() == 1 && sliceRanges[0].size() == 4;
}
else
#endif
return backendId == DNN_BACKEND_OPENCV;
}

bool getMemoryShapes(const std::vector<MatShape> &inputs,
@ -254,9 +261,10 @@ public:
}
}

#ifdef HAVE_INF_ENGINE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::LayerParams lp;
lp.name = name;
@ -286,10 +294,11 @@ public:
ieLayer->dim.push_back(sliceRanges[0][i].end - sliceRanges[0][i].start);
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));

#endif // HAVE_INF_ENGINE
#else
return Ptr<BackendNode>();
#endif // IE < R5
}
#endif
};

Ptr<SliceLayer> SliceLayer::create(const LayerParams& params)
@ -326,6 +326,13 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
ieLayer.setAxis(clamp(axisRaw, input->dims.size()));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

InferenceEngine::LayerParams lp;
@ -335,6 +342,7 @@ public:
std::shared_ptr<InferenceEngine::SoftMaxLayer> ieLayer(new InferenceEngine::SoftMaxLayer(lp));
ieLayer->axis = clamp(axisRaw, input->dims.size());
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
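
clamp() here normalizes the configured axis against the input rank, so negative Python-style indices keep working (behaviour inferred from its use in this file):

// clamp(axisRaw, dims) examples with dims = 4:
//   clamp( 1, 4) == 1
//   clamp(-1, 4) == 3   // last axis
//   clamp(-4, 4) == 0
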
@ -18,6 +18,10 @@ namespace cv { namespace dnn {

#ifdef HAVE_INF_ENGINE

#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::Builder::Layer& _layer)
: BackendNode(DNN_BACKEND_INFERENCE_ENGINE), layer(_layer) {}
#else
InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::CNNLayerPtr& _layer)
: BackendNode(DNN_BACKEND_INFERENCE_ENGINE), layer(_layer) {}

@ -40,6 +44,7 @@ void InfEngineBackendNode::connect(std::vector<Ptr<BackendWrapper> >& inputs,
layer->outData[0] = dataPtr;
dataPtr->creatorLayer = InferenceEngine::CNNLayerWeakPtr(layer);
}
#endif

static std::vector<Ptr<InfEngineBackendWrapper> >
infEngineWrappers(const std::vector<Ptr<BackendWrapper> >& ptrs)
@ -54,6 +59,129 @@ infEngineWrappers(const std::vector<Ptr<BackendWrapper> >& ptrs)
return wrappers;
}

#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)

InfEngineBackendNet::InfEngineBackendNet() : netBuilder("")
{
hasNetOwner = false;
targetDevice = InferenceEngine::TargetDevice::eCPU;
}

InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net) : netBuilder(""), cnn(net)
{
hasNetOwner = true;
targetDevice = InferenceEngine::TargetDevice::eCPU;
}

void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendWrapper> >& outputs,
const std::string& layerName)
{
std::vector<Ptr<InfEngineBackendWrapper> > inpWrappers = infEngineWrappers(inputs);
std::map<std::string, int>::iterator it = layers.find(layerName);
CV_Assert(it != layers.end());

const int layerId = it->second;
for (int i = 0; i < inpWrappers.size(); ++i)
{
const auto& inp = inpWrappers[i];
const std::string& inpName = inp->dataPtr->name;
int inpId;
it = layers.find(inpName);
if (it == layers.end())
{
InferenceEngine::Builder::InputLayer inpLayer(inpName);

std::vector<size_t> shape(inp->blob->dims());
std::reverse(shape.begin(), shape.end());

inpLayer.setPort(InferenceEngine::Port(shape));
inpId = netBuilder.addLayer(inpLayer);

layers.insert({inpName, inpId});
}
else
inpId = it->second;

netBuilder.connect(inpId, {layerId, i});
unconnectedLayersIds.erase(inpId);
}
CV_Assert(!outputs.empty());
InferenceEngine::DataPtr dataPtr = infEngineDataNode(outputs[0]);
dataPtr->name = layerName;
}

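connect() resolves producers lazily: the first time an input blob's name is missing from the layers map, an InputLayer is created for it on the fly and remembered. The dims reversal is worth spelling out (an assumption made explicit here): cv::dnn keeps NCHW order while Blob::dims() reports dimensions innermost-first:

// assumed 1x3x224x224 network input:
//   inp->blob->dims()   -> {224, 224, 3, 1}
//   after std::reverse  -> {1, 3, 224, 224}   // shape handed to the Port
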
void InfEngineBackendNet::init(int targetId)
{
if (!hasNetOwner)
{
CV_Assert(!unconnectedLayersIds.empty());
for (int id : unconnectedLayersIds)
{
InferenceEngine::Builder::OutputLayer outLayer("myconv1");
netBuilder.addLayer({id}, outLayer);
}
cnn = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(netBuilder.build()));
}

switch (targetId)
{
case DNN_TARGET_CPU:
targetDevice = InferenceEngine::TargetDevice::eCPU;
break;
case DNN_TARGET_OPENCL: case DNN_TARGET_OPENCL_FP16:
targetDevice = InferenceEngine::TargetDevice::eGPU;
break;
case DNN_TARGET_MYRIAD:
targetDevice = InferenceEngine::TargetDevice::eMYRIAD;
break;
case DNN_TARGET_FPGA:
targetDevice = InferenceEngine::TargetDevice::eFPGA;
break;
default:
CV_Error(Error::StsError, format("Unknown target identifier: %d", targetId));
}

for (const auto& name : requestedOutputs)
{
cnn.addOutput(name);
}

for (const auto& it : cnn.getInputsInfo())
{
const std::string& name = it.first;
auto blobIt = allBlobs.find(name);
CV_Assert(blobIt != allBlobs.end());
inpBlobs[name] = blobIt->second;
it.second->setPrecision(blobIt->second->precision());
}
for (const auto& it : cnn.getOutputsInfo())
{
const std::string& name = it.first;
auto blobIt = allBlobs.find(name);
CV_Assert(blobIt != allBlobs.end());
outBlobs[name] = blobIt->second;
it.second->setPrecision(blobIt->second->precision()); // Should be always FP32
}

initPlugin(cnn);
}

void InfEngineBackendNet::addLayer(const InferenceEngine::Builder::Layer& layer)
{
int id = netBuilder.addLayer(layer);
const std::string& layerName = layer.getName();
CV_Assert(layers.insert({layerName, id}).second);
unconnectedLayersIds.insert(id);
}

void InfEngineBackendNet::addOutput(const std::string& name)
{
requestedOutputs.push_back(name);
}

#endif // IE >= R5

static InferenceEngine::Layout estimateLayout(const Mat& m)
{
if (m.dims == 4)
@ -148,6 +276,7 @@ void InfEngineBackendWrapper::setHostDirty()

}

#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNet::InfEngineBackendNet()
{
targetDevice = InferenceEngine::TargetDevice::eCPU;
@ -491,6 +620,8 @@ void InfEngineBackendNet::init(int targetId)
initPlugin(*this);
}

#endif // IE < R5

static std::map<InferenceEngine::TargetDevice, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;

void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
@ -566,7 +697,11 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs
auto wrappers = infEngineWrappers(ptrs);
for (const auto& wrapper : wrappers)
{
allBlobs.insert({wrapper->dataPtr->name, wrapper->blob});
std::string name = wrapper->dataPtr->name;
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
name = name.empty() ? "id1" : name; // TODO: drop the magic input name.
#endif
allBlobs.insert({name, wrapper->blob});
}
}
@ -35,6 +35,11 @@

#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LT(ver) (((INF_ENGINE_RELEASE) / 10000) < ((ver) / 10000))

#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
#include <ie_builders.hpp>
#endif

#endif // HAVE_INF_ENGINE
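These guards compare only the major release. INF_ENGINE_RELEASE packs year, release and patch digits (2018R5.0 is 2018050000, the same encoding the test code below compares against), so dividing by 10000 drops the patch component:

// INF_ENGINE_RELEASE == 2018050001 (hypothetical 2018R5 patch build):
//   INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
//   -> (2018050001 / 10000) >= (2018050000 / 10000)
//   ->  201805 >= 201805  ->  true
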
@ -42,6 +47,7 @@ namespace cv { namespace dnn {

#ifdef HAVE_INF_ENGINE

#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
class InfEngineBackendNet : public InferenceEngine::ICNNNetwork
{
public:
@ -146,17 +152,75 @@ private:
void initPlugin(InferenceEngine::ICNNNetwork& net);
};

#else // IE < R5

class InfEngineBackendNet
{
public:
InfEngineBackendNet();

InfEngineBackendNet(InferenceEngine::CNNNetwork& net);

void addLayer(const InferenceEngine::Builder::Layer& layer);

void addOutput(const std::string& name);

void connect(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendWrapper> >& outputs,
const std::string& layerName);

bool isInitialized();

void init(int targetId);

void forward();

void initPlugin(InferenceEngine::ICNNNetwork& net);

void addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs);

private:
InferenceEngine::Builder::Network netBuilder;

InferenceEngine::InferenceEnginePluginPtr enginePtr;
InferenceEngine::InferencePlugin plugin;
InferenceEngine::ExecutableNetwork netExec;
InferenceEngine::InferRequest infRequest;
InferenceEngine::BlobMap allBlobs;
InferenceEngine::BlobMap inpBlobs;
InferenceEngine::BlobMap outBlobs;
InferenceEngine::TargetDevice targetDevice;

InferenceEngine::CNNNetwork cnn;
bool hasNetOwner;

std::map<std::string, int> layers;
std::vector<std::string> requestedOutputs;

std::set<int> unconnectedLayersIds;
};
#endif // IE < R5

class InfEngineBackendNode : public BackendNode
{
public:
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNode(const InferenceEngine::Builder::Layer& layer);
#else
InfEngineBackendNode(const InferenceEngine::CNNLayerPtr& layer);
#endif

void connect(std::vector<Ptr<BackendWrapper> >& inputs,
std::vector<Ptr<BackendWrapper> >& outputs);

InferenceEngine::CNNLayerPtr layer;
// Inference Engine network object that allows to obtain the outputs of this layer.
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer layer;
Ptr<InfEngineBackendNet> net;
#else
InferenceEngine::CNNLayerPtr layer;
Ptr<InfEngineBackendNet> net;
#endif
};

class InfEngineBackendWrapper : public BackendWrapper
@ -1,10 +1,2 @@
#include "../precomp.hpp"

#if defined(TH_DISABLE_HEAP_TRACKING)
#elif (defined(__unix) || defined(_WIN32))
#include <malloc.h>
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#endif

#include "THGeneral.h"
@ -180,7 +180,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.013 : 0.0;
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.013 : 2e-5;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
inp, "detection_out", "", l1, lInf, 0.25);
@ -288,7 +288,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
// Output image has values in range [-143.526, 148.539].
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.3 : 4e-5;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7.0 : 2e-3;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7.28 : 2e-3;
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
}
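
In these tests l1 is a mean absolute difference and lInf a maximum absolute difference against the reference blob, which is why FP16 and Myriad targets get loose thresholds while FP32 thresholds tighten from the placeholder 0.0 to small epsilons. A sketch of the two metrics (the tests use their own normAssert helper; this is just the underlying math):

double l1   = cv::norm(out, ref, cv::NORM_L1) / ref.total();  // mean |out - ref|
double lInf = cv::norm(out, ref, cv::NORM_INF);               // max  |out - ref|
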
@ -306,7 +306,7 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
// batch size 1
testDarknetModel(config_file, weights_file, ref.rowRange(0, 2), scoreDiff, iouDiff);

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_MYRIAD)
#endif
// batch size 2
@ -163,7 +163,7 @@ TEST_P(Deconvolution, Accuracy)
bool hasBias = get<6>(GetParam());
Backend backendId = get<0>(get<7>(GetParam()));
Target targetId = get<1>(get<7>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && (targetId == DNN_TARGET_CPU || targetId == DNN_TARGET_MYRIAD) &&
dilation.width == 2 && dilation.height == 2)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
@ -466,6 +466,7 @@ void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId)
pool.set("stride_w", 2);
pool.set("stride_h", 2);
pool.type = "Pooling";
pool.name = "ave_pool";

Net net;
int poolId = net.addLayer(pool.name, pool.type, pool);
@ -295,10 +295,6 @@ TEST_P(Test_Caffe_layers, Eltwise)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif
testLayerUsingCaffeModels("layer_eltwise");
}

@ -351,6 +351,10 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR)
l1 = 0.009;
lInf = 0.035;
}
else if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_CPU) {
l1 = 4.5e-5;
lInf = 1.9e-4;
}
testONNXModels("LResNet100E_IR", pb, l1, lInf);
}

@ -366,6 +370,10 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus)
l1 = 0.021;
lInf = 0.034;
}
else if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_CPU || target == DNN_TARGET_OPENCL)) {
l1 = 2.4e-4;
lInf = 6e-4;
}
testONNXModels("emotion_ferplus", pb, l1, lInf);
}

@ -389,7 +397,7 @@ TEST_P(Test_ONNX_nets, Inception_v1)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif
testONNXModels("inception_v1", pb);
}
@ -40,7 +40,7 @@ TEST(Test_TensorFlow, read_inception)
ASSERT_TRUE(!sample.empty());
Mat input;
resize(sample, input, Size(224, 224));
input -= 128; // mean sub
input -= Scalar::all(117); // mean sub

Mat inputBlob = blobFromImage(input);

@ -351,8 +351,8 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
Mat out = net.forward();

Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy"));
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1e-5;
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0098 : 1e-3;
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1.5e-5;
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 1e-3;
normAssertDetections(ref, out, "", 0.3, scoreDiff, iouDiff);
}

@ -366,6 +366,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");

double scoresDiff = backend == DNN_BACKEND_INFERENCE_ENGINE ? 2.9e-5 : 1e-5;
for (int i = 0; i < 2; ++i)
{
std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
@ -381,7 +382,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
Mat out = net.forward();

Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/" + names[i] + ".detection_out.npy"));
normAssertDetections(ref, out, names[i].c_str(), 0.3);
normAssertDetections(ref, out, names[i].c_str(), 0.3, scoresDiff);
}
}

@ -406,7 +407,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
net.setInput(blob);
Mat out = net.forward();

double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : default_l1;
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 1.1e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.021 : default_lInf;
normAssertDetections(ref, out, "", 0.4, scoreDiff, iouDiff);
}
@ -568,10 +569,6 @@ TEST_P(Test_TensorFlow_layers, slice)
if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
(target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("")
#endif
runTensorFlowNet("slice_4d");
}

@ -260,6 +260,11 @@ TEST_P(Test_Torch_layers, run_paralel)

TEST_P(Test_Torch_layers, net_residual)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL ||
target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif
runTorchNet("net_residual", "", false, true);
}

@ -390,10 +395,6 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
// -model models/instance_norm/feathers.t7
TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#endif
checkBackend();
std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
"dnn/fast_neural_style_instance_norm_feathers.t7"};
@ -197,8 +197,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
centers.clear();

std::vector < std::vector<Point> > contours;
Mat tmpBinaryImage = binaryImage.clone();
findContours(tmpBinaryImage, contours, RETR_LIST, CHAIN_APPROX_NONE);
findContours(binaryImage, contours, RETR_LIST, CHAIN_APPROX_NONE);

#ifdef DEBUG_BLOB_DETECTOR
//  Mat keypointsImage;
@ -214,7 +213,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
{
Center center;
center.confidence = 1;
Moments moms = moments(Mat(contours[contourIdx]));
Moments moms = moments(contours[contourIdx]);
if (params.filterByArea)
{
double area = moms.m00;
@ -225,7 +224,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
if (params.filterByCircularity)
{
double area = moms.m00;
double perimeter = arcLength(Mat(contours[contourIdx]), true);
double perimeter = arcLength(contours[contourIdx], true);
double ratio = 4 * CV_PI * area / (perimeter * perimeter);
if (ratio < params.minCircularity || ratio >= params.maxCircularity)
continue;
@ -261,9 +260,9 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
if (params.filterByConvexity)
{
std::vector < Point > hull;
convexHull(Mat(contours[contourIdx]), hull);
double area = contourArea(Mat(contours[contourIdx]));
double hullArea = contourArea(Mat(hull));
convexHull(contours[contourIdx], hull);
double area = contourArea(contours[contourIdx]);
double hullArea = contourArea(hull);
if (fabs(hullArea) < DBL_EPSILON)
continue;
double ratio = area / hullArea;
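
For reference, the circularity ratio computed above compares each contour against an ideal disc:

// ratio = 4 * pi * area / perimeter^2
//   circle:          4*pi*(pi*r^2) / (2*pi*r)^2 = 1.0
//   square, side s:  4*pi*s^2 / (4*s)^2 = pi/4 ~ 0.785
// so elongated or ragged contours score lower and get filtered out.
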
@ -84,4 +84,26 @@ PERF_TEST_P(TestFindContoursFF, findContours,
SANITY_CHECK_NOTHING();
}

typedef TestBaseWithParam< tuple<MatDepth, int> > TestBoundingRect;

PERF_TEST_P(TestBoundingRect, BoundingRect,
Combine(
testing::Values(CV_32S, CV_32F), // points type
Values(400, 511, 1000, 10000, 100000) // points count
)
)

{
int ptType = get<0>(GetParam());
int n = get<1>(GetParam());

Mat pts(n, 2, ptType);
declare.in(pts, WARMUP_RNG);

cv::Rect rect;
TEST_CYCLE() rect = boundingRect(pts);

SANITY_CHECK_NOTHING();
}

} } // namespace
@ -11,7 +11,7 @@ typedef perf::TestBaseWithParam<Size_MatType_OutMatDepth_t> Size_MatType_OutMatD
PERF_TEST_P(Size_MatType_OutMatDepth, integral,
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(CV_8UC1, CV_8UC4),
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4),
testing::Values(CV_32S, CV_32F, CV_64F)
)
)
@ -213,7 +213,7 @@ int FilterEngine::start(const Size &_wholeSize, const Size &sz, const Point &ofs
}

// adjust bufstep so that the used part of the ring buffer stays compact in memory
bufStep = bufElemSize*(int)alignSize(roi.width + (!isSeparable() ? ksize.width - 1 : 0),16);
bufStep = bufElemSize*(int)alignSize(roi.width + (!isSeparable() ? ksize.width - 1 : 0),VEC_ALIGN);

dx1 = std::max(anchor.x - roi.x, 0);
dx2 = std::max(ksize.width - anchor.x - 1 + roi.x + roi.width - wholeSize.width, 0);
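
cv::alignSize rounds a size up to a multiple of a power of two, so switching the hard-coded 16 to VEC_ALIGN keeps each ring-buffer row aligned for the widest SIMD registers the build enables (the concrete VEC_ALIGN value is build-dependent; 64 below is an assumption):

// alignSize(sz, n) == (sz + n - 1) & -n
//   alignSize(37, 16) == 48   // old fixed alignment
//   alignSize(37, 64) == 64   // e.g. a wide-SIMD build
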
@ -11,16 +11,6 @@

#include "opencv2/core/softfloat.hpp"

#ifndef CV_ALWAYS_INLINE
#if defined(__GNUC__) && (__GNUC__ > 3 ||(__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define CV_ALWAYS_INLINE __forceinline
#else
#define CV_ALWAYS_INLINE inline
#endif
#endif

namespace
{

@ -45,6 +45,7 @@
#include "opencl_kernels_imgproc.hpp"
#include <iostream>
#include "hal_replacement.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include <opencv2/core/utils/configuration.private.hpp>

/****************************************************************************************\
@ -97,73 +98,65 @@ struct MorphNoVec
int operator()(uchar**, int, uchar*, int) const { return 0; }
};

#if CV_SSE2
#if CV_SIMD

template<class VecUpdate> struct MorphRowIVec
template<class VecUpdate> struct MorphRowVec
{
enum { ESZ = VecUpdate::ESZ };

MorphRowIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
typedef typename VecUpdate::vtype vtype;
typedef typename vtype::lane_type stype;
MorphRowVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar* src, uchar* dst, int width, int cn) const
{
if( !checkHardwareSupport(CV_CPU_SSE2) )
return 0;

cn *= ESZ;
int i, k, _ksize = ksize*cn;
width = (width & -4)*cn;
width *= cn;
VecUpdate updateOp;

for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes )
{
__m128i s = _mm_loadu_si128((const __m128i*)(src + i));
for( k = cn; k < _ksize; k += cn )
vtype s0 = vx_load((const stype*)src + i);
vtype s1 = vx_load((const stype*)src + i + vtype::nlanes);
vtype s2 = vx_load((const stype*)src + i + 2*vtype::nlanes);
vtype s3 = vx_load((const stype*)src + i + 3*vtype::nlanes);
for (k = cn; k < _ksize; k += cn)
{
__m128i x = _mm_loadu_si128((const __m128i*)(src + i + k));
s = updateOp(s, x);
s0 = updateOp(s0, vx_load((const stype*)src + i + k));
s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes));
s2 = updateOp(s2, vx_load((const stype*)src + i + k + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load((const stype*)src + i + k + 3*vtype::nlanes));
}
_mm_storeu_si128((__m128i*)(dst + i), s);
v_store((stype*)dst + i, s0);
v_store((stype*)dst + i + vtype::nlanes, s1);
v_store((stype*)dst + i + 2*vtype::nlanes, s2);
v_store((stype*)dst + i + 3*vtype::nlanes, s3);
}

for( ; i < width; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128i s = _mm_cvtsi32_si128(*(const int*)(src + i));
vtype s0 = vx_load((const stype*)src + i);
vtype s1 = vx_load((const stype*)src + i + vtype::nlanes);
for( k = cn; k < _ksize; k += cn )
{
__m128i x = _mm_cvtsi32_si128(*(const int*)(src + i + k));
s = updateOp(s, x);
s0 = updateOp(s0, vx_load((const stype*)src + i + k));
s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes));
}
*(int*)(dst + i) = _mm_cvtsi128_si32(s);
v_store((stype*)dst + i, s0);
v_store((stype*)dst + i + vtype::nlanes, s1);
i += 2*vtype::nlanes;
}

return i/ESZ;
}

int ksize, anchor;
};


template<class VecUpdate> struct MorphRowFVec
{
MorphRowFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar* src, uchar* dst, int width, int cn) const
{
if( !checkHardwareSupport(CV_CPU_SSE) )
return 0;

int i, k, _ksize = ksize*cn;
width = (width & -4)*cn;
VecUpdate updateOp;

for( i = 0; i < width; i += 4 )
if( i <= width - vtype::nlanes )
{
__m128 s = _mm_loadu_ps((const float*)src + i);
vtype s = vx_load((const stype*)src + i);
for( k = cn; k < _ksize; k += cn )
{
__m128 x = _mm_loadu_ps((const float*)src + i + k);
s = updateOp(s, x);
}
_mm_storeu_ps((float*)dst + i, s);
s = updateOp(s, vx_load((const stype*)src + i + k));
v_store((stype*)dst + i, s);
i += vtype::nlanes;
}
if( i <= width - vtype::nlanes/2 )
{
vtype s = vx_load_low((const stype*)src + i);
for( k = cn; k < _ksize; k += cn )
s = updateOp(s, vx_load_low((const stype*)src + i + k));
v_store_low((stype*)dst + i, s);
i += vtype::nlanes/2;
}

return i;
@ -173,230 +166,156 @@ template<class VecUpdate> struct MorphRowFVec
};

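The rewrite above ports raw SSE to OpenCV's universal intrinsics: vtype is one of v_uint8, v_uint16, v_int16 or v_float32; vtype::nlanes is the lane count for the build's SIMD width; and vx_load / v_store / the VecUpdate functor compile down to SSE, AVX2 or NEON as available. The main loop runs four vectors per iteration, then the tail shrinks to two, one, and half a vector. A self-contained sketch of the same pattern under stated assumptions (a hypothetical helper doing erosion with a 3-tap row kernel; the caller must keep src readable for width + 2 floats):

#include <algorithm>
#include "opencv2/core/hal/intrin.hpp"

static void erodeRow3(const float* src, float* dst, int width)
{
    int i = 0;
#if CV_SIMD
    for( ; i <= width - cv::v_float32::nlanes; i += cv::v_float32::nlanes )
    {
        cv::v_float32 s = cv::vx_load(src + i);      // kernel tap 0
        s = cv::v_min(s, cv::vx_load(src + i + 1));  // tap 1 (unaligned load is fine)
        s = cv::v_min(s, cv::vx_load(src + i + 2));  // tap 2
        cv::v_store(dst + i, s);
    }
#endif
    for( ; i < width; i++ )                          // scalar tail
        dst[i] = std::min(std::min(src[i], src[i + 1]), src[i + 2]);
}
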
template<class VecUpdate> struct MorphColumnIVec
template<class VecUpdate> struct MorphColumnVec
{
enum { ESZ = VecUpdate::ESZ };

MorphColumnIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar** src, uchar* dst, int dststep, int count, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE2) )
return 0;

int i = 0, k, _ksize = ksize;
width *= ESZ;
VecUpdate updateOp;

for( i = 0; i < count + ksize - 1; i++ )
CV_Assert( ((size_t)src[i] & 15) == 0 );

for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
{
for( i = 0; i <= width - 32; i += 32 )
{
const uchar* sptr = src[1] + i;
__m128i s0 = _mm_load_si128((const __m128i*)sptr);
__m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
__m128i x0, x1;

for( k = 2; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
}

sptr = src[0] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
_mm_storeu_si128((__m128i*)(dst + i), updateOp(s0, x0));
_mm_storeu_si128((__m128i*)(dst + i + 16), updateOp(s1, x1));

sptr = src[k] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
_mm_storeu_si128((__m128i*)(dst + dststep + i), updateOp(s0, x0));
_mm_storeu_si128((__m128i*)(dst + dststep + i + 16), updateOp(s1, x1));
}

for( ; i <= width - 8; i += 8 )
{
__m128i s0 = _mm_loadl_epi64((const __m128i*)(src[1] + i)), x0;

for( k = 2; k < _ksize; k++ )
{
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
s0 = updateOp(s0, x0);
}

x0 = _mm_loadl_epi64((const __m128i*)(src[0] + i));
_mm_storel_epi64((__m128i*)(dst + i), updateOp(s0, x0));
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
_mm_storel_epi64((__m128i*)(dst + dststep + i), updateOp(s0, x0));
}
}

for( ; count > 0; count--, dst += dststep, src++ )
{
for( i = 0; i <= width - 32; i += 32 )
{
const uchar* sptr = src[0] + i;
__m128i s0 = _mm_load_si128((const __m128i*)sptr);
__m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
__m128i x0, x1;

for( k = 1; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
}
_mm_storeu_si128((__m128i*)(dst + i), s0);
_mm_storeu_si128((__m128i*)(dst + i + 16), s1);
}

for( ; i <= width - 8; i += 8 )
{
__m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;

for( k = 1; k < _ksize; k++ )
{
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
s0 = updateOp(s0, x0);
}
_mm_storel_epi64((__m128i*)(dst + i), s0);
}
}

return i/ESZ;
}

int ksize, anchor;
};


template<class VecUpdate> struct MorphColumnFVec
{
MorphColumnFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
typedef typename VecUpdate::vtype vtype;
typedef typename vtype::lane_type stype;
MorphColumnVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE) )
return 0;

int i = 0, k, _ksize = ksize;
VecUpdate updateOp;

for( i = 0; i < count + ksize - 1; i++ )
CV_Assert( ((size_t)_src[i] & 15) == 0 );
CV_Assert( ((size_t)_src[i] & (CV_SIMD_WIDTH-1)) == 0 );

const float** src = (const float**)_src;
float* dst = (float*)_dst;
const stype** src = (const stype**)_src;
stype* dst = (stype*)_dst;
dststep /= sizeof(dst[0]);

for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
{
for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes)
{
const float* sptr = src[1] + i;
__m128 s0 = _mm_load_ps(sptr);
__m128 s1 = _mm_load_ps(sptr + 4);
__m128 s2 = _mm_load_ps(sptr + 8);
__m128 s3 = _mm_load_ps(sptr + 12);
__m128 x0, x1, x2, x3;
const stype* sptr = src[1] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);
vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes);
vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes);

for( k = 2; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
s2 = updateOp(s2, x2);
s3 = updateOp(s3, x3);
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes));
}

sptr = src[0] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
_mm_storeu_ps(dst + i, updateOp(s0, x0));
_mm_storeu_ps(dst + i + 4, updateOp(s1, x1));
_mm_storeu_ps(dst + i + 8, updateOp(s2, x2));
_mm_storeu_ps(dst + i + 12, updateOp(s3, x3));
v_store(dst + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
v_store(dst + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)));
v_store(dst + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)));

sptr = src[k] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
_mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
_mm_storeu_ps(dst + dststep + i + 4, updateOp(s1, x1));
_mm_storeu_ps(dst + dststep + i + 8, updateOp(s2, x2));
_mm_storeu_ps(dst + dststep + i + 12, updateOp(s3, x3));
v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
v_store(dst + dststep + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)));
v_store(dst + dststep + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)));
}

for( ; i <= width - 4; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128 s0 = _mm_load_ps(src[1] + i), x0;
const stype* sptr = src[1] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);

for( k = 2; k < _ksize; k++ )
{
x0 = _mm_load_ps(src[k] + i);
s0 = updateOp(s0, x0);
sptr = src[k] + i;
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
}

x0 = _mm_load_ps(src[0] + i);
_mm_storeu_ps(dst + i, updateOp(s0, x0));
x0 = _mm_load_ps(src[k] + i);
_mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
sptr = src[0] + i;
v_store(dst + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));

sptr = src[k] + i;
v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
i += 2*vtype::nlanes;
}
if( i <= width - vtype::nlanes )
{
vtype s0 = vx_load_aligned(src[1] + i);

for( k = 2; k < _ksize; k++ )
s0 = updateOp(s0, vx_load_aligned(src[k] + i));

v_store(dst + i, updateOp(s0, vx_load_aligned(src[0] + i)));
v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(src[k] + i)));
i += vtype::nlanes;
}
if( i <= width - vtype::nlanes/2 )
{
vtype s0 = vx_load_low(src[1] + i);

for( k = 2; k < _ksize; k++ )
s0 = updateOp(s0, vx_load_low(src[k] + i));

v_store_low(dst + i, updateOp(s0, vx_load_low(src[0] + i)));
v_store_low(dst + dststep + i, updateOp(s0, vx_load_low(src[k] + i)));
i += vtype::nlanes/2;
}
}

for( ; count > 0; count--, dst += dststep, src++ )
{
for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes)
{
const float* sptr = src[0] + i;
__m128 s0 = _mm_load_ps(sptr);
__m128 s1 = _mm_load_ps(sptr + 4);
__m128 s2 = _mm_load_ps(sptr + 8);
__m128 s3 = _mm_load_ps(sptr + 12);
__m128 x0, x1, x2, x3;
const stype* sptr = src[0] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);
vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes);
vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes);

for( k = 1; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
s2 = updateOp(s2, x2);
s3 = updateOp(s3, x3);
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
_mm_storeu_ps(dst + i + 4, s1);
_mm_storeu_ps(dst + i + 8, s2);
_mm_storeu_ps(dst + i + 12, s3);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
v_store(dst + i + 2*vtype::nlanes, s2);
v_store(dst + i + 3*vtype::nlanes, s3);
}

for( i = 0; i <= width - 4; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128 s0 = _mm_load_ps(src[0] + i), x0;
const stype* sptr = src[0] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);

for( k = 1; k < _ksize; k++ )
{
x0 = _mm_load_ps(src[k] + i);
s0 = updateOp(s0, x0);
sptr = src[k] + i;
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
i += 2*vtype::nlanes;
}
if( i <= width - vtype::nlanes )
{
vtype s0 = vx_load_aligned(src[0] + i);

for( k = 1; k < _ksize; k++ )
s0 = updateOp(s0, vx_load_aligned(src[k] + i));
v_store(dst + i, s0);
i += vtype::nlanes;
}
if( i <= width - vtype::nlanes/2 )
{
vtype s0 = vx_load_low(src[0] + i);

for( k = 1; k < _ksize; k++ )
s0 = updateOp(s0, vx_load_low(src[k] + i));
v_store_low(dst + i, s0);
i += vtype::nlanes/2;
}
}

@ -407,185 +326,109 @@ template<class VecUpdate> struct MorphColumnFVec
};


template<class VecUpdate> struct MorphIVec
{
enum { ESZ = VecUpdate::ESZ };

int operator()(uchar** src, int nz, uchar* dst, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE2) )
return 0;

int i, k;
width *= ESZ;
VecUpdate updateOp;

for( i = 0; i <= width - 32; i += 32 )
{
const uchar* sptr = src[0] + i;
__m128i s0 = _mm_loadu_si128((const __m128i*)sptr);
__m128i s1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
__m128i x0, x1;

for( k = 1; k < nz; k++ )
{
sptr = src[k] + i;
x0 = _mm_loadu_si128((const __m128i*)sptr);
x1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
}
_mm_storeu_si128((__m128i*)(dst + i), s0);
_mm_storeu_si128((__m128i*)(dst + i + 16), s1);
}

for( ; i <= width - 8; i += 8 )
{
__m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;

for( k = 1; k < nz; k++ )
{
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
s0 = updateOp(s0, x0);
}
_mm_storel_epi64((__m128i*)(dst + i), s0);
}

return i/ESZ;
}
};


template<class VecUpdate> struct MorphFVec
template<class VecUpdate> struct MorphVec
{
typedef typename VecUpdate::vtype vtype;
typedef typename vtype::lane_type stype;
int operator()(uchar** _src, int nz, uchar* _dst, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE) )
return 0;

const float** src = (const float**)_src;
float* dst = (float*)_dst;
const stype** src = (const stype**)_src;
stype* dst = (stype*)_dst;
int i, k;
VecUpdate updateOp;

for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes )
{
const float* sptr = src[0] + i;
__m128 s0 = _mm_loadu_ps(sptr);
__m128 s1 = _mm_loadu_ps(sptr + 4);
__m128 s2 = _mm_loadu_ps(sptr + 8);
__m128 s3 = _mm_loadu_ps(sptr + 12);
__m128 x0, x1, x2, x3;

const stype* sptr = src[0] + i;
vtype s0 = vx_load(sptr);
vtype s1 = vx_load(sptr + vtype::nlanes);
vtype s2 = vx_load(sptr + 2*vtype::nlanes);
vtype s3 = vx_load(sptr + 3*vtype::nlanes);
for( k = 1; k < nz; k++ )
{
sptr = src[k] + i;
x0 = _mm_loadu_ps(sptr);
x1 = _mm_loadu_ps(sptr + 4);
x2 = _mm_loadu_ps(sptr + 8);
x3 = _mm_loadu_ps(sptr + 12);
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
s2 = updateOp(s2, x2);
s3 = updateOp(s3, x3);
s0 = updateOp(s0, vx_load(sptr));
s1 = updateOp(s1, vx_load(sptr + vtype::nlanes));
s2 = updateOp(s2, vx_load(sptr + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load(sptr + 3*vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
_mm_storeu_ps(dst + i + 4, s1);
_mm_storeu_ps(dst + i + 8, s2);
_mm_storeu_ps(dst + i + 12, s3);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
v_store(dst + i + 2*vtype::nlanes, s2);
v_store(dst + i + 3*vtype::nlanes, s3);
}

for( ; i <= width - 4; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128 s0 = _mm_loadu_ps(src[0] + i), x0;

const stype* sptr = src[0] + i;
vtype s0 = vx_load(sptr);
vtype s1 = vx_load(sptr + vtype::nlanes);
for( k = 1; k < nz; k++ )
{
x0 = _mm_loadu_ps(src[k] + i);
s0 = updateOp(s0, x0);
sptr = src[k] + i;
s0 = updateOp(s0, vx_load(sptr));
s1 = updateOp(s1, vx_load(sptr + vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
i += 2*vtype::nlanes;
}

for( ; i < width; i++ )
if( i <= width - vtype::nlanes )
{
__m128 s0 = _mm_load_ss(src[0] + i), x0;

vtype s0 = vx_load(src[0] + i);
for( k = 1; k < nz; k++ )
{
x0 = _mm_load_ss(src[k] + i);
s0 = updateOp(s0, x0);
}
_mm_store_ss(dst + i, s0);
s0 = updateOp(s0, vx_load(src[k] + i));
v_store(dst + i, s0);
i += vtype::nlanes;
}
if( i <= width - vtype::nlanes/2 )
{
vtype s0 = vx_load_low(src[0] + i);
for( k = 1; k < nz; k++ )
s0 = updateOp(s0, vx_load_low(src[k] + i));
v_store_low(dst + i, s0);
i += vtype::nlanes/2;
}

return i;
}
};

struct VMin8u
template <typename T> struct VMin
{
enum { ESZ = 1 };
__m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epu8(a,b); }
typedef T vtype;
vtype operator()(const vtype& a, const vtype& b) const { return v_min(a,b); }
};
struct VMax8u
template <typename T> struct VMax
{
enum { ESZ = 1 };
__m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_max_epu8(a,b); }
typedef T vtype;
vtype operator()(const vtype& a, const vtype& b) const { return v_max(a,b); }
};
struct VMin16u
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_subs_epu16(a,_mm_subs_epu16(a,b)); }
};
struct VMax16u
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_adds_epu16(_mm_subs_epu16(a,b), b); }
};
struct VMin16s
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_min_epi16(a, b); }
};
struct VMax16s
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_max_epi16(a, b); }
};
struct VMin32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_min_ps(a,b); }};
struct VMax32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_max_ps(a,b); }};

typedef MorphRowIVec<VMin8u> ErodeRowVec8u;
typedef MorphRowIVec<VMax8u> DilateRowVec8u;
typedef MorphRowIVec<VMin16u> ErodeRowVec16u;
typedef MorphRowIVec<VMax16u> DilateRowVec16u;
typedef MorphRowIVec<VMin16s> ErodeRowVec16s;
typedef MorphRowIVec<VMax16s> DilateRowVec16s;
typedef MorphRowFVec<VMin32f> ErodeRowVec32f;
typedef MorphRowFVec<VMax32f> DilateRowVec32f;
typedef MorphRowVec<VMin<v_uint8> > ErodeRowVec8u;
typedef MorphRowVec<VMax<v_uint8> > DilateRowVec8u;
typedef MorphRowVec<VMin<v_uint16> > ErodeRowVec16u;
typedef MorphRowVec<VMax<v_uint16> > DilateRowVec16u;
typedef MorphRowVec<VMin<v_int16> > ErodeRowVec16s;
typedef MorphRowVec<VMax<v_int16> > DilateRowVec16s;
typedef MorphRowVec<VMin<v_float32> > ErodeRowVec32f;
typedef MorphRowVec<VMax<v_float32> > DilateRowVec32f;

typedef MorphColumnIVec<VMin8u> ErodeColumnVec8u;
typedef MorphColumnIVec<VMax8u> DilateColumnVec8u;
typedef MorphColumnIVec<VMin16u> ErodeColumnVec16u;
typedef MorphColumnIVec<VMax16u> DilateColumnVec16u;
typedef MorphColumnIVec<VMin16s> ErodeColumnVec16s;
typedef MorphColumnIVec<VMax16s> DilateColumnVec16s;
typedef MorphColumnFVec<VMin32f> ErodeColumnVec32f;
typedef MorphColumnFVec<VMax32f> DilateColumnVec32f;
typedef MorphColumnVec<VMin<v_uint8> > ErodeColumnVec8u;
typedef MorphColumnVec<VMax<v_uint8> > DilateColumnVec8u;
typedef MorphColumnVec<VMin<v_uint16> > ErodeColumnVec16u;
typedef MorphColumnVec<VMax<v_uint16> > DilateColumnVec16u;
typedef MorphColumnVec<VMin<v_int16> > ErodeColumnVec16s;
typedef MorphColumnVec<VMax<v_int16> > DilateColumnVec16s;
typedef MorphColumnVec<VMin<v_float32> > ErodeColumnVec32f;
typedef MorphColumnVec<VMax<v_float32> > DilateColumnVec32f;

typedef MorphIVec<VMin8u> ErodeVec8u;
typedef MorphIVec<VMax8u> DilateVec8u;
typedef MorphIVec<VMin16u> ErodeVec16u;
|
||||
typedef MorphIVec<VMax16u> DilateVec16u;
|
||||
typedef MorphIVec<VMin16s> ErodeVec16s;
|
||||
typedef MorphIVec<VMax16s> DilateVec16s;
|
||||
typedef MorphFVec<VMin32f> ErodeVec32f;
|
||||
typedef MorphFVec<VMax32f> DilateVec32f;
|
||||
typedef MorphVec<VMin<v_uint8> > ErodeVec8u;
|
||||
typedef MorphVec<VMax<v_uint8> > DilateVec8u;
|
||||
typedef MorphVec<VMin<v_uint16> > ErodeVec16u;
|
||||
typedef MorphVec<VMax<v_uint16> > DilateVec16u;
|
||||
typedef MorphVec<VMin<v_int16> > ErodeVec16s;
|
||||
typedef MorphVec<VMax<v_int16> > DilateVec16s;
|
||||
typedef MorphVec<VMin<v_float32> > ErodeVec32f;
|
||||
typedef MorphVec<VMax<v_float32> > DilateVec32f;
|
||||
|
||||
#else
|
||||
|
||||
|
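For orientation, a minimal sketch of how the functor/template pairs above compose, assuming OpenCV's universal intrinsics header; erode_row_sketch is a hypothetical helper, the patch's MorphRowVec plays this role in the real code:

// Sketch: one erosion row over two source rows using VMin<v_uint8>.
static void erode_row_sketch(const uchar* a, const uchar* b, uchar* dst, int width)
{
    VMin<v_uint8> updateOp;                       // v_min over 8-bit lanes
    int i = 0;
    for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes )
        v_store(dst + i, updateOp(vx_load(a + i), vx_load(b + i)));
    for( ; i < width; i++ )                       // scalar tail
        dst[i] = std::min(a[i], b[i]);
}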
@ -39,6 +39,8 @@
//
//M*/
#include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"

namespace cv
{

@ -746,109 +748,161 @@ static Rect pointSetBoundingRect( const Mat& points )
    if( npoints == 0 )
        return Rect();

#if CV_SIMD
    const int64_t* pts = points.ptr<int64_t>();

    if( !is_float )
    {
        v_int32 minval, maxval;
        minval = maxval = v_reinterpret_as_s32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
        for( i = 1; i <= npoints - v_int32::nlanes/2; i+= v_int32::nlanes/2 )
        {
            v_int32 ptXY2 = v_reinterpret_as_s32(vx_load(pts + i));
            minval = v_min(ptXY2, minval);
            maxval = v_max(ptXY2, maxval);
        }
        minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
        maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
        if( i <= npoints - v_int32::nlanes/4 )
        {
            v_int32 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
            minval = v_min(ptXY, minval);
            maxval = v_max(ptXY, maxval);
            i += v_int64::nlanes/2;
        }
        for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
        {
            minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
            maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
        }
        xmin = minval.get0();
        xmax = maxval.get0();
        ymin = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))).get0();
        ymax = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))).get0();
#if CV_SIMD_WIDTH > 16
        if( i < npoints )
        {
            v_int32x4 minval2, maxval2;
            minval2 = maxval2 = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
            for( i++; i < npoints; i++ )
            {
                v_int32x4 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
                minval2 = v_min(ptXY, minval2);
                maxval2 = v_max(ptXY, maxval2);
            }
            xmin = min(xmin, minval2.get0());
            xmax = max(xmax, maxval2.get0());
            ymin = min(ymin, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval2))).get0());
            ymax = max(ymax, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0());
        }
#endif
    }
    else
    {
        v_float32 minval, maxval;
        minval = maxval = v_reinterpret_as_f32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
        for( i = 1; i <= npoints - v_float32::nlanes/2; i+= v_float32::nlanes/2 )
        {
            v_float32 ptXY2 = v_reinterpret_as_f32(vx_load(pts + i));
            minval = v_min(ptXY2, minval);
            maxval = v_max(ptXY2, maxval);
        }
        minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
        maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
        if( i <= npoints - v_float32::nlanes/4 )
        {
            v_float32 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
            minval = v_min(ptXY, minval);
            maxval = v_max(ptXY, maxval);
            i += v_float32::nlanes/4;
        }
        for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
        {
            minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
            maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
        }
        xmin = cvFloor(minval.get0());
        xmax = cvFloor(maxval.get0());
        ymin = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))).get0());
        ymax = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))).get0());
#if CV_SIMD_WIDTH > 16
        if( i < npoints )
        {
            v_float32x4 minval2, maxval2;
            minval2 = maxval2 = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
            for( i++; i < npoints; i++ )
            {
                v_float32x4 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
                minval2 = v_min(ptXY, minval2);
                maxval2 = v_max(ptXY, maxval2);
            }
            xmin = min(xmin, cvFloor(minval2.get0()));
            xmax = max(xmax, cvFloor(maxval2.get0()));
            ymin = min(ymin, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval2))).get0()));
            ymax = max(ymax, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0()));
        }
#endif
    }
#else
    const Point* pts = points.ptr<Point>();
    Point pt = pts[0];

#if CV_SSE4_2
    if(cv::checkHardwareSupport(CV_CPU_SSE4_2))
    if( !is_float )
    {
        if( !is_float )
        xmin = xmax = pt.x;
        ymin = ymax = pt.y;

        for( i = 1; i < npoints; i++ )
        {
            __m128i minval, maxval;
            minval = maxval = _mm_loadl_epi64((const __m128i*)(&pt)); //min[0]=pt.x, min[1]=pt.y
            pt = pts[i];

            for( i = 1; i < npoints; i++ )
            {
                __m128i ptXY = _mm_loadl_epi64((const __m128i*)&pts[i]);
                minval = _mm_min_epi32(ptXY, minval);
                maxval = _mm_max_epi32(ptXY, maxval);
            }
            xmin = _mm_cvtsi128_si32(minval);
            ymin = _mm_cvtsi128_si32(_mm_srli_si128(minval, 4));
            xmax = _mm_cvtsi128_si32(maxval);
            ymax = _mm_cvtsi128_si32(_mm_srli_si128(maxval, 4));
        }
        else
        {
            __m128 minvalf, maxvalf, z = _mm_setzero_ps(), ptXY = _mm_setzero_ps();
            minvalf = maxvalf = _mm_loadl_pi(z, (const __m64*)(&pt));
            if( xmin > pt.x )
                xmin = pt.x;

            for( i = 1; i < npoints; i++ )
            {
                ptXY = _mm_loadl_pi(ptXY, (const __m64*)&pts[i]);
            if( xmax < pt.x )
                xmax = pt.x;

                minvalf = _mm_min_ps(minvalf, ptXY);
                maxvalf = _mm_max_ps(maxvalf, ptXY);
            }
            if( ymin > pt.y )
                ymin = pt.y;

            float xyminf[2], xymaxf[2];
            _mm_storel_pi((__m64*)xyminf, minvalf);
            _mm_storel_pi((__m64*)xymaxf, maxvalf);
            xmin = cvFloor(xyminf[0]);
            ymin = cvFloor(xyminf[1]);
            xmax = cvFloor(xymaxf[0]);
            ymax = cvFloor(xymaxf[1]);
            if( ymax < pt.y )
                ymax = pt.y;
        }
    }
    else
#endif
    {
        if( !is_float )
        Cv32suf v;
        // init values
        xmin = xmax = CV_TOGGLE_FLT(pt.x);
        ymin = ymax = CV_TOGGLE_FLT(pt.y);

        for( i = 1; i < npoints; i++ )
        {
            xmin = xmax = pt.x;
            ymin = ymax = pt.y;
            pt = pts[i];
            pt.x = CV_TOGGLE_FLT(pt.x);
            pt.y = CV_TOGGLE_FLT(pt.y);

            for( i = 1; i < npoints; i++ )
            {
                pt = pts[i];
            if( xmin > pt.x )
                xmin = pt.x;

                if( xmin > pt.x )
                    xmin = pt.x;
            if( xmax < pt.x )
                xmax = pt.x;

                if( xmax < pt.x )
                    xmax = pt.x;
            if( ymin > pt.y )
                ymin = pt.y;

                if( ymin > pt.y )
                    ymin = pt.y;

                if( ymax < pt.y )
                    ymax = pt.y;
            }
            if( ymax < pt.y )
                ymax = pt.y;
        }
        else
        {
            Cv32suf v;
            // init values
            xmin = xmax = CV_TOGGLE_FLT(pt.x);
            ymin = ymax = CV_TOGGLE_FLT(pt.y);

            for( i = 1; i < npoints; i++ )
            {
                pt = pts[i];
                pt.x = CV_TOGGLE_FLT(pt.x);
                pt.y = CV_TOGGLE_FLT(pt.y);

                if( xmin > pt.x )
                    xmin = pt.x;

                if( xmax < pt.x )
                    xmax = pt.x;

                if( ymin > pt.y )
                    ymin = pt.y;

                if( ymax < pt.y )
                    ymax = pt.y;
            }

            v.i = CV_TOGGLE_FLT(xmin); xmin = cvFloor(v.f);
            v.i = CV_TOGGLE_FLT(ymin); ymin = cvFloor(v.f);
            // because right and bottom sides of the bounding rectangle are not inclusive
            // (note +1 in width and height calculation below), cvFloor is used here instead of cvCeil
            v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
            v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
        }
        v.i = CV_TOGGLE_FLT(xmin); xmin = cvFloor(v.f);
        v.i = CV_TOGGLE_FLT(ymin); ymin = cvFloor(v.f);
        // because right and bottom sides of the bounding rectangle are not inclusive
        // (note +1 in width and height calculation below), cvFloor is used here instead of cvCeil
        v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
        v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
    }
#endif

    return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
}
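For reference, a scalar sketch of the integer case that all the SIMD paths above are equivalent to (a simplification, not part of the patch):

static Rect boundingRectScalarSketch(const Point* pts, int npoints)
{
    int xmin = pts[0].x, xmax = xmin, ymin = pts[0].y, ymax = ymin;
    for( int i = 1; i < npoints; i++ )
    {
        xmin = std::min(xmin, pts[i].x); xmax = std::max(xmax, pts[i].x);
        ymin = std::min(ymin, pts[i].y); ymax = std::max(ymax, pts[i].y);
    }
    // +1 because the right and bottom sides of cv::Rect are exclusive
    return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
}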
262 modules/imgproc/src/sumpixels.avx512_skx.cpp Normal file
@ -0,0 +1,262 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#include "precomp.hpp"
#include "sumpixels.hpp"

namespace cv {
namespace { // Anonymous namespace to avoid exposing the implementation classes

//
// NOTE: Look at the bottom of the file for the entry-point function for external callers
//

// At the moment only the 3-channel, untilted case is supported.
// More channel support coming soon.
// TODO: Add support for sqsum and 1, 2, and 4 channels
class IntegralCalculator_3Channel {
public:
    IntegralCalculator_3Channel() {};

    void calculate_integral_avx512(const uchar *src, size_t _srcstep,
                                   double *sum, size_t _sumstep,
                                   double *sqsum, size_t _sqsumstep,
                                   int width, int height, int cn)
    {
        const int srcstep = (int)(_srcstep/sizeof(uchar));
        const int sumstep = (int)(_sumstep/sizeof(double));
        const int sqsumstep = (int)(_sqsumstep/sizeof(double));
        const int ops_per_line = width * cn;

        // Clear the first line of the sum as per spec (see integral documentation)
        // Also adjust the index of sum and sqsum to be at the real 0th element
        // and not point to the border pixel so it stays in sync with the src pointer
        memset( sum, 0, (ops_per_line+cn)*sizeof(double));
        sum += cn;

        if (sqsum) {
            memset( sqsum, 0, (ops_per_line+cn)*sizeof(double));
            sqsum += cn;
        }

        // Now calculate the integral over the whole image one line at a time
        for(int y = 0; y < height; y++) {
            const uchar * src_line = &src[y*srcstep];
            double * sum_above = &sum[y*sumstep];
            double * sum_line = &sum_above[sumstep];
            double * sqsum_above = (sqsum) ? &sqsum[y*sqsumstep] : NULL;
            double * sqsum_line = (sqsum) ? &sqsum_above[sqsumstep] : NULL;

            integral_line_3channel_avx512(src_line, sum_line, sum_above, sqsum_line, sqsum_above, ops_per_line);
        }
    }
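    // For reference, a scalar sketch (not part of the patch) of the recurrence
    // the AVX-512 line routine implements: with the zeroed border row/column,
    // each output equals the rolling per-channel row sum plus the value above.
    // Channels are interleaved, so same-channel neighbours are cn entries apart.
    static void integral_line_scalar_sketch(const uchar *src, double *sum,
                                            const double *sum_above, int ops, int cn)
    {
        std::vector<double> rolling(cn, 0.0);   // one accumulator per channel
        for (int x = 0; x < ops; x++) {
            rolling[x % cn] += src[x];
            sum[x] = rolling[x % cn] + sum_above[x];
        }
    }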
    static inline
    void integral_line_3channel_avx512(const uchar *srcs,
                                       double *sums, double *sums_above,
                                       double *sqsums, double *sqsums_above,
                                       int num_ops_in_line)
    {
        __m512i sum_accumulator   = _mm512_setzero_si512(); // holds rolling sums for the line
        __m512i sqsum_accumulator = _mm512_setzero_si512(); // holds rolling sqsums for the line

        // The first element on each line must be zeroes as per spec (see integral documentation)
        set_border_pixel_value(sums, sqsums);

        // Do all 64 byte chunk operations then do the last bits that don't fit in a 64 byte chunk
        aligned_integral(     srcs, sums, sums_above, sqsums, sqsums_above, sum_accumulator, sqsum_accumulator, num_ops_in_line);
        post_aligned_integral(srcs, sums, sums_above, sqsums, sqsums_above, sum_accumulator, sqsum_accumulator, num_ops_in_line);
    }

    static inline
    void set_border_pixel_value(double *sums, double *sqsums)
    {
        // Sets the border pixel value to 0s.
        // Note the hard-coded -3 and the 0x7 mask are because we only support 3 channels right now
        __m512i zeroes = _mm512_setzero_si512();

        _mm512_mask_storeu_epi64(&sums[-3], 0x7, zeroes);
        if (sqsums)
            _mm512_mask_storeu_epi64(&sqsums[-3], 0x7, zeroes);
    }

    static inline
    void aligned_integral(const uchar *&srcs,
                          double *&sums, double *&sums_above,
                          double *&sqsum, double *&sqsum_above,
                          __m512i &sum_accumulator, __m512i &sqsum_accumulator,
                          int num_ops_in_line)
    {
        // This function handles full 64 byte chunks of the source data at a time until it gets to the part of
        // the line that no longer contains a full 64 byte chunk. Other code will handle the last part.

        const int num_chunks = num_ops_in_line >> 6; // quick int divide by 64

        for (int index_64byte_chunk = 0; index_64byte_chunk < num_chunks; index_64byte_chunk++){
            integral_64_operations_avx512((__m512i *) srcs,
                                          (__m512i *) sums, (__m512i *) sums_above,
                                          (__m512i *) sqsum, (__m512i *) sqsum_above,
                                          0xFFFFFFFFFFFFFFFF, sum_accumulator, sqsum_accumulator);
            srcs+=64; sums+=64; sums_above+=64;
            if (sqsum){ sqsum+= 64; sqsum_above+=64; }
        }
    }

    static inline
    void post_aligned_integral(const uchar *srcs,
                               const double *sums, const double *sums_above,
                               const double *sqsum, const double *sqsum_above,
                               __m512i &sum_accumulator, __m512i &sqsum_accumulator,
                               int num_ops_in_line)
    {
        // This function handles the last few straggling operations that are not a full chunk of 64 operations.
        // We use the same algorithm, but we calculate a different operation mask using (num_ops % 64).

        const unsigned int num_operations = (unsigned int) num_ops_in_line & 0x3F; // Quick int modulo 64

        if (num_operations > 0) {
            __mmask64 operation_mask = (1ULL << num_operations) - 1ULL;

            integral_64_operations_avx512((__m512i *) srcs, (__m512i *) sums, (__m512i *) sums_above,
                                          (__m512i *) sqsum, (__m512i *) sqsum_above,
                                          operation_mask, sum_accumulator, sqsum_accumulator);
        }
    }
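    // Example of the mask arithmetic above: with num_operations == 5,
    // (1ULL << 5) - 1 == 0b11111, i.e. only the five lowest lanes are
    // loaded and stored, so the routine never reads or writes past the line.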
    static inline
    void integral_64_operations_avx512(const __m512i *srcs,
                                       __m512i *sums, const __m512i *sums_above,
                                       __m512i *sqsums, const __m512i *sqsums_above,
                                       __mmask64 data_mask,
                                       __m512i &sum_accumulator, __m512i &sqsum_accumulator)
    {
        __m512i src_64byte_chunk = read_64_bytes(srcs, data_mask);

        for(int num_16byte_chunks=0; num_16byte_chunks<4; num_16byte_chunks++) {
            __m128i src_16bytes = _mm512_extracti64x2_epi64(src_64byte_chunk, 0x0); // Get lower 16 bytes of data

            for (int num_8byte_chunks = 0; num_8byte_chunks < 2; num_8byte_chunks++) {

                __m512i src_longs = convert_lower_8bytes_to_longs(src_16bytes);

                // Calculate integral for the sum on the 8 entries
                integral_8_operations(src_longs, sums_above, data_mask, sums, sum_accumulator);
                sums++; sums_above++;

                if (sqsums){ // Calculate integral for the square sum on the 8 entries
                    __m512i squared_source = _mm512_mullo_epi64(src_longs, src_longs);

                    integral_8_operations(squared_source, sqsums_above, data_mask, sqsums, sqsum_accumulator);
                    sqsums++; sqsums_above++;
                }

                // Prepare for next iteration of inner loop:
                // shift source to align next 8 bytes to lane 0 and shift the mask
                src_16bytes = shift_right_8_bytes(src_16bytes);
                data_mask = data_mask >> 8;
            }

            // Prepare for next iteration of outer loop
            src_64byte_chunk = shift_right_16_bytes(src_64byte_chunk);
        }
    }

    static inline
    void integral_8_operations(const __m512i src_longs, const __m512i *above_values_ptr, __mmask64 data_mask,
                               __m512i *results_ptr, __m512i &accumulator)
    {
        _mm512_mask_storeu_pd(
                results_ptr,            // Store the result here
                data_mask,              // Using the data mask to avoid overrunning the line
                calculate_integral(     // Writing the value of the integral derived from:
                        src_longs,                                          // input data
                        _mm512_maskz_loadu_pd(data_mask, above_values_ptr), // and the results from line above
                        accumulator                                         // keeping track of the accumulator
                )
        );
    }

    static inline
    __m512d calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator)
    {
        __m512i carryover_idxs = _mm512_set_epi64(6, 5, 7, 6, 5, 7, 6, 5);

        // Align data to prepare for the adds:
        // shifts data left by 3 and 6 qwords (lanes) and gets rolling sum in all lanes
        // Vertical LANES  :   76543210
        // src_longs       :   HGFEDCBA
        // shifted3lanes   : + EDCBA
        // shifted6lanes   : +    BA
        // carry_over_idxs : + 65765765 (index position of result from previous iteration)
        //                 = integral
        __m512i shifted3lanes = _mm512_maskz_expand_epi64(0xF8, src_longs);
        __m512i shifted6lanes = _mm512_maskz_expand_epi64(0xC0, src_longs);
        __m512i carry_over    = _mm512_permutex2var_epi64(accumulator, carryover_idxs, accumulator);

        // Do the adds in tree form (shift3 + shift6) + (current_source_values + accumulator)
        __m512i sum_shift3and6 = _mm512_add_epi64(shifted3lanes, shifted6lanes);
        __m512i sum_src_carry  = _mm512_add_epi64(src_longs, carry_over);
        accumulator            = _mm512_add_epi64(sum_shift3and6, sum_src_carry);

        // Convert to packed double and add to the line above to get the true integral value
        __m512d accumulator_pd = _mm512_cvtepu64_pd(accumulator);
        __m512d integral_pd    = _mm512_add_pd(accumulator_pd, above_values);
        return integral_pd;
    }
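    // Worked illustration of the tree add above (values hypothetical): with
    // 3-channel interleaved data, same-channel neighbours sit 3 lanes apart,
    // so lane 6 of the new accumulator ends up holding G+D+A plus the
    // carried-over total of that channel from the previous 8-lane block.
    // The carryover index pattern (6,5,7,6,5,7,6,5) broadcasts the previous
    // block's last per-channel totals (lanes 5, 6, 7) into the lanes where
    // each channel continues, since 8 lanes per step rotates the channel
    // phase by 8 mod 3 = 2.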
    static inline
    __m512i read_64_bytes(const __m512i *srcs, __mmask64 data_mask) {
        return _mm512_maskz_loadu_epi8(data_mask, srcs);
    }

    static inline
    __m512i convert_lower_8bytes_to_longs(__m128i src_16bytes) {
        return _mm512_cvtepu8_epi64(src_16bytes);
    }

    static inline
    __m128i shift_right_8_bytes(__m128i src_16bytes) {
        return _mm_maskz_compress_epi64(2, src_16bytes);
    }

    static inline
    __m512i shift_right_16_bytes(__m512i src_64byte_chunk) {
        return _mm512_maskz_compress_epi64(0xFC, src_64byte_chunk);
    }

};
} // end of anonymous namespace

namespace opt_AVX512_SKX {

// This is the implementation of the external callers' interface entry point.
// It should be the only function called into this file from outside.
// Any new implementations should be dispatched from here.
void calculate_integral_avx512(const uchar *src, size_t _srcstep,
                               double *sum, size_t _sumstep,
                               double *sqsum, size_t _sqsumstep,
                               int width, int height, int cn)
{
    IntegralCalculator_3Channel calculator;
    calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height, cn);
}

} // end namespace opt_AVX512_SKX
} // end namespace cv
@ -10,7 +10,7 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
@ -44,6 +44,7 @@
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"


namespace cv
@ -62,6 +63,37 @@ struct Integral_SIMD
    }
};


template <>
struct Integral_SIMD<uchar, double, double> {
    Integral_SIMD() {};

    bool operator()(const uchar *src, size_t _srcstep,
                    double *sum, size_t _sumstep,
                    double *sqsum, size_t _sqsumstep,
                    double *tilted, size_t _tiltedstep,
                    int width, int height, int cn) const
    {
#if CV_TRY_AVX512_SKX
        CV_UNUSED(_tiltedstep);
        // TODO: Add support for 1,2, and 4 channels
        if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && cn == 3){
            opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep,
                                                      sqsum, _sqsumstep, width, height, cn);
            return true;
        }
#else
        // Avoid warnings in some builds
        CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep);
        CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep);
        CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
#endif
        return false;
    }

};
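// Hypothetical usage that would take the specialized path above on an
// AVX-512 capable machine: an 8-bit 3-channel image, double sums and
// square sums, and no tilted output requested:
//   Mat img(480, 640, CV_8UC3), sum, sqsum;
//   integral(img, sum, sqsum, CV_64F, CV_64F);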

#if CV_SIMD && CV_SIMD_WIDTH <= 64

template <>
25 modules/imgproc/src/sumpixels.hpp Normal file
@ -0,0 +1,25 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP

namespace cv
{

namespace opt_AVX512_SKX
{
#if CV_TRY_AVX512_SKX
void calculate_integral_avx512(
        const uchar *src, size_t _srcstep,
        double *sum, size_t _sumstep,
        double *sqsum, size_t _sqsumstep,
        int width, int height, int cn);

#endif
} // end namespace opt_AVX512_SKX
} // end namespace cv

#endif
File diff suppressed because it is too large
@ -3062,4 +3062,14 @@ TEST(ImgProc_BGR2RGBA, 3ch24ch)
    EXPECT_DOUBLE_EQ(cvtest::norm(expected - dst, NORM_INF), 0.);
}

TEST(ImgProc_RGB2YUV, regression_13668)
{
    Mat src(Size(32, 4), CV_8UC3, Scalar(9, 250, 82));  // Ensure that SIMD code path works
    Mat dst;
    cvtColor(src, dst, COLOR_RGB2YUV);
    Vec3b res = dst.at<Vec3b>(0, 0);
    Vec3b ref(159, 90, 0);
    EXPECT_EQ(res, ref);
}
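// The reference triple follows from OpenCV's 8-bit RGB->YUV formulas
// (a worked check, not part of the patch):
//   Y = 0.299*9 + 0.587*250 + 0.114*82            = 158.8 -> 159
//   U = (B - Y)*0.492 + 128 = (82 - 158.8)*0.492 + 128 = 90.2 -> 90
//   V = (R - Y)*0.877 + 128 = (9 - 158.8)*0.877 + 128 = -3.4 -> saturates to 0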

}} // namespace
@ -387,7 +387,7 @@ bool QRDetect::computeTransformationPoints()
        findNonZero(mask_roi, non_zero_elem[i]);
        newHull.insert(newHull.end(), non_zero_elem[i].begin(), non_zero_elem[i].end());
    }
    convexHull(Mat(newHull), locations);
    convexHull(newHull, locations);
    for (size_t i = 0; i < locations.size(); i++)
    {
        for (size_t j = 0; j < 3; j++)
@ -556,7 +556,7 @@ vector<Point2f> QRDetect::getQuadrilateral(vector<Point2f> angle_list)
    }

    vector<Point> integer_hull;
    convexHull(Mat(locations), integer_hull);
    convexHull(locations, integer_hull);
    int hull_size = (int)integer_hull.size();
    vector<Point2f> hull(hull_size);
    for (int i = 0; i < hull_size; i++)
@ -901,7 +901,7 @@ bool QRDecode::versionDefinition()
    vector<Point> locations, non_zero_elem;
    Mat mask_roi = mask(Range(1, intermediate.rows - 1), Range(1, intermediate.cols - 1));
    findNonZero(mask_roi, non_zero_elem);
    convexHull(Mat(non_zero_elem), locations);
    convexHull(non_zero_elem, locations);
    Point offset = computeOffset(locations);

    Point temp_remote = locations[0], remote_point;
@ -646,8 +646,6 @@ private:
    Size size = frame0.size();
    UMat prevFlowX, prevFlowY, curFlowX, curFlowY;

    flowx.create(size, CV_32F);
    flowy.create(size, CV_32F);
    UMat flowx0 = flowx;
    UMat flowy0 = flowy;

@ -1075,12 +1073,19 @@ private:
            return false;

        std::vector<UMat> flowar;
        if (!_flow0.empty())

        // If flag is set, check for integrity; if not set, allocate memory space
        if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
        {
            if (_flow0.empty() || _flow0.size() != _prev0.size() || _flow0.channels() != 2 ||
                _flow0.depth() != CV_32F)
                return false;
            split(_flow0, flowar);
        }
        else
        {
            flowar.push_back(UMat());
            flowar.push_back(UMat());
            flowar.push_back(UMat(_prev0.size(), CV_32FC1));
            flowar.push_back(UMat(_prev0.size(), CV_32FC1));
        }
        if(!this->operator()(_prev0.getUMat(), _next0.getUMat(), flowar[0], flowar[1])){
            return false;
@ -1112,7 +1117,14 @@ void FarnebackOpticalFlowImpl::calc(InputArray _prev0, InputArray _next0,

    CV_Assert( prev0.size() == next0.size() && prev0.channels() == next0.channels() &&
               prev0.channels() == 1 && pyrScale_ < 1 );
    _flow0.create( prev0.size(), CV_32FC2 );

    // If flag is set, check for integrity; if not set, allocate memory space
    if( flags_ & OPTFLOW_USE_INITIAL_FLOW )
        CV_Assert( _flow0.size() == prev0.size() && _flow0.channels() == 2 &&
                   _flow0.depth() == CV_32F );
    else
        _flow0.create( prev0.size(), CV_32FC2 );

    Mat flow0 = _flow0.getMat();

    for( k = 0, scale = 1; k < levels; k++ )
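A hedged usage sketch of the flag semantics enforced above (file names hypothetical): with OPTFLOW_USE_INITIAL_FLOW the caller must hand in a valid CV_32FC2 field of matching size; without the flag, calc() allocates one itself.

Mat prev = imread("frame0.png", IMREAD_GRAYSCALE);  // hypothetical inputs
Mat next = imread("frame1.png", IMREAD_GRAYSCALE);
Mat flow(prev.size(), CV_32FC2, Scalar::all(0));    // valid initial guess
calcOpticalFlowFarneback(prev, next, flow, 0.5, 3, 15, 3, 5, 1.2,
                         OPTFLOW_USE_INITIAL_FLOW);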
@ -90,6 +90,18 @@ squeezenet:
  classes: "classification_classes_ILSVRC2012.txt"
  sample: "classification"

# Googlenet from https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet
googlenet:
  model: "bvlc_googlenet.caffemodel"
  config: "bvlc_googlenet.prototxt"
  mean: [104, 117, 123]
  scale: 1.0
  width: 224
  height: 224
  rgb: false
  classes: "classification_classes_ILSVRC2012.txt"
  sample: "classification"

################################################################################
# Semantic segmentation models.
################################################################################
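A sketch of how the fields of such an entry map onto cv::dnn calls (illustrative only; `img` is assumed to be a preloaded BGR image, and this is not the sample's literal source):

Net net = dnn::readNet("bvlc_googlenet.caffemodel", "bvlc_googlenet.prototxt");
Mat blob = dnn::blobFromImage(img, 1.0 /* scale */, Size(224, 224),
                              Scalar(104, 117, 123) /* mean */,
                              false /* rgb: false -> keep BGR order */);
net.setInput(blob);
Mat out = net.forward();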
@ -289,7 +289,7 @@ def removeUnusedNodesAndAttrs(to_remove, graph_def):
        op = graph_def.node[i].op
        name = graph_def.node[i].name

        if op == 'Const' or to_remove(name, op):
        if to_remove(name, op):
            if op != 'Const':
                removedNodes.append(name)
@ -48,10 +48,42 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):

    removeIdentity(graph_def)

    nodesToKeep = []
    def to_remove(name, op):
        return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
        if name in nodesToKeep:
            return False
        return op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
               (name.startswith('CropAndResize') and op != 'CropAndResize')

    # Fuse atrous convolutions (with dilations).
    nodesMap = {node.name: node for node in graph_def.node}
    for node in reversed(graph_def.node):
        if node.op == 'BatchToSpaceND':
            del node.input[2]
            conv = nodesMap[node.input[0]]
            spaceToBatchND = nodesMap[conv.input[0]]

            # Extract paddings
            stridedSlice = nodesMap[spaceToBatchND.input[2]]
            assert(stridedSlice.op == 'StridedSlice')
            pack = nodesMap[stridedSlice.input[0]]
            assert(pack.op == 'Pack')

            padNodeH = nodesMap[nodesMap[pack.input[0]].input[0]]
            padNodeW = nodesMap[nodesMap[pack.input[1]].input[0]]
            padH = int(padNodeH.attr['value']['tensor'][0]['int_val'][0])
            padW = int(padNodeW.attr['value']['tensor'][0]['int_val'][0])

            paddingsNode = NodeDef()
            paddingsNode.name = conv.name + '/paddings'
            paddingsNode.op = 'Const'
            paddingsNode.addAttr('value', [padH, padH, padW, padW])
            graph_def.node.insert(graph_def.node.index(spaceToBatchND), paddingsNode)
            nodesToKeep.append(paddingsNode.name)

            spaceToBatchND.input[2] = paddingsNode.name


    removeUnusedNodesAndAttrs(to_remove, graph_def)


@ -225,6 +257,26 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
    detectionOut.addAttr('variance_encoded_in_target', True)
    graph_def.node.extend([detectionOut])

    def getUnconnectedNodes():
        unconnected = [node.name for node in graph_def.node]
        for node in graph_def.node:
            for inp in node.input:
                if inp in unconnected:
                    unconnected.remove(inp)
        return unconnected

    while True:
        unconnectedNodes = getUnconnectedNodes()
        unconnectedNodes.remove(detectionOut.name)
        if not unconnectedNodes:
            break

        for name in unconnectedNodes:
            for i in range(len(graph_def.node)):
                if graph_def.node[i].name == name:
                    del graph_def.node[i]
                    break

    # Save as text.
    graph_def.save(outputPath)
@ -55,7 +55,7 @@ graph_def = parseTextGraph(args.output)
removeIdentity(graph_def)

def to_remove(name, op):
    return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
    return op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
           (name.startswith('CropAndResize') and op != 'CropAndResize')

removeUnusedNodesAndAttrs(to_remove, graph_def)
@ -10,14 +10,60 @@
# Then you can import it with a binary frozen graph (.pb) using readNetFromTensorflow() function.
# See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
import argparse
import re
from math import sqrt
from tf_text_graph_common import *

class SSDAnchorGenerator:
    def __init__(self, min_scale, max_scale, num_layers, aspect_ratios,
                 reduce_boxes_in_lowest_layer, image_width, image_height):
        self.min_scale = min_scale
        self.aspect_ratios = aspect_ratios
        self.reduce_boxes_in_lowest_layer = reduce_boxes_in_lowest_layer
        self.image_width = image_width
        self.image_height = image_height
        self.scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
                       for i in range(num_layers)] + [1.0]

    def get(self, layer_id):
        if layer_id == 0 and self.reduce_boxes_in_lowest_layer:
            widths = [0.1, self.min_scale * sqrt(2.0), self.min_scale * sqrt(0.5)]
            heights = [0.1, self.min_scale / sqrt(2.0), self.min_scale / sqrt(0.5)]
        else:
            widths = [self.scales[layer_id] * sqrt(ar) for ar in self.aspect_ratios]
            heights = [self.scales[layer_id] / sqrt(ar) for ar in self.aspect_ratios]

            widths += [sqrt(self.scales[layer_id] * self.scales[layer_id + 1])]
            heights += [sqrt(self.scales[layer_id] * self.scales[layer_id + 1])]
        widths = [w * self.image_width for w in widths]
        heights = [h * self.image_height for h in heights]
        return widths, heights
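    # Hypothetical sanity check of the class above (values are common SSD
    # defaults, not taken from any specific config):
    #   gen = SSDAnchorGenerator(min_scale=0.2, max_scale=0.95, num_layers=6,
    #                            aspect_ratios=[1.0, 2.0, 0.5],
    #                            reduce_boxes_in_lowest_layer=True,
    #                            image_width=300, image_height=300)
    #   w, h = gen.get(0)  # layer 0: the reduced box set (3 boxes) plus one extra scale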

class MultiscaleAnchorGenerator:
    def __init__(self, min_level, aspect_ratios, scales_per_octave, anchor_scale):
        self.min_level = min_level
        self.aspect_ratios = aspect_ratios
        self.anchor_scale = anchor_scale
        self.scales = [2**(float(s) / scales_per_octave) for s in range(scales_per_octave)]

    def get(self, layer_id):
        widths = []
        heights = []
        for a in self.aspect_ratios:
            for s in self.scales:
                base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale
                ar = sqrt(a)
                heights.append(base_anchor_size * s / ar)
                widths.append(base_anchor_size * s * ar)
        return widths, heights


def createSSDGraph(modelPath, configPath, outputPath):
    # Nodes that should be kept.
    keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu6', 'Placeholder', 'FusedBatchNorm',
    keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu', 'Relu6', 'Placeholder', 'FusedBatchNorm',
               'DepthwiseConv2dNative', 'ConcatV2', 'Mul', 'MaxPool', 'AvgPool', 'Identity',
               'Sub']
               'Sub', 'ResizeNearestNeighbor', 'Pad']

    # Nodes with these prefixes will be removed
    prefixesToRemove = ('MultipleGridAnchorGenerator/', 'Postprocessor/', 'Preprocessor/map')
@ -27,26 +73,50 @@ def createSSDGraph(modelPath, configPath, outputPath):
    config = config['model'][0]['ssd'][0]
    num_classes = int(config['num_classes'][0])

    ssd_anchor_generator = config['anchor_generator'][0]['ssd_anchor_generator'][0]
    min_scale = float(ssd_anchor_generator['min_scale'][0])
    max_scale = float(ssd_anchor_generator['max_scale'][0])
    num_layers = int(ssd_anchor_generator['num_layers'][0])
    aspect_ratios = [float(ar) for ar in ssd_anchor_generator['aspect_ratios']]
    reduce_boxes_in_lowest_layer = True
    if 'reduce_boxes_in_lowest_layer' in ssd_anchor_generator:
        reduce_boxes_in_lowest_layer = ssd_anchor_generator['reduce_boxes_in_lowest_layer'][0] == 'true'

    fixed_shape_resizer = config['image_resizer'][0]['fixed_shape_resizer'][0]
    image_width = int(fixed_shape_resizer['width'][0])
    image_height = int(fixed_shape_resizer['height'][0])

    box_predictor = 'convolutional' if 'convolutional_box_predictor' in config['box_predictor'][0] else 'weight_shared_convolutional'

    anchor_generator = config['anchor_generator'][0]
    if 'ssd_anchor_generator' in anchor_generator:
        ssd_anchor_generator = anchor_generator['ssd_anchor_generator'][0]
        min_scale = float(ssd_anchor_generator['min_scale'][0])
        max_scale = float(ssd_anchor_generator['max_scale'][0])
        num_layers = int(ssd_anchor_generator['num_layers'][0])
        aspect_ratios = [float(ar) for ar in ssd_anchor_generator['aspect_ratios']]
        reduce_boxes_in_lowest_layer = True
        if 'reduce_boxes_in_lowest_layer' in ssd_anchor_generator:
            reduce_boxes_in_lowest_layer = ssd_anchor_generator['reduce_boxes_in_lowest_layer'][0] == 'true'
        priors_generator = SSDAnchorGenerator(min_scale, max_scale, num_layers,
                                              aspect_ratios, reduce_boxes_in_lowest_layer,
                                              image_width, image_height)

        print('Scale: [%f-%f]' % (min_scale, max_scale))
        print('Aspect ratios: %s' % str(aspect_ratios))
        print('Reduce boxes in the lowest layer: %s' % str(reduce_boxes_in_lowest_layer))
    elif 'multiscale_anchor_generator' in anchor_generator:
        multiscale_anchor_generator = anchor_generator['multiscale_anchor_generator'][0]
        min_level = int(multiscale_anchor_generator['min_level'][0])
        max_level = int(multiscale_anchor_generator['max_level'][0])
        anchor_scale = float(multiscale_anchor_generator['anchor_scale'][0])
        aspect_ratios = [float(ar) for ar in multiscale_anchor_generator['aspect_ratios']]
        scales_per_octave = int(multiscale_anchor_generator['scales_per_octave'][0])
        num_layers = max_level - min_level + 1
        priors_generator = MultiscaleAnchorGenerator(min_level, aspect_ratios,
                                                     scales_per_octave, anchor_scale)
        print('Levels: [%d-%d]' % (min_level, max_level))
        print('Anchor scale: %f' % anchor_scale)
        print('Scales per octave: %d' % scales_per_octave)
        print('Aspect ratios: %s' % str(aspect_ratios))
    else:
        print('Unknown anchor_generator')
        exit(0)

    print('Number of classes: %d' % num_classes)
    print('Number of layers: %d' % num_layers)
    print('Scale: [%f-%f]' % (min_scale, max_scale))
    print('Aspect ratios: %s' % str(aspect_ratios))
    print('Reduce boxes in the lowest layer: %s' % str(reduce_boxes_in_lowest_layer))
    print('box predictor: %s' % box_predictor)
    print('Input image size: %dx%d' % (image_width, image_height))

@ -67,8 +137,8 @@ def createSSDGraph(modelPath, configPath, outputPath):
        return unconnected


    # Detect unfused batch normalization nodes and fuse them.
    def fuse_batch_normalization():
    def fuse_nodes(nodesToKeep):
        # Detect unfused batch normalization nodes and fuse them.
        # Add_0 <-- moving_variance, add_y
        # Rsqrt <-- Add_0
        # Mul_0 <-- Rsqrt, gamma
@ -77,9 +147,15 @@ def createSSDGraph(modelPath, configPath, outputPath):
        # Sub_0 <-- beta, Mul_2
        # Add_1 <-- Mul_1, Sub_0
        nodesMap = {node.name: node for node in graph_def.node}
        subgraph = ['Add',
        subgraphBatchNorm = ['Add',
            ['Mul', 'input', ['Mul', ['Rsqrt', ['Add', 'moving_variance', 'add_y']], 'gamma']],
            ['Sub', 'beta', ['Mul', 'moving_mean', 'Mul_0']]]
        # Detect unfused nearest neighbor resize.
        subgraphResizeNN = ['Reshape',
            ['Mul', ['Reshape', 'input', ['Pack', 'shape_1', 'shape_2', 'shape_3', 'shape_4', 'shape_5']],
             'ones'],
            ['Pack', ['StridedSlice', ['Shape', 'input'], 'stack', 'stack_1', 'stack_2'],
             'out_height', 'out_width', 'out_channels']]
        def checkSubgraph(node, targetNode, inputs, fusedNodes):
            op = targetNode[0]
            if node.op == op and (len(node.input) >= len(targetNode) - 1):
@ -100,7 +176,7 @@ def createSSDGraph(modelPath, configPath, outputPath):
        for node in graph_def.node:
            inputs = {}
            fusedNodes = []
            if checkSubgraph(node, subgraph, inputs, fusedNodes):
            if checkSubgraph(node, subgraphBatchNorm, inputs, fusedNodes):
                name = node.name
                node.Clear()
                node.name = name
@ -112,15 +188,41 @@ def createSSDGraph(modelPath, configPath, outputPath):
                node.input.append(inputs['moving_variance'])
                node.addAttr('epsilon', 0.001)
                nodesToRemove += fusedNodes[1:]

            inputs = {}
            fusedNodes = []
            if checkSubgraph(node, subgraphResizeNN, inputs, fusedNodes):
                name = node.name
                node.Clear()
                node.name = name
                node.op = 'ResizeNearestNeighbor'
                node.input.append(inputs['input'])
                node.input.append(name + '/output_shape')

                out_height_node = nodesMap[inputs['out_height']]
                out_width_node = nodesMap[inputs['out_width']]
                out_height = int(out_height_node.attr['value']['tensor'][0]['int_val'][0])
                out_width = int(out_width_node.attr['value']['tensor'][0]['int_val'][0])

                shapeNode = NodeDef()
                shapeNode.name = name + '/output_shape'
                shapeNode.op = 'Const'
                shapeNode.addAttr('value', [out_height, out_width])
                graph_def.node.insert(graph_def.node.index(node), shapeNode)
                nodesToKeep.append(shapeNode.name)

                nodesToRemove += fusedNodes[1:]
        for node in nodesToRemove:
            graph_def.node.remove(node)

    fuse_batch_normalization()
    nodesToKeep = []
    fuse_nodes(nodesToKeep)

    removeIdentity(graph_def)

    def to_remove(name, op):
        return (not op in keepOps) or name.startswith(prefixesToRemove)
        return (not name in nodesToKeep) and \
               (op == 'Const' or (not op in keepOps) or name.startswith(prefixesToRemove))

    removeUnusedNodesAndAttrs(to_remove, graph_def)

@ -169,19 +271,15 @@ def createSSDGraph(modelPath, configPath, outputPath):
        graph_def.node.extend([flatten])
        addConcatNode('%s/concat' % label, concatInputs, 'concat/axis_flatten')

    idx = 0
    num_matched_layers = 0
    for node in graph_def.node:
        if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx) or \
           node.name == ('WeightSharedConvolutionalBoxPredictor_%d/BoxPredictor/Conv2D' % idx) or \
           node.name == 'WeightSharedConvolutionalBoxPredictor/BoxPredictor/Conv2D':
        if re.match('BoxPredictor_\d/BoxEncodingPredictor/Conv2D', node.name) or \
           re.match('WeightSharedConvolutionalBoxPredictor(_\d)*/BoxPredictor/Conv2D', node.name):
            node.addAttr('loc_pred_transposed', True)
            idx += 1
    assert(idx == num_layers)
            num_matched_layers += 1
    assert(num_matched_layers == num_layers)
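    # Example node names the new regular expressions match (illustrative only):
    #   BoxPredictor_3/BoxEncodingPredictor/Conv2D
    #   WeightSharedConvolutionalBoxPredictor/BoxPredictor/Conv2D
    #   WeightSharedConvolutionalBoxPredictor_2/BoxPredictor/Conv2D
    # Unlike the old sequential-index check, matching no longer depends on the
    # order in which the predictor nodes appear in the graph.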

    # Add layers that generate anchors (bounding boxes proposals).
    scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
              for i in range(num_layers)] + [1.0]

    priorBoxes = []
    for i in range(num_layers):
        priorBox = NodeDef()
@ -199,17 +297,8 @@ def createSSDGraph(modelPath, configPath, outputPath):
        priorBox.addAttr('flip', False)
        priorBox.addAttr('clip', False)

        if i == 0 and reduce_boxes_in_lowest_layer:
            widths = [0.1, min_scale * sqrt(2.0), min_scale * sqrt(0.5)]
            heights = [0.1, min_scale / sqrt(2.0), min_scale / sqrt(0.5)]
        else:
            widths = [scales[i] * sqrt(ar) for ar in aspect_ratios]
            heights = [scales[i] / sqrt(ar) for ar in aspect_ratios]
        widths, heights = priors_generator.get(i)

            widths += [sqrt(scales[i] * scales[i + 1])]
            heights += [sqrt(scales[i] * scales[i + 1])]
        widths = [w * image_width for w in widths]
        heights = [h * image_height for h in heights]
        priorBox.addAttr('width', widths)
        priorBox.addAttr('height', heights)
        priorBox.addAttr('variance', [0.1, 0.1, 0.2, 0.2])
@ -217,6 +306,7 @@ def createSSDGraph(modelPath, configPath, outputPath):
        graph_def.node.extend([priorBox])
        priorBoxes.append(priorBox.name)

    # Compare this layer's output with Postprocessor/Reshape
    addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')

    # Sigmoid for classes predictions and DetectionOutput layer