Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin 2020-05-18 17:50:16 +00:00
commit 593af7287b
28 changed files with 375 additions and 173 deletions

View File

@ -146,7 +146,7 @@ npm install canvas jsdom
@code{.js} @code{.js}
const { Canvas, createCanvas, Image, ImageData, loadImage } = require('canvas'); const { Canvas, createCanvas, Image, ImageData, loadImage } = require('canvas');
const { JSDOM } = require('jsdom'); const { JSDOM } = require('jsdom');
const { writeFileSync } = require('fs'); const { writeFileSync, existsSync, mkdirSync } = require("fs");
// This is our program. This time we use JavaScript async / await and promises to handle asynchronicity. // This is our program. This time we use JavaScript async / await and promises to handle asynchronicity.
(async () => { (async () => {

View File

@ -4,20 +4,20 @@ OCR of Hand-written Data using kNN {#tutorial_py_knn_opencv}
Goal Goal
---- ----
In this chapter In this chapter:
- We will use our knowledge on kNN to build a basic OCR application. - We will use our knowledge on kNN to build a basic OCR (Optical Character Recognition) application.
- We will try with Digits and Alphabets data available that comes with OpenCV. - We will try our application on Digits and Alphabets data that comes with OpenCV.
OCR of Hand-written Digits OCR of Hand-written Digits
-------------------------- --------------------------
Our goal is to build an application which can read the handwritten digits. For this we need some Our goal is to build an application which can read handwritten digits. For this we need some
train_data and test_data. OpenCV comes with an image digits.png (in the folder training data and some test data. OpenCV comes with an image digits.png (in the folder
opencv/samples/data/) which has 5000 handwritten digits (500 for each digit). Each digit is opencv/samples/data/) which has 5000 handwritten digits (500 for each digit). Each digit is
a 20x20 image. So our first step is to split this image into 5000 different digits. For each digit, a 20x20 image. So our first step is to split this image into 5000 different digit images. Then for each digit (20x20 image),
we flatten it into a single row with 400 pixels. That is our feature set, ie intensity values of all we flatten it into a single row with 400 pixels. That is our feature set, i.e. intensity values of all
pixels. It is the simplest feature set we can create. We use first 250 samples of each digit as pixels. It is the simplest feature set we can create. We use the first 250 samples of each digit as
train_data, and next 250 samples as test_data. So let's prepare them first. training data, and the other 250 samples as test data. So let's prepare them first.
@code{.py} @code{.py}
import numpy as np import numpy as np
import cv2 as cv import cv2 as cv
@ -28,10 +28,10 @@ gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# Now we split the image to 5000 cells, each 20x20 size # Now we split the image to 5000 cells, each 20x20 size
cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)] cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)]
# Make it into a Numpy array. It size will be (50,100,20,20) # Make it into a Numpy array: its size will be (50,100,20,20)
x = np.array(cells) x = np.array(cells)
# Now we prepare train_data and test_data. # Now we prepare the training data and test data
train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400) train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400)
test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400) test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400)
@ -40,7 +40,7 @@ k = np.arange(10)
train_labels = np.repeat(k,250)[:,np.newaxis] train_labels = np.repeat(k,250)[:,np.newaxis]
test_labels = train_labels.copy() test_labels = train_labels.copy()
# Initiate kNN, train the data, then test it with test data for k=1 # Initiate kNN, train it on the training data, then test it with the test data with k=5
knn = cv.ml.KNearest_create() knn = cv.ml.KNearest_create()
knn.train(train, cv.ml.ROW_SAMPLE, train_labels) knn.train(train, cv.ml.ROW_SAMPLE, train_labels)
ret,result,neighbours,dist = knn.findNearest(test,k=5) ret,result,neighbours,dist = knn.findNearest(test,k=5)
@ -52,13 +52,15 @@ correct = np.count_nonzero(matches)
accuracy = correct*100.0/result.size accuracy = correct*100.0/result.size
print( accuracy ) print( accuracy )
@endcode @endcode
So our basic OCR app is ready. This particular example gave me an accuracy of 91%. One option So our basic OCR app is ready. This particular example gave me an accuracy of 91%. One option to
improve accuracy is to add more data for training, especially the wrong ones. So instead of finding improve accuracy is to add more data for training, especially for the digits where we had more errors.
this training data every time I start application, I better save it, so that next time, I directly
read this data from a file and start classification. You can do it with the help of some Numpy Instead of finding
functions like np.savetxt, np.savez, np.load etc. Please check their docs for more details. this training data every time I start the application, I had better save it, so that the next time, I can directly
read this data from a file and start classification. This can be done with the help of some Numpy
functions like np.savetxt, np.savez, np.load, etc. Please check the NumPy docs for more details.
@code{.py} @code{.py}
# save the data # Save the data
np.savez('knn_data.npz',train=train, train_labels=train_labels) np.savez('knn_data.npz',train=train, train_labels=train_labels)
# Now load the data # Now load the data
@ -71,36 +73,36 @@ In my system, it takes around 4.4 MB of memory. Since we are using intensity val
features, it would be better to convert the data to np.uint8 first and then save it. It takes only features, it would be better to convert the data to np.uint8 first and then save it. It takes only
1.1 MB in this case. Then while loading, you can convert back into float32. 1.1 MB in this case. Then while loading, you can convert back into float32.
OCR of English Alphabets OCR of the English Alphabet
------------------------ ------------------------
Next we will do the same for English alphabets, but there is a slight change in data and feature Next we will do the same for the English alphabet, but there is a slight change in data and feature
set. Here, instead of images, OpenCV comes with a data file, letter-recognition.data in set. Here, instead of images, OpenCV comes with a data file, letter-recognition.data in
opencv/samples/cpp/ folder. If you open it, you will see 20000 lines which may, on first sight, look opencv/samples/cpp/ folder. If you open it, you will see 20000 lines which may, on first sight, look
like garbage. Actually, in each row, first column is an alphabet which is our label. Next 16 numbers like garbage. Actually, in each row, the first column is a letter which is our label. The next 16 numbers
following it are its different features. These features are obtained from [UCI Machine Learning following it are the different features. These features are obtained from the [UCI Machine Learning
Repository](http://archive.ics.uci.edu/ml/). You can find the details of these features in [this Repository](http://archive.ics.uci.edu/ml/). You can find the details of these features in [this
page](http://archive.ics.uci.edu/ml/datasets/Letter+Recognition). page](http://archive.ics.uci.edu/ml/datasets/Letter+Recognition).
There are 20000 samples available, so we take first 10000 data as training samples and remaining There are 20000 samples available, so we take the first 10000 as training samples and the remaining
10000 as test samples. We should change the alphabets to ascii characters because we can't work with 10000 as test samples. We should convert the letters to numbers (derived from their ASCII codes) because we can't work with
alphabets directly. letters directly.
@code{.py} @code{.py}
import cv2 as cv import cv2 as cv
import numpy as np import numpy as np
# Load the data, converters convert the letter to a number # Load the data and convert the letters to numbers
data= np.loadtxt('letter-recognition.data', dtype= 'float32', delimiter = ',', data= np.loadtxt('letter-recognition.data', dtype= 'float32', delimiter = ',',
converters= {0: lambda ch: ord(ch)-ord('A')}) converters= {0: lambda ch: ord(ch)-ord('A')})
# split the data to two, 10000 each for train and test # Split the dataset in two, with 10000 samples each for training and test sets
train, test = np.vsplit(data,2) train, test = np.vsplit(data,2)
# split trainData and testData to features and responses # Split trainData and testData into features and responses
responses, trainData = np.hsplit(train,[1]) responses, trainData = np.hsplit(train,[1])
labels, testData = np.hsplit(test,[1]) labels, testData = np.hsplit(test,[1])
# Initiate the kNN, classify, measure accuracy. # Initiate the kNN, classify, measure accuracy
knn = cv.ml.KNearest_create() knn = cv.ml.KNearest_create()
knn.train(trainData, cv.ml.ROW_SAMPLE, responses) knn.train(trainData, cv.ml.ROW_SAMPLE, responses)
ret, result, neighbours, dist = knn.findNearest(testData, k=5) ret, result, neighbours, dist = knn.findNearest(testData, k=5)
@ -110,10 +112,12 @@ accuracy = correct*100.0/10000
print( accuracy ) print( accuracy )
@endcode @endcode
It gives me an accuracy of 93.22%. Again, if you want to increase accuracy, you can iteratively add It gives me an accuracy of 93.22%. Again, if you want to increase accuracy, you can iteratively add
error data in each level. more data.
Additional Resources Additional Resources
-------------------- --------------------
1. [Wikipedia article on Optical character recognition](https://en.wikipedia.org/wiki/Optical_character_recognition)
Exercises Exercises
--------- ---------
1. Here we used k=5. What happens if you try other values of k? Can you find a value that maximizes accuracy (minimizes the number of errors)?

View File

@ -4,61 +4,55 @@ Understanding k-Nearest Neighbour {#tutorial_py_knn_understanding}
Goal Goal
---- ----
In this chapter, we will understand the concepts of k-Nearest Neighbour (kNN) algorithm. In this chapter, we will understand the concepts of the k-Nearest Neighbour (kNN) algorithm.
Theory Theory
------ ------
kNN is one of the simplest of classification algorithms available for supervised learning. The idea kNN is one of the simplest classification algorithms available for supervised learning. The idea
is to search for closest match of the test data in feature space. We will look into it with below is to search for the closest match(es) of the test data in the feature space. We will look into it with the below
image. image.
![image](images/knn_theory.png) ![image](images/knn_theory.png)
In the image, there are two families, Blue Squares and Red Triangles. We call each family as In the image, there are two families: Blue Squares and Red Triangles. We refer to each family as
**Class**. Their houses are shown in their town map which we call feature space. *(You can consider a **Class**. Their houses are shown in their town map which we call the **Feature Space**. You can consider
a feature space as a space where all datas are projected. For example, consider a 2D coordinate a feature space as a space where all data are projected. For example, consider a 2D coordinate
space. Each data has two features, x and y coordinates. You can represent this data in your 2D space. Each datum has two features, an x coordinate and a y coordinate. You can represent this datum in your 2D
coordinate space, right? Now imagine if there are three features, you need 3D space. Now consider N coordinate space, right? Now imagine that there are three features, you will need 3D space. Now consider N
features, where you need N-dimensional space, right? This N-dimensional space is its feature space. features: you need N-dimensional space, right? This N-dimensional space is its feature space.
In our image, you can consider it as a 2D case with two features)*. In our image, you can consider it as a 2D case with two features.
Now a new member comes into the town and creates a new home, which is shown as green circle. He Now consider what happens if a new member comes into the town and creates a new home, which is shown as the green circle. He
should be added to one of these Blue/Red families. We call that process, **Classification**. What we should be added to one of these Blue or Red families (or *classes*). We call that process, **Classification**. How exactly should this new member be classified? Since we are dealing with kNN, let us apply the algorithm.
do? Since we are dealing with kNN, let us apply this algorithm.
One method is to check who is his nearest neighbour. From the image, it is clear it is the Red One simple method is to check who is his nearest neighbour. From the image, it is clear that it is a member of the Red
Triangle family. So he is also added into Red Triangle. This method is called simply **Nearest Triangle family. So he is classified as a Red Triangle. This method is called simply **Nearest Neighbour** classification, because classification depends only on the *nearest neighbour*.
Neighbour**, because classification depends only on the nearest neighbour.
But there is a problem with that. Red Triangle may be the nearest. But what if there are lot of Blue But there is a problem with this approach! Red Triangle may be the nearest neighbour, but what if there are also a lot of Blue
Squares near to him? Then Blue Squares have more strength in that locality than Red Triangle. So Squares nearby? Then Blue Squares have more strength in that locality than Red Triangles, so
just checking nearest one is not sufficient. Instead we check some k nearest families. Then whoever just checking the nearest one is not sufficient. Instead we may want to check some **k** nearest neighbours. Then whichever family is the majority amongst them, the new guy should belong to that family. In our image, let's take k=3, i.e. consider the 3 nearest
is majority in them, the new guy belongs to that family. In our image, let's take k=3, ie 3 nearest neighbours. The new member has two Red neighbours and one Blue neighbour (there are two Blues equidistant, but since k=3, we can take only
families. He has two Red and one Blue (there are two Blues equidistant, but since k=3, we take only
one of them), so again he should be added to Red family. But what if we take k=7? Then he has 5 Blue one of them), so again he should be added to Red family. But what if we take k=7? Then he has 5 Blue
families and 2 Red families. Great!! Now he should be added to Blue family. So it all changes with neighbours and 2 Red neighbours and should be added to the Blue family. The result will vary with the selected
value of k. More funny thing is, what if k = 4? He has 2 Red and 2 Blue neighbours. It is a tie !!! value of k. Note that if k is not an odd number, we can get a tie, as would happen in the above case with k=4. We would see that our new member has 2 Red and 2 Blue neighbours as his four nearest neighbours and we would need to choose a method for breaking the tie to perform classification. So to reiterate, this method is called **k-Nearest Neighbour** since
So better take k as an odd number. So this method is called **k-Nearest Neighbour** since classification depends on the *k nearest neighbours*.
classification depends on k nearest neighbours.
Again, in kNN, it is true we are considering k neighbours, but we are giving equal importance to Again, in kNN, it is true we are considering k neighbours, but we are giving equal importance to
all, right? Is it justice? For example, take the case of k=4. We told it is a tie. But see, the 2 all, right? Is this justified? For example, take the tied case of k=4. As we can see, the 2
Red families are more closer to him than the other 2 Blue families. So he is more eligible to be Red neighbours are actually closer to the new member than the other 2 Blue neighbours, so he is more eligible to be
added to Red. So how do we mathematically explain that? We give some weights to each family added to the Red family. How do we mathematically explain that? We give some weights to each neighbour
depending on their distance to the new-comer. For those who are near to him get higher weights while depending on their distance to the new-comer: those who are nearer to him get higher weights, while
those are far away get lower weights. Then we add total weights of each family separately. Whoever those that are farther away get lower weights. Then we add the total weights of each family separately and classify the new-comer as part of whichever family
gets highest total weights, new-comer goes to that family. This is called **modified kNN**. received higher total weights. This is called **modified kNN** or **weighted kNN**.
So what are some important things you see here? So what are some important things you see here?
- You need to have information about all the houses in town, right? Because, we have to check - Because we have to check
the distance from new-comer to all the existing houses to find the nearest neighbour. If there the distance from the new-comer to all the existing houses to find the nearest neighbour(s), you need to have information about all of the houses in town, right? If there are plenty of houses and families, it takes a lot of memory, and also more time for calculation.
are plenty of houses and families, it takes lots of memory, and more time for calculation - There is almost zero time for any kind of "training" or preparation. Our "learning" involves only memorizing (storing) the data, before testing and classifying.
also.
- There is almost zero time for any kind of training or preparation.
Now let's see it in OpenCV. Now let's see this algorithm at work in OpenCV.
kNN in OpenCV kNN in OpenCV
------------- -------------
@ -67,11 +61,11 @@ We will do a simple example here, with two families (classes), just like above.
chapter, we will do an even better example. chapter, we will do an even better example.
So here, we label the Red family as **Class-0** (so denoted by 0) and Blue family as **Class-1** So here, we label the Red family as **Class-0** (so denoted by 0) and Blue family as **Class-1**
(denoted by 1). We create 25 families or 25 training data, and label them either Class-0 or Class-1. (denoted by 1). We create 25 neighbours or 25 training data, and label each of them as either part of Class-0 or Class-1.
We do all these with the help of Random Number Generator in Numpy. We can do this with the help of a Random Number Generator from NumPy.
Then we plot it with the help of Matplotlib. Red families are shown as Red Triangles and Blue Then we can plot it with the help of Matplotlib. Red neighbours are shown as Red Triangles and Blue
families are shown as Blue Squares. neighbours are shown as Blue Squares.
@code{.py} @code{.py}
import cv2 as cv import cv2 as cv
import numpy as np import numpy as np
@ -80,36 +74,36 @@ import matplotlib.pyplot as plt
# Feature set containing (x,y) values of 25 known/training data # Feature set containing (x,y) values of 25 known/training data
trainData = np.random.randint(0,100,(25,2)).astype(np.float32) trainData = np.random.randint(0,100,(25,2)).astype(np.float32)
# Labels each one either Red or Blue with numbers 0 and 1 # Label each one either Red or Blue with numbers 0 and 1
responses = np.random.randint(0,2,(25,1)).astype(np.float32) responses = np.random.randint(0,2,(25,1)).astype(np.float32)
# Take Red families and plot them # Take Red neighbours and plot them
red = trainData[responses.ravel()==0] red = trainData[responses.ravel()==0]
plt.scatter(red[:,0],red[:,1],80,'r','^') plt.scatter(red[:,0],red[:,1],80,'r','^')
# Take Blue families and plot them # Take Blue neighbours and plot them
blue = trainData[responses.ravel()==1] blue = trainData[responses.ravel()==1]
plt.scatter(blue[:,0],blue[:,1],80,'b','s') plt.scatter(blue[:,0],blue[:,1],80,'b','s')
plt.show() plt.show()
@endcode @endcode
You will get something similar to our first image. Since you are using random number generator, you You will get something similar to our first image. Since you are using a random number generator, you
will be getting different data each time you run the code. will get different data each time you run the code.
Next initiate the kNN algorithm and pass the trainData and responses to train the kNN (It constructs Next initiate the kNN algorithm and pass the trainData and responses to train the kNN. (Underneath the hood, it constructs
a search tree). a search tree: see the Additional Resources section below for more information on this.)
Then we will bring one new-comer and classify him to a family with the help of kNN in OpenCV. Before Then we will bring one new-comer and classify him as belonging to a family with the help of kNN in OpenCV. Before
going to kNN, we need to know something on our test data (data of new comers). Our data should be a running kNN, we need to know something about our test data (data of new-comers). Our data should be a
floating point array with size \f$number \; of \; testdata \times number \; of \; features\f$. Then we floating point array with size \f$number \; of \; testdata \times number \; of \; features\f$. Then we
find the nearest neighbours of new-comer. We can specify how many neighbours we want. It returns: find the nearest neighbours of the new-comer. We can specify *k*: how many neighbours we want. (Here we used 3.) It returns:
-# The label given to new-comer depending upon the kNN theory we saw earlier. If you want Nearest 1. The label given to the new-comer depending upon the kNN theory we saw earlier. If you want the *Nearest
Neighbour algorithm, just specify k=1 where k is the number of neighbours. Neighbour* algorithm, just specify k=1.
2. The labels of k-Nearest Neighbours. 2. The labels of the k-Nearest Neighbours.
3. Corresponding distances from new-comer to each nearest neighbour. 3. The corresponding distances from the new-comer to each nearest neighbour.
So let's see how it works. New comer is marked in green color. So let's see how it works. The new-comer is marked in green.
@code{.py} @code{.py}
newcomer = np.random.randint(0,100,(1,2)).astype(np.float32) newcomer = np.random.randint(0,100,(1,2)).astype(np.float32)
plt.scatter(newcomer[:,0],newcomer[:,1],80,'g','o') plt.scatter(newcomer[:,0],newcomer[:,1],80,'g','o')
@ -124,21 +118,21 @@ print( "distance: {}\n".format(dist) )
plt.show() plt.show()
@endcode @endcode
I got the result as follows: I got the following results:
@code{.py} @code{.py}
result: [[ 1.]] result: [[ 1.]]
neighbours: [[ 1. 1. 1.]] neighbours: [[ 1. 1. 1.]]
distance: [[ 53. 58. 61.]] distance: [[ 53. 58. 61.]]
@endcode @endcode
It says our new-comer got 3 neighbours, all from Blue family. Therefore, he is labelled as Blue It says that our new-comer's 3 nearest neighbours are all from the Blue family. Therefore, he is labelled as part of the Blue
family. It is obvious from plot below: family. It is obvious from the plot below:
![image](images/knn_simple.png) ![image](images/knn_simple.png)
If you have large number of data, you can just pass it as array. Corresponding results are also If you have multiple new-comers (test data), you can just pass them as an array. Corresponding results are also
obtained as arrays. obtained as arrays.
@code{.py} @code{.py}
# 10 new comers # 10 new-comers
newcomers = np.random.randint(0,100,(10,2)).astype(np.float32) newcomers = np.random.randint(0,100,(10,2)).astype(np.float32)
ret, results,neighbours,dist = knn.findNearest(newcomer, 3) ret, results,neighbours,dist = knn.findNearest(newcomer, 3)
# The results also will contain 10 labels. # The results also will contain 10 labels.
@ -146,8 +140,11 @@ ret, results,neighbours,dist = knn.findNearest(newcomer, 3)
Additional Resources Additional Resources
-------------------- --------------------
-# [NPTEL notes on Pattern Recognition, Chapter 1. [NPTEL notes on Pattern Recognition, Chapter
11](http://www.nptel.iitm.ac.in/courses/106108057/12) 11](https://nptel.ac.in/courses/106/108/106108057/)
2. [Wikipedia article on Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search)
3. [Wikipedia article on k-d tree](https://en.wikipedia.org/wiki/K-d_tree)
Exercises Exercises
--------- ---------
1. Try repeating the above with more classes and different choices of k. Does choosing k become harder with more classes in the same 2D feature space?

View File

@ -347,7 +347,8 @@ public:
htext(nstripes, NULL), htext(nstripes, NULL),
cbuf0(nstripes, NULL), cbuf0(nstripes, NULL),
sad_short(nstripes, NULL), sad_short(nstripes, NULL),
hsad_short(nstripes, NULL) hsad_short(nstripes, NULL),
prefilter()
{ {
const int wsz = params.SADWindowSize; const int wsz = params.SADWindowSize;
const int ndisp = params.numDisparities; const int ndisp = params.numDisparities;
@ -379,7 +380,7 @@ public:
if (params.useNormPrefilter()) if (params.useNormPrefilter())
{ {
for (size_t i = 0; i < 2; ++i) for (size_t i = 0; i < 2; ++i)
area.allocate(prefilter[0], width + params.preFilterSize + 2); area.allocate(prefilter[i], width + params.preFilterSize + 2);
} }
area.commit(); area.commit();

View File

@ -809,6 +809,55 @@ protected:
} }
}; };
// Regression entry point: runs the CV_StereoBMTest fixture through safe_run().
TEST(Calib3d_StereoBM, regression)
{
    CV_StereoBMTest test;
    test.safe_run();
}
// Parameter layout of the buffer-allocation test:
//   < preFilter, < preFilterCap, SADWindowSize > >
typedef tuple< int, tuple< int, int > > BufferBM_Params_t;
typedef testing::TestWithParam< BufferBM_Params_t > Calib3d_StereoBM_BufferBM;

// Prefilter modes the test is run with.
const int preFilters[] = { StereoBM::PREFILTER_NORMALIZED_RESPONSE, StereoBM::PREFILTER_XSOBEL };

// (preFilterCap, SADWindowSize) pairs the test is run with.
// NOTE(review): judging by the name, the two pairs are presumably meant to fall on
// either side of the matcher's "use shorts" buffer condition - confirm against stereobm.cpp.
const tuple< int, int > useShortsConditions[] = { make_tuple(30, 19), make_tuple(32, 23) };
// Runs StereoBM once per (prefilter type, prefilter cap, block size) combination on the
// "teddy" stereo pair and asserts that a non-empty disparity map comes back.
TEST_P(Calib3d_StereoBM_BufferBM, memAllocsTest)
{
    // Unpack < preFilter, < preFilterCap, SADWindowSize > >.
    const int filterType = get<0>(GetParam());
    const int filterCap = get<0>(get<1>(GetParam()));
    const int blockSize = get<1>(get<1>(GetParam()));

    const String datasetDir = cvtest::TS::ptr()->get_data_path() + "cv/stereomatching/datasets/teddy/";

    // Both inputs must load; each is checked right after its own read.
    Mat left = imread(datasetDir + "im2.png", 0);
    ASSERT_FALSE(left.empty());
    Mat right = imread(datasetDir + "im6.png", 0);
    ASSERT_FALSE(right.empty());

    Mat disparity;
    {
        // The matcher is confined to this scope; the disparity map is declared outside it.
        Ptr<StereoBM> matcher = StereoBM::create(16, 9);
        matcher->setPreFilterType(filterType);
        matcher->setPreFilterCap(filterCap);
        matcher->setBlockSize(blockSize);
        matcher->compute(left, right, disparity);
        ASSERT_FALSE(disparity.empty());
    }
}
// Instantiate the suite (empty prefix) for every preFilters x useShortsConditions combination.
INSTANTIATE_TEST_CASE_P(/*nothing*/, Calib3d_StereoBM_BufferBM,
    testing::Combine(testing::ValuesIn(preFilters), testing::ValuesIn(useShortsConditions)));
//----------------------------------- StereoSGBM test ----------------------------------------------------- //----------------------------------- StereoSGBM test -----------------------------------------------------
class CV_StereoSGBMTest : public CV_StereoMatchingTest class CV_StereoSGBMTest : public CV_StereoMatchingTest
@ -869,8 +918,6 @@ protected:
} }
}; };
TEST(Calib3d_StereoBM, regression) { CV_StereoBMTest test; test.safe_run(); }
TEST(Calib3d_StereoSGBM, regression) { CV_StereoSGBMTest test; test.safe_run(); } TEST(Calib3d_StereoSGBM, regression) { CV_StereoSGBMTest test; test.safe_run(); }
TEST(Calib3d_StereoSGBM_HH4, regression) TEST(Calib3d_StereoSGBM_HH4, regression)

View File

@ -210,8 +210,6 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); }
#define IPP_DISABLE_HOUGH 1 // improper integration/results #define IPP_DISABLE_HOUGH 1 // improper integration/results
#define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7 #define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7
#define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3)
// Temporary disabled named IPP region. Performance // Temporary disabled named IPP region. Performance
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations #define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653) #define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)

View File

@ -100,6 +100,15 @@ public:
dstWeightsData[i] = w; dstWeightsData[i] = w;
dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale; dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
} }
// We will use blobs to store origin weights and bias to restore them in case of reinitialization.
weights_.copyTo(blobs[0].reshape(1, 1));
bias_.copyTo(blobs[1].reshape(1, 1));
}
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
blobs[0].reshape(1, 1).copyTo(weights_);
blobs[1].reshape(1, 1).copyTo(bias_);
} }
void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
@ -392,7 +401,7 @@ public:
shape[1] = weights_.total(); shape[1] = weights_.total();
auto weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), weights_.data); auto weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), weights_.data);
auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), bias_.data); auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), bias_.data);
auto scale_node = std::make_shared<ngraph::op::v1::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); auto scale_node = std::make_shared<ngraph::op::v0::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY); auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift)); return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift));
} }

View File

@ -25,8 +25,8 @@ namespace cv { namespace dnn {
class ResizeLayerImpl : public ResizeLayer class ResizeLayerImpl : public ResizeLayer
{ {
public: public:
ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(params.get<int>("zoom_factor_x", params.get<int>("zoom_factor", 0))), ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(params.get<float>("zoom_factor_x", params.get<float>("zoom_factor", 0))),
zoomFactorHeight(params.get<int>("zoom_factor_y", params.get<int>("zoom_factor", 0))), zoomFactorHeight(params.get<float>("zoom_factor_y", params.get<float>("zoom_factor", 0))),
scaleWidth(0), scaleHeight(0) scaleWidth(0), scaleHeight(0)
{ {
setParamsFrom(params); setParamsFrom(params);
@ -256,7 +256,7 @@ public:
protected: protected:
int outWidth, outHeight; int outWidth, outHeight;
const int zoomFactorWidth, zoomFactorHeight; const float zoomFactorWidth, zoomFactorHeight;
String interpolation; String interpolation;
float scaleWidth, scaleHeight; float scaleWidth, scaleHeight;
bool alignCorners; bool alignCorners;

View File

@ -313,7 +313,7 @@ public:
auto weight = blobs.empty() ? ieInpNode1 : auto weight = blobs.empty() ? ieInpNode1 :
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
node = std::make_shared<ngraph::op::v1::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY); node = std::make_shared<ngraph::op::v0::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY);
} }
if (hasBias || !hasWeights) if (hasBias || !hasWeights)
{ {

View File

@ -495,8 +495,9 @@ public:
ResizeBilinearSubgraph() ResizeBilinearSubgraph()
{ {
int input = addNodeToMatch(""); int input = addNodeToMatch("");
int shapeSource = addNodeToMatch("");
int shape = addNodeToMatch("Shape", input); int shape = addNodeToMatch("Shape", shapeSource);
int stack = addNodeToMatch("Const"); int stack = addNodeToMatch("Const");
int stack_1 = addNodeToMatch("Const"); int stack_1 = addNodeToMatch("Const");
int stack_2 = addNodeToMatch("Const"); int stack_2 = addNodeToMatch("Const");
@ -504,7 +505,7 @@ public:
int factorY = addNodeToMatch("Const"); int factorY = addNodeToMatch("Const");
int mul = addNodeToMatch("Mul", strided_slice, factorY); int mul = addNodeToMatch("Mul", strided_slice, factorY);
shape = addNodeToMatch("Shape", input); shape = addNodeToMatch("Shape", shapeSource);
stack = addNodeToMatch("Const"); stack = addNodeToMatch("Const");
stack_1 = addNodeToMatch("Const"); stack_1 = addNodeToMatch("Const");
stack_2 = addNodeToMatch("Const"); stack_2 = addNodeToMatch("Const");
@ -519,6 +520,51 @@ public:
} }
}; };
// In case of resizing by factor (downscaling variant).
// Matches a ResizeBilinear whose target size is computed by dividing two
// dimensions of a tensor's shape by constant factors:
//   Shape -> StridedSlice -> Cast -> RealDiv(Const) -> Cast   (once per axis)
// The two results are packed and fed to ResizeBilinear. The matched nodes are
// fused into a single ResizeBilinear with inputs (input, factorY, factorX).
class ResizeBilinearSubgraphDown : public TFSubgraph
{
public:
ResizeBilinearSubgraphDown()
{
// Tensor that is being resized.
int input = addNodeToMatch("");
// Node whose shape drives the output size. It is matched as a separate
// pattern node, i.e. it is not required to be the same node as `input`.
// NOTE(review): the empty op name presumably matches any node - confirm.
int shapeSource = addNodeToMatch("");
// First axis (Y): shape -> slice one dimension -> cast -> divide by factorY -> cast.
int shape = addNodeToMatch("Shape", shapeSource);
int stack = addNodeToMatch("Const");
int stack_1 = addNodeToMatch("Const");
int stack_2 = addNodeToMatch("Const");
int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
int factorY = addNodeToMatch("Const");
int div = addNodeToMatch("RealDiv", addNodeToMatch("Cast", strided_slice), factorY);
int cast = addNodeToMatch("Cast", div);
// Second axis (X): the same chain again, built from its own Shape/Const nodes.
shape = addNodeToMatch("Shape", shapeSource);
stack = addNodeToMatch("Const");
stack_1 = addNodeToMatch("Const");
stack_2 = addNodeToMatch("Const");
strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
int factorX = addNodeToMatch("Const");
int div_1 = addNodeToMatch("RealDiv", addNodeToMatch("Cast", strided_slice), factorX);
int cast_1 = addNodeToMatch("Cast", div_1);
// Both computed sizes are packed into the size argument of ResizeBilinear.
int pack = addNodeToMatch("Pack", cast, cast_1);
addNodeToMatch("ResizeBilinear", input, pack);
// Replace the whole match with ResizeBilinear(input, factorY, factorX).
setFusedNode("ResizeBilinear", input, factorY, factorX);
}
// Rewrites the two factor constants (inputNodes[1] and inputNodes[2]) in place,
// replacing each value with its reciprocal: the matched graph divides by the
// factor (RealDiv), so the stored value becomes 1 / factor.
// NOTE(review): reads/writes double_val(0) only - assumes each factor Const keeps
// its scalar in the double_val field and is non-zero; confirm against real models.
virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
{
// Index 0 is the resized input; indices 1 and 2 are factorY and factorX.
for (int i = 1; i < 3; ++i)
{
tensorflow::TensorProto* factor = inputNodes[i]->mutable_attr()->at("value").mutable_tensor();
factor->set_double_val(0, 1.0 / factor->double_val(0));
}
}
};
// In case of resizing by factor. // In case of resizing by factor.
class UpsamplingKerasSubgraph : public TFSubgraph class UpsamplingKerasSubgraph : public TFSubgraph
{ {
@ -702,6 +748,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(true))); subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(true)));
subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(false))); subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(false)));
subgraphs.push_back(Ptr<Subgraph>(new FlattenProdSubgraph())); subgraphs.push_back(Ptr<Subgraph>(new FlattenProdSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraphDown()));
for (int i = 0; i < net.node_size(); ++i) for (int i = 0; i < net.node_size(); ++i)
{ {

View File

@ -1932,10 +1932,10 @@ void TFImporter::populateNet(Net dstNet)
{ {
Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
CV_CheckTypeEQ(factorHeight.type(), CV_32SC1, ""); CV_CheckEQ(factorHeight.total(), (size_t)1, ""); factorHeight.convertTo(factorHeight, CV_32F);
CV_CheckTypeEQ(factorWidth.type(), CV_32SC1, ""); CV_CheckEQ(factorWidth.total(), (size_t)1, ""); factorWidth.convertTo(factorWidth, CV_32F);
layerParams.set("zoom_factor_x", factorWidth.at<int>(0)); layerParams.set("zoom_factor_x", factorWidth.at<float>(0));
layerParams.set("zoom_factor_y", factorHeight.at<int>(0)); layerParams.set("zoom_factor_y", factorHeight.at<float>(0));
} }
else else
CV_Assert(layer.input_size() == 2 || layer.input_size() == 3); CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);

View File

@ -110,6 +110,7 @@ void normAssertDetections(
int testClassId = testClassIds[i]; int testClassId = testClassIds[i];
const cv::Rect2d& testBox = testBoxes[i]; const cv::Rect2d& testBox = testBoxes[i];
bool matched = false; bool matched = false;
double topIoU = 0;
for (int j = 0; j < refBoxes.size() && !matched; ++j) for (int j = 0; j < refBoxes.size() && !matched; ++j)
{ {
if (!matchedRefBoxes[j] && testClassId == refClassIds[j] && if (!matchedRefBoxes[j] && testClassId == refClassIds[j] &&
@ -117,7 +118,8 @@ void normAssertDetections(
{ {
double interArea = (testBox & refBoxes[j]).area(); double interArea = (testBox & refBoxes[j]).area();
double iou = interArea / (testBox.area() + refBoxes[j].area() - interArea); double iou = interArea / (testBox.area() + refBoxes[j].area() - interArea);
if (std::abs(iou - 1.0) < boxes_iou_diff) topIoU = std::max(topIoU, iou);
if (1.0 - iou < boxes_iou_diff)
{ {
matched = true; matched = true;
matchedRefBoxes[j] = true; matchedRefBoxes[j] = true;
@ -125,8 +127,11 @@ void normAssertDetections(
} }
} }
if (!matched) if (!matched)
{
std::cout << cv::format("Unmatched prediction: class %d score %f box ", std::cout << cv::format("Unmatched prediction: class %d score %f box ",
testClassId, testScore) << testBox << std::endl; testClassId, testScore) << testBox << std::endl;
std::cout << "Highest IoU: " << topIoU << std::endl;
}
EXPECT_TRUE(matched) << comment; EXPECT_TRUE(matched) << comment;
} }

View File

@ -485,7 +485,7 @@ TEST_P(Test_Darknet_nets, YOLOv3)
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
{ {
scoreDiff = 0.006; scoreDiff = 0.006;
iouDiff = 0.018; iouDiff = 0.042;
} }
else if (target == DNN_TARGET_CUDA_FP16) else if (target == DNN_TARGET_CUDA_FP16)
{ {
@ -513,15 +513,10 @@ TEST_P(Test_Darknet_nets, YOLOv3)
#if defined(INF_ENGINE_RELEASE) #if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{ {
if (INF_ENGINE_VER_MAJOR_LE(2018050000) && target == DNN_TARGET_OPENCL) if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (INF_ENGINE_VER_MAJOR_EQ(2019020000)) else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
{ applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
}
else if (target == DNN_TARGET_MYRIAD && else if (target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);

View File

@ -1825,4 +1825,61 @@ TEST_P(Layer_Test_Slice, variable_input_shape)
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets()); INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets());
// Fixture parameterized by a (backend, target) pair.
typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_BatchNorm;
TEST_P(Layer_Test_BatchNorm, fusion)
{
// This test reinitializes the network by forwarding an input with a different batch size.
// We check that the BatchNorm layer weights are restored after fusion.
int backendId = get<0>(GetParam());
int targetId = get<1>(GetParam());
const int ch = 4;
// Random per-channel statistics for BatchNorm and scale factors for the Scale layer.
Mat mean(1, ch, CV_32F), var(1, ch, CV_32F), weights(1, ch, CV_32F);
randu(mean, 0, 1);
randu(var, 0, 1);
randu(weights, 0, 1);
Net net;
{
// BatchNorm without its own weight/bias: only mean and variance blobs are supplied.
LayerParams lp;
lp.type = "BatchNorm";
lp.name = "bn";
lp.set("has_weight", false);
lp.set("has_bias", false);
lp.blobs.push_back(mean);
lp.blobs.push_back(var);
net.addLayerToPrev(lp.name, lp.type, lp);
}
{
// Scale layer (no bias) appended right after BatchNorm.
LayerParams lp;
lp.type = "Scale";
lp.name = "scale";
lp.set("has_bias", false);
lp.blobs.push_back(weights);
net.addLayerToPrev(lp.name, lp.type, lp);
}
// 4x5 random image with `ch` channels.
Mat inp(4, 5, CV_32FC(ch));
randu(inp, 0, 1);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
// First pass: a single image; its output is the reference.
net.setInput(blobFromImage(inp));
Mat ref = net.forward();
// Second pass: a batch made of two copies of the same image.
net.setInput(blobFromImages(std::vector<Mat>(2, inp)));
Mat out = net.forward();
// Every sample of the batched output must match the single-image reference.
for (int i = 0; i < 2; ++i)
{
// Select the i-th sample along the first (batch) axis, keep all other axes whole.
std::vector<Range> ranges(4, Range::all());
ranges[0].start = i;
ranges[0].end = i + 1;
normAssert(out(ranges), ref);
}
}
// Run the test for every backend/target pair returned by dnnBackendsAndTargets().
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets());
}} // namespace }} // namespace

View File

@ -323,17 +323,11 @@ TEST_P(Test_ONNX_layers, BatchNormalization3D)
TEST_P(Test_ONNX_layers, BatchNormalizationUnfused) TEST_P(Test_ONNX_layers, BatchNormalizationUnfused)
{ {
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
testONNXModels("frozenBatchNorm2d"); testONNXModels("frozenBatchNorm2d");
} }
TEST_P(Test_ONNX_layers, BatchNormalizationSubgraph) TEST_P(Test_ONNX_layers, BatchNormalizationSubgraph)
{ {
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
testONNXModels("batch_norm_subgraph"); testONNXModels("batch_norm_subgraph");
} }
@ -375,9 +369,8 @@ TEST_P(Test_ONNX_layers, Expand)
TEST_P(Test_ONNX_layers, ExpandHW) TEST_P(Test_ONNX_layers, ExpandHW)
{ {
// ngraph::op::v1::Multiply bug if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
testONNXModels("expand_hw"); testONNXModels("expand_hw");
} }

View File

@ -1048,6 +1048,7 @@ TEST_P(Test_TensorFlow_layers, resize_bilinear)
{ {
runTensorFlowNet("resize_bilinear"); runTensorFlowNet("resize_bilinear");
runTensorFlowNet("resize_bilinear_factor"); runTensorFlowNet("resize_bilinear_factor");
runTensorFlowNet("resize_bilinear_down");
} }
TEST_P(Test_TensorFlow_layers, tf2_dense) TEST_P(Test_TensorFlow_layers, tf2_dense)

View File

@ -51,7 +51,7 @@ OCL_TEST_P(Feature2DFixture, KeypointsSame)
for (size_t i = 0; i < keypoints.size(); ++i) for (size_t i = 0; i < keypoints.size(); ++i)
{ {
EXPECT_GE(KeyPoint::overlap(keypoints[i], ukeypoints[i]), 0.95); EXPECT_GE(KeyPoint::overlap(keypoints[i], ukeypoints[i]), 0.95);
EXPECT_NEAR(keypoints[i].angle, ukeypoints[i].angle, 0.001); EXPECT_NEAR(keypoints[i].angle, ukeypoints[i].angle, 0.05);
} }
} }

View File

@ -115,7 +115,7 @@ file(GLOB imgcodecs_ext_hdrs
if(IOS) if(IOS)
list(APPEND imgcodecs_srcs ${CMAKE_CURRENT_LIST_DIR}/src/ios_conversions.mm) list(APPEND imgcodecs_srcs ${CMAKE_CURRENT_LIST_DIR}/src/ios_conversions.mm)
list(APPEND IMGCODECS_LIBRARIES "-framework Accelerate" "-framework CoreGraphics" "-framework QuartzCore" "-framework AssetsLibrary") list(APPEND IMGCODECS_LIBRARIES "-framework Accelerate" "-framework CoreGraphics" "-framework QuartzCore")
endif() endif()
if(APPLE_FRAMEWORK) if(APPLE_FRAMEWORK)
list(APPEND IMGCODECS_LIBRARIES "-framework UIKit") list(APPEND IMGCODECS_LIBRARIES "-framework UIKit")

View File

@ -11,3 +11,9 @@ ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
ocv_define_module(imgproc opencv_core WRAP java python js) ocv_define_module(imgproc opencv_core WRAP java python js)
ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR)
option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF)
if(OPENCV_IPP_GAUSSIAN_BLUR)
ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1")
endif()

View File

@ -238,15 +238,13 @@ OCL_PERF_TEST_P(ScharrFixture, Scharr,
///////////// GaussianBlur //////////////////////// ///////////// GaussianBlur ////////////////////////
typedef FilterFixture GaussianBlurFixture; typedef FilterFixture OCL_GaussianBlurFixture;
OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur, PERF_TEST_P_(OCL_GaussianBlurFixture, GaussianBlur)
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, OCL_PERF_ENUM(3, 5, 7)))
{ {
const FilterParams params = GetParam(); const FilterParams& params = GetParam();
const Size srcSize = get<0>(params); const Size srcSize = get<0>(params);
const int type = get<1>(params), ksize = get<2>(params); const int type = get<1>(params), ksize = get<2>(params);
const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 2 + DBL_EPSILON : 3e-4;
checkDeviceMaxMemoryAllocSize(srcSize, type); checkDeviceMaxMemoryAllocSize(srcSize, type);
@ -255,9 +253,42 @@ OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
OCL_TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 1, 1, cv::BORDER_CONSTANT); OCL_TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 1, 1, cv::BORDER_CONSTANT);
SANITY_CHECK(dst, eps); SANITY_CHECK_NOTHING();
} }
// Unprefixed set: OCL_TEST_SIZES x OCL_TEST_TYPES with kernel sizes 3, 5 and 7.
INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_GaussianBlurFixture,
::testing::Combine(
OCL_TEST_SIZES,
OCL_TEST_TYPES,
OCL_PERF_ENUM(3, 5, 7)
)
);
// "SIFT" set: 1080p single-channel float images with larger kernels (11..27).
// NOTE(review): presumably mirrors the blur sizes used by SIFT - confirm.
INSTANTIATE_TEST_CASE_P(SIFT, OCL_GaussianBlurFixture,
::testing::Combine(
::testing::Values(sz1080p),
::testing::Values(CV_32FC1),
OCL_PERF_ENUM(11, 13, 17, 21, 27)
)
);
// Exhaustive set: 1080p, every listed depth (8U..64F) with 1-4 channels, odd kernel sizes 3..29.
// NOTE(review): the DISABLED_ prefix presumably keeps this set out of default runs
// (GoogleTest convention) - confirm it applies to instantiation prefixes here.
INSTANTIATE_TEST_CASE_P(DISABLED_FULL, OCL_GaussianBlurFixture,
::testing::Combine(
::testing::Values(sz1080p),
::testing::Values(
CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
CV_8SC1, CV_8SC2, CV_8SC3, CV_8SC4,
CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4,
CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
CV_32SC1, CV_32SC2, CV_32SC3, CV_32SC4,
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4,
CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4
),
OCL_PERF_ENUM(3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29)
)
);
///////////// Filter2D //////////////////////// ///////////// Filter2D ////////////////////////
typedef FilterFixture Filter2DFixture; typedef FilterFixture Filter2DFixture;

View File

@ -470,9 +470,14 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#endif #endif
#if 0 //defined HAVE_IPP #if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH 1
// IW 2017u2 has bug which doesn't allow use of partial inMem with tiling // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL #if IPP_VERSION_X100 < 201900
#define IPP_GAUSSIANBLUR_PARALLEL 0 #define IPP_GAUSSIANBLUR_PARALLEL 0
#else #else
#define IPP_GAUSSIANBLUR_PARALLEL 1 #define IPP_GAUSSIANBLUR_PARALLEL 1
@ -555,6 +560,14 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
return false; return false;
const int threads = ippiSuggestThreadsNum(iwDst, 2); const int threads = ippiSuggestThreadsNum(iwDst, 2);
if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == 1 && ksize.width > 25))
return false;
if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == 1 && src.type() == CV_16SC4))
return false;
if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4))
return false;
if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
bool ok; bool ok;
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
@ -655,8 +668,6 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
CV_OVX_RUN(true, CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
//CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix())) if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
{ {
std::vector<ufixedpoint16> fkx, fky; std::vector<ufixedpoint16> fkx, fky;
@ -681,6 +692,11 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
} }
} }
#if defined ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
#endif
sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType); sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
} }

View File

@ -519,8 +519,7 @@ public class Converters {
Mat res; Mat res;
int lCount = (pts != null) ? pts.size() : 0; int lCount = (pts != null) ? pts.size() : 0;
if (lCount > 0) { if (lCount > 0) {
for (MatOfPoint vpt : pts) mats.addAll(pts);
mats.add(vpt);
res = vector_Mat_to_Mat(mats); res = vector_Mat_to_Mat(mats);
} else { } else {
res = new Mat(); res = new Mat();
@ -568,8 +567,7 @@ public class Converters {
Mat res; Mat res;
int lCount = (pts != null) ? pts.size() : 0; int lCount = (pts != null) ? pts.size() : 0;
if (lCount > 0) { if (lCount > 0) {
for (MatOfPoint2f vpt : pts) mats.addAll(pts);
mats.add(vpt);
res = vector_Mat_to_Mat(mats); res = vector_Mat_to_Mat(mats);
} else { } else {
res = new Mat(); res = new Mat();
@ -600,8 +598,7 @@ public class Converters {
Mat res; Mat res;
int lCount = (pts != null) ? pts.size() : 0; int lCount = (pts != null) ? pts.size() : 0;
if (lCount > 0) { if (lCount > 0) {
for (MatOfPoint3f vpt : pts) mats.addAll(pts);
mats.add(vpt);
res = vector_Mat_to_Mat(mats); res = vector_Mat_to_Mat(mats);
} else { } else {
res = new Mat(); res = new Mat();
@ -614,8 +611,7 @@ public class Converters {
Mat res; Mat res;
int lCount = (kps != null) ? kps.size() : 0; int lCount = (kps != null) ? kps.size() : 0;
if (lCount > 0) { if (lCount > 0) {
for (MatOfKeyPoint vkp : kps) mats.addAll(kps);
mats.add(vkp);
res = vector_Mat_to_Mat(mats); res = vector_Mat_to_Mat(mats);
} else { } else {
res = new Mat(); res = new Mat();
@ -714,8 +710,7 @@ public class Converters {
Mat res; Mat res;
int lCount = (lvdm != null) ? lvdm.size() : 0; int lCount = (lvdm != null) ? lvdm.size() : 0;
if (lCount > 0) { if (lCount > 0) {
for (MatOfDMatch vdm : lvdm) mats.addAll(lvdm);
mats.add(vdm);
res = vector_Mat_to_Mat(mats); res = vector_Mat_to_Mat(mats);
} else { } else {
res = new Mat(); res = new Mat();
@ -746,8 +741,7 @@ public class Converters {
Mat res; Mat res;
int lCount = (lvb != null) ? lvb.size() : 0; int lCount = (lvb != null) ? lvb.size() : 0;
if (lCount > 0) { if (lCount > 0) {
for (MatOfByte vb : lvb) mats.addAll(lvb);
mats.add(vb);
res = vector_Mat_to_Mat(mats); res = vector_Mat_to_Mat(mats);
} else { } else {
res = new Mat(); res = new Mat();

View File

@ -392,7 +392,7 @@ int CvCaptureCAM::startCaptureDevice(int cameraNum) {
[mCaptureDecompressedVideoOutput setVideoSettings:pixelBufferOptions]; [mCaptureDecompressedVideoOutput setVideoSettings:pixelBufferOptions];
mCaptureDecompressedVideoOutput.alwaysDiscardsLateVideoFrames = YES; mCaptureDecompressedVideoOutput.alwaysDiscardsLateVideoFrames = YES;
#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR #if (TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR) && !TARGET_OS_MACCATALYST
mCaptureDecompressedVideoOutput.minFrameDuration = CMTimeMake(1, 30); mCaptureDecompressedVideoOutput.minFrameDuration = CMTimeMake(1, 30);
#endif #endif

View File

@ -299,11 +299,13 @@
} }
else else
{ {
#if !TARGET_OS_MACCATALYST
// Deprecated in 6.0; here for backward compatibility // Deprecated in 6.0; here for backward compatibility
if ([self.captureVideoPreviewLayer isOrientationSupported]) if ([self.captureVideoPreviewLayer isOrientationSupported])
{ {
[self.captureVideoPreviewLayer setOrientation:self.defaultAVCaptureVideoOrientation]; [self.captureVideoPreviewLayer setOrientation:self.defaultAVCaptureVideoOrientation];
} }
#endif
} }
if (parentView != nil) { if (parentView != nil) {

View File

@ -78,7 +78,7 @@ IF ERRORLEVEL 1 (
POPD POPD
cl /? >NUL 2>NUL <NUL cl /? >NUL 2>NUL <NUL
if %ERRORLEVEL% NEQ 0 ( if %ERRORLEVEL% NEQ 0 (
set "MSG=Can't detect Microsoft Visial Studio C++ compiler (cl.exe). MSVS 2015/2017/2019 are supported only from standard locations" set "MSG=Can't detect Microsoft Visual Studio C++ compiler (cl.exe). MSVS 2015/2017/2019 are supported only from standard locations"
goto die goto die
) )

View File

@ -234,6 +234,12 @@ def createSSDGraph(modelPath, configPath, outputPath):
# Connect input node to the first layer # Connect input node to the first layer
assert(graph_def.node[0].op == 'Placeholder') assert(graph_def.node[0].op == 'Placeholder')
try:
input_shape = graph_def.node[0].attr['shape']['shape'][0]['dim']
input_shape[1]['size'] = image_height
input_shape[2]['size'] = image_width
except:
print("Input shapes are undefined")
# assert(graph_def.node[1].op == 'Conv2D') # assert(graph_def.node[1].op == 'Conv2D')
weights = graph_def.node[1].input[-1] weights = graph_def.node[1].input[-1]
for i in range(len(graph_def.node[1].input)): for i in range(len(graph_def.node[1].input)):

View File

@ -468,7 +468,6 @@ private:
cl_kernel m_kernelImg; cl_kernel m_kernelImg;
cl_mem m_img_src; // used as src in case processing of cl image cl_mem m_img_src; // used as src in case processing of cl image
cl_mem m_mem_obj; cl_mem m_mem_obj;
cl_event m_event;
}; };
@ -498,7 +497,6 @@ App::App(CommandLineParser& cmd)
m_kernelImg = 0; m_kernelImg = 0;
m_img_src = 0; m_img_src = 0;
m_mem_obj = 0; m_mem_obj = 0;
m_event = 0;
} // ctor } // ctor
@ -529,11 +527,6 @@ App::~App()
m_mem_obj = 0; m_mem_obj = 0;
} }
if (m_event)
{
clReleaseEvent(m_event);
}
if (m_kernelBuf) if (m_kernelBuf)
{ {
clReleaseKernel(m_kernelBuf); clReleaseKernel(m_kernelBuf);
@ -775,11 +768,13 @@ int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem
size_t origin[] = { 0, 0, 0 }; size_t origin[] = { 0, 0, 0 };
size_t region[] = { (size_t)frame.cols, (size_t)frame.rows, 1 }; size_t region[] = { (size_t)frame.cols, (size_t)frame.rows, 1 };
res = clEnqueueCopyImage(m_queue, m_img_src, mem, origin, origin, region, 0, 0, &m_event); cl_event asyncEvent = 0;
res = clEnqueueCopyImage(m_queue, m_img_src, mem, origin, origin, region, 0, 0, &asyncEvent);
if (CL_SUCCESS != res) if (CL_SUCCESS != res)
return -1; return -1;
res = clWaitForEvents(1, &m_event); res = clWaitForEvents(1, &asyncEvent);
clReleaseEvent(asyncEvent);
if (CL_SUCCESS != res) if (CL_SUCCESS != res)
return -1; return -1;
@ -795,19 +790,17 @@ int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem
} }
} }
m_event = clCreateUserEvent(m_context, &res);
if (0 == m_event || CL_SUCCESS != res)
return -1;
// process left half of frame in OpenCL // process left half of frame in OpenCL
size_t size[] = { (size_t)frame.cols / 2, (size_t)frame.rows }; size_t size[] = { (size_t)frame.cols / 2, (size_t)frame.rows };
res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &m_event); cl_event asyncEvent = 0;
res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &asyncEvent);
if (CL_SUCCESS != res) if (CL_SUCCESS != res)
return -1; return -1;
res = clWaitForEvents(1, &m_event); res = clWaitForEvents(1, &asyncEvent);
clReleaseEvent(asyncEvent);
if (CL_SUCCESS != res) if (CL_SUCCESS != res)
return - 1; return -1;
mem_obj[0] = mem; mem_obj[0] = mem;

View File

@ -31,7 +31,7 @@ def Hist_and_Backproj(val):
histImg = np.zeros((h, w, 3), dtype=np.uint8) histImg = np.zeros((h, w, 3), dtype=np.uint8)
for i in range(bins): for i in range(bins):
cv.rectangle(histImg, (i*bin_w, h), ( (i+1)*bin_w, h - int(round( hist[i]*h/255.0 )) ), (0, 0, 255), cv.FILLED) cv.rectangle(histImg, (i*bin_w, h), ( (i+1)*bin_w, h - int(np.round( hist[i]*h/255.0 )) ), (0, 0, 255), cv.FILLED)
cv.imshow('Histogram', histImg) cv.imshow('Histogram', histImg)
## [Draw the histogram] ## [Draw the histogram]