format files to ANSI C style with coolformat

change the download channels to oclchannels()
fix bugs of arithm functions
perf fix of bilateral
bug fix of split test case
add build_warps functions
This commit is contained in:
niko 2012-10-11 16:22:47 +08:00
parent 69fbc6102c
commit 97156897b2
78 changed files with 15433 additions and 12118 deletions

View File

@ -55,22 +55,22 @@ namespace cv
//////////////////////////////// oclMat ////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) {}
inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) {}
inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type );
}
inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type );
}
inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if(_rows > 0 && _cols > 0)
{
@ -80,7 +80,7 @@ namespace cv
}
inline oclMat::oclMat(Size _size, int _type, const Scalar &_s)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if( _size.height > 0 && _size.width > 0 )
{
@ -91,7 +91,7 @@ namespace cv
inline oclMat::oclMat(const oclMat &m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels)
refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
{
if( refcount )
CV_XADD(refcount, 1);
@ -99,7 +99,7 @@ namespace cv
inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0),
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
cv::Mat m(_rows, _cols, _type, _data, _step);
upload(m);
@ -121,7 +121,7 @@ namespace cv
inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step)
: flags(0), rows(0), cols(0),
step(0), data(0), refcount(0),
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
cv::Mat m(_size, _type, _data, _step);
upload(m);
@ -152,7 +152,6 @@ namespace cv
wholerows = m.wholerows;
wholecols = m.wholecols;
offset = m.offset;
download_channels = m.download_channels;
if( rowRange == Range::all() )
rows = m.rows;
else
@ -184,7 +183,7 @@ namespace cv
inline oclMat::oclMat(const oclMat &m, const Rect &roi)
: flags(m.flags), rows(roi.height), cols(roi.width),
step(m.step), data(m.data), refcount(m.refcount),
datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels)
datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
{
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
offset += roi.y * step + roi.x * elemSize();
@ -197,7 +196,7 @@ namespace cv
}
inline oclMat::oclMat(const Mat &m)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0), download_channels(0)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0)
{
//clCxt = Context::getContext();
upload(m);
@ -227,7 +226,6 @@ namespace cv
wholerows = m.wholerows;
wholecols = m.wholecols;
refcount = m.refcount;
download_channels = m.download_channels;
}
return *this;
}
@ -330,7 +328,6 @@ namespace cv
std::swap( clCxt, b.clCxt );
std::swap( wholerows, b.wholerows );
std::swap( wholecols, b.wholecols );
std::swap( download_channels, b.download_channels);
}
inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const
@ -388,7 +385,7 @@ namespace cv
}
inline size_t oclMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), oclchannels())));
}
inline size_t oclMat::elemSize1() const
{
@ -398,6 +395,10 @@ namespace cv
{
return CV_MAT_TYPE(flags);
}
// Builds a type code from this matrix's depth() combined with oclchannels()
// (instead of channels()), so a 3-channel matrix yields a 4-channel type code.
inline int oclMat::ocltype() const
{
    const int elemDepth = depth();
    const int paddedChannels = oclchannels();
    return CV_MAKE_TYPE(elemDepth, paddedChannels);
}
inline int oclMat::depth() const
{
return CV_MAT_DEPTH(flags);
@ -406,6 +407,10 @@ namespace cv
{
return CV_MAT_CN(flags);
}
// Channel count derived from flags, with one adjustment: a count of 3 is
// reported as 4. Every other channel count is returned unchanged.
inline int oclMat::oclchannels() const
{
    const int cn = CV_MAT_CN(flags);
    if (cn == 3)
    {
        return 4;
    }
    return cn;
}
inline size_t oclMat::step1() const
{
return step / elemSize1();
@ -473,6 +478,8 @@ namespace cv
{
ensureSizeIsEnough(size.height, size.width, type, m);
}
} /* end of namespace ocl */
} /* end of namespace cv */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -211,7 +211,8 @@ PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
src1y = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -237,7 +238,8 @@ TEST_P(Blur, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -269,7 +271,14 @@ TEST_P(Blur, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -281,7 +290,14 @@ TEST_P(Blur, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype);
};
#endif
@ -346,7 +362,8 @@ PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
srcy = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat.cols;
roirows = mat.rows;
@ -375,7 +392,8 @@ TEST_P(Laplacian, Accuracy)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -407,7 +425,14 @@ TEST_P(Laplacian, Accuracy)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -421,7 +446,14 @@ TEST_P(Laplacian, Accuracy)
gmat = mat_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
};
#endif
@ -491,7 +523,8 @@ PARAM_TEST_CASE(ErodeDilateBase, MatType, bool)
src1y = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -521,7 +554,8 @@ TEST_P(Erode, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -554,7 +588,14 @@ TEST_P(Erode, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -567,7 +608,14 @@ TEST_P(Erode, Mat)
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::erode(gmat1, gdst, kernel);
};
#endif
@ -588,7 +636,8 @@ TEST_P(Dilate, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -619,7 +668,14 @@ TEST_P(Dilate, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -631,7 +687,14 @@ TEST_P(Dilate, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::dilate(gmat1, gdst, kernel);
};
#endif
@ -676,7 +739,8 @@ PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
dy = GET_PARAM(2);
ksize = GET_PARAM(3);
bordertype = GET_PARAM(4);
dx = 2; dy=0;
dx = 2;
dy = 0;
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size = cv::Size(MWIDTH, MHEIGHT);
@ -700,7 +764,8 @@ PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
src1y = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -726,7 +791,8 @@ TEST_P(Sobel, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -758,7 +824,14 @@ TEST_P(Sobel, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -770,7 +843,14 @@ TEST_P(Sobel, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype);
};
#endif
@ -814,7 +894,8 @@ PARAM_TEST_CASE(Scharr, MatType, int, int, int)
dx = GET_PARAM(1);
dy = GET_PARAM(2);
bordertype = GET_PARAM(3);
dx = 1; dy=0;
dx = 1;
dy = 0;
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size = cv::Size(MWIDTH, MHEIGHT);
@ -838,7 +919,8 @@ PARAM_TEST_CASE(Scharr, MatType, int, int, int)
src1y = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -863,7 +945,8 @@ TEST_P(Scharr, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -895,7 +978,14 @@ TEST_P(Scharr, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -908,7 +998,14 @@ TEST_P(Scharr, Mat)
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype);
};
#endif
@ -980,7 +1077,8 @@ PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int)
src1y = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -1006,7 +1104,8 @@ TEST_P(GaussianBlur, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -1039,7 +1138,14 @@ TEST_P(GaussianBlur, Mat)
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -1051,7 +1157,14 @@ TEST_P(GaussianBlur, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
};
#endif

View File

@ -53,7 +53,13 @@ using namespace testing;
using namespace std;
using namespace cv;
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
// Function object that maps a CvAvgComp to its rect member.
struct getRect
{
    Rect operator ()(const CvAvgComp &comp) const
    {
        Rect result = comp.rect;
        return result;
    }
};
PARAM_TEST_CASE(HaarTestBase, int, int)
{
@ -113,7 +119,8 @@ TEST_F(Haar, FaceDetect)
CV_RGB(255, 128, 0),
CV_RGB(255, 255, 0),
CV_RGB(255, 0, 0),
CV_RGB(255,0,255)} ;
CV_RGB(255, 0, 255)
} ;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));

View File

@ -181,7 +181,8 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool)
dst1y = 1;
maskx = 1;
masky = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -289,7 +290,8 @@ TEST_P(equalizeHist, MatType)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -322,7 +324,14 @@ TEST_P(equalizeHist, MatType)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -335,7 +344,14 @@ TEST_P(equalizeHist, MatType)
{
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
}
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
};
#endif
@ -353,16 +369,19 @@ TEST_P(bilateralFilter, Mat)
int radius = 9;
int d = 2 * radius + 1;
double sigmaspace = 20.0;
int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
//const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101*/};
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
if (mat1.depth() != CV_8U || mat1.type() != dst.type())
{
cout << "Unsupported type" << endl;
EXPECT_DOUBLE_EQ(0.0, 0.0);
}
else
{
for(int i=0;i<sizeof(bordertype)/sizeof(int);i++){
for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++)
{
cout << borderstr[i] << endl;
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
@ -370,14 +389,18 @@ TEST_P(bilateralFilter, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
{
continue;
}
t0 = (double)cvGetTickCount();//cpu start
cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]);
t0 = (double)cvGetTickCount() - t0;//cpu end
@ -402,7 +425,14 @@ TEST_P(bilateralFilter, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -416,7 +446,14 @@ TEST_P(bilateralFilter, Mat)
{
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
};
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]);
};
@ -445,7 +482,8 @@ TEST_P(CopyMakeBorder, Mat)
}
else
{
for(int i=0;i<sizeof(bordertype)/sizeof(int);i++){
for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
@ -453,7 +491,8 @@ TEST_P(CopyMakeBorder, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < 1; k++) //don't support roi perf test
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -485,7 +524,14 @@ TEST_P(CopyMakeBorder, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -498,7 +544,14 @@ TEST_P(CopyMakeBorder, Mat)
{
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
};
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
};
#endif
@ -519,7 +572,8 @@ TEST_P(cornerMinEigenVal, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -552,7 +606,14 @@ TEST_P(cornerMinEigenVal, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -567,7 +628,14 @@ TEST_P(cornerMinEigenVal, Mat)
{
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
};
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
};
#endif
@ -587,7 +655,8 @@ TEST_P(cornerHarris, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -621,7 +690,14 @@ TEST_P(cornerHarris, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -637,7 +713,14 @@ TEST_P(cornerHarris, Mat)
{
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
};
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
};
#endif
@ -658,7 +741,8 @@ TEST_P(integral, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -691,7 +775,14 @@ TEST_P(integral, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -704,7 +795,14 @@ TEST_P(integral, Mat)
{
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
};
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
};
#endif
@ -779,7 +877,8 @@ PARAM_TEST_CASE(WarpTestBase, MatType, int)
dstx = 1;
dsty = 1;
}else
}
else
{
src_roicols = mat1.cols;
src_roirows = mat1.rows;
@ -819,7 +918,8 @@ TEST_P(WarpAffine, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -851,7 +951,14 @@ TEST_P(WarpAffine, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -863,7 +970,14 @@ TEST_P(WarpAffine, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
};
#endif
@ -892,7 +1006,8 @@ TEST_P(WarpPerspective, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -924,7 +1039,14 @@ TEST_P(WarpPerspective, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -936,7 +1058,14 @@ TEST_P(WarpPerspective, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
};
#endif
@ -1124,7 +1253,8 @@ TEST_P(Remap, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = 0; k < 2; k++){
for(int k = 0; k < 2; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -1157,7 +1287,14 @@ TEST_P(Remap, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -1169,7 +1306,14 @@ TEST_P(Remap, Mat)
gdst = dst;
gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
gsrc_roi = src_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val);
};
#endif
@ -1260,7 +1404,8 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
dstx = 1;
dsty = 1;
}else
}
else
{
src_roicols = mat1.cols;
src_roirows = mat1.rows;
@ -1289,7 +1434,8 @@ TEST_P(Resize, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -1321,7 +1467,14 @@ TEST_P(Resize, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -1333,7 +1486,14 @@ TEST_P(Resize, Mat)
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
};
#endif
@ -1401,7 +1561,8 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -1427,7 +1588,8 @@ TEST_P(Threshold, Mat)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -1462,7 +1624,14 @@ TEST_P(Threshold, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -1477,7 +1646,14 @@ TEST_P(Threshold, Mat)
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
};
#endif
@ -1554,7 +1730,8 @@ PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
srcy = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = src.cols;
roirows = src.rows;
@ -1611,7 +1788,14 @@ TEST_P(meanShiftFiltering, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
@ -1623,7 +1807,14 @@ TEST_P(meanShiftFiltering, Mat)
gsrc_roi = src_roi;
gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
};
#endif
@ -1669,7 +1860,14 @@ TEST_P(meanShiftProc, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
@ -1682,7 +1880,14 @@ TEST_P(meanShiftProc, Mat)
gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
};
#endif
@ -1753,7 +1958,8 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType)
roirows = src.rows - 1;
srcx = 1;
srcy = 1;
}else
}
else
{
roicols = src.cols;
roirows = src.rows;
@ -1807,7 +2013,14 @@ TEST_P(calcHist, Mat)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -1819,7 +2032,14 @@ TEST_P(calcHist, Mat)
gsrc_roi = src_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::calcHist(gsrc_roi, gdst_hist);
};
#endif
@ -1836,15 +2056,15 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
// ONE_TYPE(CV_8UC1),
// NULL_TYPE,
// ONE_TYPE(CV_8UC1),
// NULL_TYPE,
// NULL_TYPE,
// Values(false))); // Values(false) is the reserved parameter
//
//
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
Values(CV_8UC1, CV_8UC3),
NULL_TYPE,
Values(CV_8UC1, CV_8UC3),
NULL_TYPE,
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/),
NULL_TYPE,

View File

@ -109,7 +109,8 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
srcy = 1;
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat.cols;
roirows = mat.rows;
@ -141,7 +142,8 @@ TEST_P(ConvertTo, Accuracy)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -171,7 +173,14 @@ TEST_P(ConvertTo, Accuracy)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -184,7 +193,14 @@ TEST_P(ConvertTo, Accuracy)
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.convertTo(gdst, dst_type);
};
#endif
@ -258,7 +274,8 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
dsty = 1;
maskx = 1;
masky = 1;
}else
}
else
{
roicols = mat.cols;
roirows = mat.rows;
@ -293,7 +310,8 @@ TEST_P(CopyTo, Without_mask)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -323,7 +341,14 @@ TEST_P(CopyTo, Without_mask)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -337,7 +362,14 @@ TEST_P(CopyTo, Without_mask)
gmat = mat_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.copyTo(gdst);
};
#endif
@ -352,7 +384,8 @@ TEST_P(CopyTo, With_mask)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -383,7 +416,14 @@ TEST_P(CopyTo, With_mask)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -398,7 +438,14 @@ TEST_P(CopyTo, With_mask)
gmat = mat_roi;
gmask = mask_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.copyTo(gdst, gmask);
};
#endif
@ -464,7 +511,8 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool)
srcy = 1;
maskx = 1;
masky = 1;
}else
}
else
{
roicols = mat.cols;
roirows = mat.rows;
@ -495,7 +543,8 @@ TEST_P(SetTo, Without_mask)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -523,7 +572,14 @@ TEST_P(SetTo, Without_mask)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -535,7 +591,14 @@ TEST_P(SetTo, Without_mask)
gmat_whole = mat;
gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.setTo(val);
};
#endif
@ -550,7 +613,8 @@ TEST_P(SetTo, With_mask)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -580,7 +644,14 @@ TEST_P(SetTo, With_mask)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -594,12 +665,61 @@ TEST_P(SetTo, With_mask)
gmask = mask_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.setTo(val, gmask);
};
#endif
}
// Fixture for the host <-> device transfer benchmark.
// Test parameter 0 selects the matrix type; parameter 1 is not read here.
PARAM_TEST_CASE(DataTransfer, MatType, bool)
{
    int type;                   // matrix type taken from test parameter 0
    cv::Mat mat;                // host-side input filled in SetUp()
    cv::ocl::oclMat gmat_whole; // device-side matrix, left empty by SetUp()

    virtual void SetUp()
    {
        type = GET_PARAM(0);

        cv::Size matSize(MWIDTH, MHEIGHT);
        cv::RNG &generator = TS::ptr()->get_rng();

        // NOTE(review): 5 and 16 are presumably the value range handed to
        // randomMat and `false` its ROI switch - confirm against randomMat.
        mat = randomMat(generator, matSize, type, 5, 16, false);
    }
};
// Times gmat_whole.upload(mat) and gmat_whole.download(cpu_dst) separately,
// checks that the downloaded matrix matches the uploaded one, and prints the
// average upload, download and combined times in milliseconds.
TEST_P(DataTransfer, perf)
{
    double uploadTicks = 0;
    double downloadTicks = 0;
    cv::Mat cpu_dst;

    // LOOP_TIMES + 1 passes are executed; pass 0 is run but kept out of the
    // totals, so the averages below cover exactly LOOP_TIMES passes.
    for(int pass = 0; pass <= LOOP_TIMES; ++pass)
    {
        const double upStart = (double)cvGetTickCount();
        gmat_whole.upload(mat);//upload
        const double upElapsed = (double)cvGetTickCount() - upStart;

        const double downStart = (double)cvGetTickCount();
        gmat_whole.download(cpu_dst);//download
        const double downElapsed = (double)cvGetTickCount() - downStart;

        if(pass != 0)
        {
            uploadTicks += upElapsed;
            downloadTicks += downElapsed;
        }
    }

    EXPECT_MAT_SIMILAR(mat, cpu_dst, 0.0);

    const double transferTicks = uploadTicks + downloadTicks;
    // Divisor that turns an accumulated tick total into an average in ms.
    const double ticksPerAvgMs = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;

    cout << "average upload time is " << uploadTicks / ticksPerAvgMs << "ms" << endl;
    cout << "average download time is " << downloadTicks / ticksPerAvgMs << "ms" << endl;
    cout << "average data transfer time is " << transferTicks / ticksPerAvgMs << "ms" << endl;
}
//**********test************
INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
@ -613,4 +733,7 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
// Instantiate the DataTransfer tests for six matrix types
// (CV_8U and CV_32F, each with 1, 3 and 4 channels).
INSTANTIATE_TEST_CASE_P(MatrixOperation, DataTransfer, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
#endif

View File

@ -136,7 +136,8 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int)
dstx = 1;
dsty = 1;
}else
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
@ -174,7 +175,8 @@ TEST_P(Merge, Accuracy)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -217,7 +219,14 @@ TEST_P(Merge, Accuracy)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -238,7 +247,14 @@ TEST_P(Merge, Accuracy)
dev_gsrc.push_back(gmat3);
dev_gsrc.push_back(gmat4);
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::merge(dev_gsrc, gdst);
};
#endif
@ -333,7 +349,8 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int)
dst3y = 1;
dst4x = 1;
dst4y = 1;
}else
}
else
{
roicols = mat.cols;
roirows = mat.rows;
@ -370,7 +387,8 @@ TEST_P(Split, Accuracy)
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k=LOOPROISTART;k<LOOPROIEND;k++){
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
@ -416,7 +434,14 @@ TEST_P(Split, Accuracy)
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
@ -439,7 +464,14 @@ TEST_P(Split, Accuracy)
gdst4_whole = dst4;
gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
gmat = mat_roi;
if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::split(gmat, dev_gdst);
};
#endif

View File

@ -42,4 +42,3 @@
#include "precomp.hpp"

View File

@ -127,7 +127,10 @@ class Inverse
public:
inline Inverse(bool val = false) : val_(val) {}
// Conversion to bool: yields the flag stored in val_ by the constructor.
inline operator bool() const
{
    return val_;
}
private:
bool val_;

View File

@ -319,7 +319,7 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string
CV_Assert(src1.depth() != CV_8S);
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
@ -352,11 +352,11 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
T scalar;
if(_scalar != NULL)
{
double scalar1 = *((double *)_scalar);
T scalar = (T)scalar1;
scalar = (T)scalar1;
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
@ -384,7 +384,7 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const o
CV_Assert(mask.type() == CV_8U);
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
@ -445,13 +445,10 @@ typedef void (*MulDivFunc)(const oclMat &src1, const oclMat &src2, oclMat &dst,
// Runs the "arithm_mul" kernel on src1 and src2, writing to dst, with `scalar`
// forwarded to arithmetic_run as an extra kernel argument.
//
// arithmetic_run<T> converts the scalar to T before passing it to the kernel,
// so T is double only when the device reports double support AND the input
// depth is CV_64F; every other case uses float. This replaces an unguarded
// per-depth function table that had a null entry and chose the double variant
// without consulting the device capability.
void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
    if((src1.clCxt -> impl -> double_support != 0) && (src1.depth() == CV_64F))
        arithmetic_run<double>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
    else
        arithmetic_run<float>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
}
void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
@ -482,7 +479,7 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols);
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
WT s[4] = { saturate_cast<WT>(src2.val[0]), saturate_cast<WT>(src2.val[1]),
@ -548,7 +545,7 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co
CV_Assert(src.depth() != CV_8S);
Context *clCxt = src.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
@ -666,7 +663,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst)
void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
{
dst.create(src1.size(), CV_8UC1);
CV_Assert(src1.channels() == 1);
CV_Assert(src1.oclchannels() == 1);
CV_Assert(src1.type() == src2.type());
Context *clCxt = src1.clCxt;
int depth = src1.depth();
@ -752,7 +749,7 @@ void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int gr
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1());
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels();
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[512];
CV_Assert(type == 0 || type == 1 || type == 2);
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type);
@ -764,18 +761,18 @@ void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int gr
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
if(src.channels() != 3)
if(src.oclchannels() != 3)
openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options);
else
openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options);
}
template <typename T>
Scalar arithmetic_sum(const oclMat &src)
Scalar arithmetic_sum(const oclMat &src, int type = 0)
{
size_t groupnum = src.clCxt->impl->maxComputeUnits;
CV_Assert(groupnum != 0);
int vlen = src.channels() == 3 ? 12 : 8, dbsize = groupnum * vlen, status;
int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen, status;
Context *clCxt = src.clCxt;
T *p = new T[dbsize];
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
@ -784,13 +781,13 @@ Scalar arithmetic_sum(const oclMat &src)
s.val[1] = 0.0;
s.val[2] = 0.0;
s.val[3] = 0.0;
arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum);
arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type);
memset(p, 0, dbsize * sizeof(T));
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
for(int i = 0; i < dbsize;)
{
for(int j = 0; j < src.channels(); j++, i++)
for(int j = 0; j < src.oclchannels(); j++, i++)
s.val[j] += p[i];
}
delete[] p;
@ -798,7 +795,7 @@ Scalar arithmetic_sum(const oclMat &src)
return s;
}
typedef Scalar (*sumFunc)(const oclMat &src);
typedef Scalar (*sumFunc)(const oclMat &src, int type);
Scalar cv::ocl::sum(const oclMat &src)
{
if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
@ -813,7 +810,25 @@ Scalar cv::ocl::sum(const oclMat &src)
sumFunc func;
func = functab[src.clCxt->impl->double_support];
return func(src);
return func(src, 0);
}
// Per-channel reduction of src computed on the device through the shared
// arithmetic_sum helper, called with reduction type 2.
// NOTE(review): type 2 is presumably the "sum of squares" variant of the sum
// kernel (it is forwarded as the FUNC_TYPE_<n> build option) - confirm against
// the kernel source.
//
// Raises CV_GpuNotSupported when src is CV_64F and the device reports no
// double support.
Scalar cv::ocl::sqrSum(const oclMat &src)
{
    // Normalise the capability flag to 0/1 before using it as a table index:
    // it is an integer that is only ever tested against zero, so any other
    // non-zero value must not be allowed to index past the two-entry table.
    const int useDouble = (src.clCxt->impl->double_support != 0) ? 1 : 0;

    if(useDouble == 0 && src.depth() == CV_64F)
    {
        CV_Error(CV_GpuNotSupported, "select device don't support double");
    }

    // Index 0: accumulate in float; index 1: accumulate in double.
    static sumFunc functab[2] =
    {
        arithmetic_sum<float>,
        arithmetic_sum<double>
    };

    sumFunc func = functab[useDouble];
    return func(src, 2);
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////// meanStdDev //////////////////////////////////
@ -822,7 +837,7 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
{
CV_Assert(src.depth() <= CV_32S);
cv::Size sz(1, 1);
int channels = src.channels();
int channels = src.oclchannels();
Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)),
m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0));
oclMat dst1(m1), dst2(m2);
@ -851,7 +866,7 @@ void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, i
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1());
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels();
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[50];
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
@ -883,7 +898,7 @@ void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &d
vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50];
if(src.channels() == 1)
if(src.oclchannels() == 1)
{
int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
@ -945,7 +960,7 @@ template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal,
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
CV_Assert(src.channels() == 1);
CV_Assert(src.oclchannels() == 1);
if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
@ -979,7 +994,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
bool isRelative = (normType & NORM_RELATIVE) != 0;
normType &= 7;
CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2));
int channels = src1.channels(), i = 0, *p;
int channels = src1.oclchannels(), i = 0, *p;
double r = 0;
oclMat gm1(src1.size(), src1.type());
int min_int = (normType == NORM_INF ? CL_INT_MIN : 0);
@ -1041,7 +1056,7 @@ void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
@ -1089,7 +1104,7 @@ void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName,
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{1, 1, 1, 1, 1, 1, 1},
@ -1130,7 +1145,7 @@ void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName,
const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip;
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.channels(), depth);
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
}
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
{
@ -1151,7 +1166,7 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
Context *clCxt = src1.clCxt;
int channels = src1.channels();
int channels = src1.oclchannels();
int rows = src1.rows;
int cols = src1.cols;
//int step = src1.step;
@ -1187,7 +1202,7 @@ void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src1.cols == dst.cols);
CV_Assert(src1.rows == dst.rows);
CV_Assert(src1.channels() == dst.channels());
CV_Assert(src1.oclchannels() == dst.oclchannels());
// CV_Assert(src1.step == dst.step);
vector<pair<size_t , const void *> > args;
@ -1206,7 +1221,7 @@ void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str
args.push_back( make_pair( sizeof(cl_int), (void *)&lut_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, src1.channels(), src1.depth());
openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, src1.oclchannels(), src1.depth());
}
if(channels == 1 && (left_col != 0 || right_col != 0))
{
@ -1231,7 +1246,7 @@ void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str
args.push_back( make_pair( sizeof(cl_int), (void *)&lut_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
openCLExecuteKernel(clCxt, &arithm_LUT, "LUT2", globalSize, localSize, args, src1.channels(), src1.depth());
openCLExecuteKernel(clCxt, &arithm_LUT, "LUT2", globalSize, localSize, args, src1.oclchannels(), src1.depth());
}
}
@ -1239,7 +1254,7 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
{
int cn = src.channels();
CV_Assert(src.depth() == CV_8U);
CV_Assert((lut.channels() == 1 || lut.channels() == cn) && lut.rows == 1 && lut.cols == 256);
CV_Assert((lut.oclchannels() == 1 || lut.oclchannels() == cn) && lut.rows == 1 && lut.cols == 256);
dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
//oclMat _lut(lut);
string kernelName = "LUT";
@ -1264,7 +1279,7 @@ void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, c
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
//int channels = dst.channels();
//int channels = dst.oclchannels();
int depth = dst.depth();
size_t localThreads[3] = { 64, 4, 1 };
@ -1307,7 +1322,7 @@ void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclM
}
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;
@ -1358,7 +1373,7 @@ void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, s
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;
@ -1419,7 +1434,7 @@ void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &
}
Context *clCxt = src1.clCxt;
int channels = src1.channels();
int channels = src1.oclchannels();
int depth = src1.depth();
int cols = src1.cols * channels;
@ -1474,7 +1489,7 @@ void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oc
}
Context *clCxt = src2.clCxt;
int channels = src2.channels();
int channels = src2.oclchannels();
int depth = src2.depth();
int cols = src2.cols * channels;
@ -1558,7 +1573,7 @@ void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, cl_mem
vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50];
if(src.channels() == 1)
if(src.oclchannels() == 1)
{
int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
@ -1587,7 +1602,7 @@ template<typename T>
void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
CV_Assert(src.channels() == 1);
CV_Assert(src.oclchannels() == 1);
size_t groupnum = src.clCxt->impl->maxComputeUnits;
CV_Assert(groupnum != 0);
int minloc = -1 , maxloc = -1;
@ -1677,7 +1692,7 @@ void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1());
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels();
int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[50];
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
@ -1730,7 +1745,7 @@ void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, const char
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
@ -1775,7 +1790,7 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
@ -1833,7 +1848,7 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclM
CV_Assert(mask.type() == CV_8U);
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
@ -1887,7 +1902,7 @@ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, con
CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols);
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
WT s[4] = { saturate_cast<WT>(src2.val[0]), saturate_cast<WT>(src2.val[1]),
@ -2129,7 +2144,7 @@ void transpose_run(const oclMat &src, oclMat &dst, string kernelName)
CV_Assert(src.cols == dst.rows && src.rows == dst.cols);
Context *clCxt = src.clCxt;
int channels = src.channels();
int channels = src.oclchannels();
int depth = src.depth();
int vector_lengths[4][7] = {{1, 0, 0, 0, 1, 1, 0},
@ -2163,7 +2178,7 @@ void transpose_run(const oclMat &src, oclMat &dst, string kernelName)
void cv::ocl::transpose(const oclMat &src, oclMat &dst)
{
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 ||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_8SC3 || src.type() == CV_8SC4 ||
src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1);
stringstream idxstr;
@ -2186,7 +2201,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
@ -2249,7 +2264,7 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst)
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
@ -2297,7 +2312,7 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, oclMat &dst)
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
@ -2339,7 +2354,7 @@ void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernel
CV_Assert(src1.type() == dst.type());
Context *clCxt = src1.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;

View File

@ -52,7 +52,10 @@ using namespace std;
#if !defined (HAVE_OPENCL)
// Stub compiled when HAVE_OPENCL is not defined: performs no blending and only
// calls throw_nogpu().
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &result)
{
    throw_nogpu();
}
#else
namespace cv
{
@ -68,7 +71,7 @@ void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat&
{
cv::ocl::Context *ctx = img1.clCxt;
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
int channels = img1.channels();
int channels = img1.oclchannels();
int depth = img1.depth();
int rows = img1.rows;
int cols = img1.cols;

View File

@ -52,37 +52,133 @@ using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat>&) { throw_nogpu(); }
const vector<oclMat>& cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const { throw_nogpu(); return trainDescCollection; }
void cv::ocl::BruteForceMatcher_OCL_base::clear() { throw_nogpu(); }
bool cv::ocl::BruteForceMatcher_OCL_base::empty() const { throw_nogpu(); return true; }
bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const { throw_nogpu(); return true; }
void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat&, const oclMat&, vector<DMatch>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat&, const oclMat&, vector<DMatch>&, const oclMat&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat&, oclMat&, const vector<oclMat>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat&, const oclMat&, const oclMat&, vector<DMatch>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat&, const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat&, vector<DMatch>&, const vector<oclMat>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, int, const oclMat&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat&, const oclMat&, vector< vector<DMatch> >&, int, const oclMat&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat&, const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat&, vector< vector<DMatch> >&, int, const vector<oclMat>&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, float, const oclMat&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat&, const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat&, const oclMat&, vector< vector<DMatch> >&, float, const oclMat&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat&, oclMat&, oclMat&, oclMat&, oclMat&, float, const vector<oclMat>&) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat&, const oclMat&, const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat&, vector< vector<DMatch> >&, float, const vector<oclMat>&, bool) { throw_nogpu(); }
cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat> &)
{
throw_nogpu();
}
const vector<oclMat> &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
{
throw_nogpu();
return trainDescCollection;
}
void cv::ocl::BruteForceMatcher_OCL_base::clear()
{
throw_nogpu();
}
bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
{
throw_nogpu();
return true;
}
bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
{
throw_nogpu();
return true;
}
void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, const oclMat &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, vector<DMatch> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, vector<DMatch> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, const oclMat &, vector<DMatch> &, const oclMat &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &, oclMat &, const vector<oclMat> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, const oclMat &, vector<DMatch> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, const Mat &, vector<DMatch> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, vector<DMatch> &, const vector<oclMat> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, int, const oclMat &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, const oclMat &, vector< vector<DMatch> > &, int, const oclMat &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, vector< vector<DMatch> > &, int, const vector<oclMat> &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, float, const oclMat &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, const oclMat &, vector< vector<DMatch> > &, float, const oclMat &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &, oclMat &, oclMat &, oclMat &, oclMat &, float, const vector<oclMat> &)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
{
throw_nogpu();
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, vector< vector<DMatch> > &, float, const vector<oclMat> &, bool)
{
throw_nogpu();
}
#else /* !defined (HAVE_OPENCL) */
using namespace std;
@ -1417,7 +1513,10 @@ namespace
struct ImgIdxSetter
{
explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;}
inline void operator()(DMatch &m) const
{
m.imgIdx = imgIdx;
}
int imgIdx;
};
}

View File

@ -0,0 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
// Fallback definitions used when the module is built without OpenCL support.
// Each one only reports the missing backend through throw_nogpu().
// The parameter lists mirror the OpenCL-enabled definitions later in this file so that
// both build configurations define the same set of functions with the same signatures.
void cv::ocl::buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &)
{
    throw_nogpu();
}
void cv::ocl::buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &)
{
    throw_nogpu();
}
void cv::ocl::buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &)
{
    throw_nogpu();
}
void cv::ocl::buildWarpAffineMaps(const Mat &, bool, Size, oclMat &, oclMat &)
{
    throw_nogpu();
}
void cv::ocl::buildWarpPerspectiveMaps(const Mat &, bool, Size, oclMat &, oclMat &)
{
    throw_nogpu();
}
#else
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *build_warps;
}
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps
// Builds the x/y coordinate maps of a planar warp by running the "buildWarpPlaneMaps"
// kernel from the build_warps program.
//
//   src_size      - not used by this implementation
//   dst_roi       - its size becomes the size of both maps; its top-left corner is
//                   passed to the kernel as (tl_u, tl_v)
//   K, R          - 3x3 CV_32F matrices (asserted)
//   T             - continuous CV_32F vector with 3 elements, 3x1 or 1x3 (asserted)
//   scale         - forwarded unchanged to the kernel
//   map_x, map_y  - outputs, (re)created as CV_32F maps of size dst_roi.size()
void cv::ocl::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
                                 float scale, oclMat &map_x, oclMat &map_y)
{
    (void)src_size;

    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
    CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());

    Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());

    // Pack K * R^T (9 floats) followed by T (3 floats) into one 1x12 row so that a single
    // device buffer carries both.
    // The data has to be copied with copyTo() into named sub-matrix headers: assigning a
    // Mat to a temporary sub-matrix only rebinds that temporary header and leaves KRT_mat
    // untouched. Range ends are exclusive, hence [0, 9) and [9, 12).
    // NOTE(review): the kernel is assumed to read the 9 matrix values first and the 3
    // translation values right after them - confirm against the build_warps kernel source.
    Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3
    Mat KRT_rot = KRT_mat(Range::all(), Range(0, 9));
    Mat KRT_trans = KRT_mat(Range::all(), Range(9, 12));
    K_Rinv.reshape(1, 1).copyTo(KRT_rot);
    T.reshape(1, 1).copyTo(KRT_trans);
    oclMat KRT_oclMat(KRT_mat);

    map_x.create(dst_roi.size(), CV_32F);
    map_y.create(dst_roi.size(), CV_32F);

    int tl_u = dst_roi.tl().x;
    int tl_v = dst_roi.tl().y;

    Context *clCxt = Context::getContext();
    string kernelName = "buildWarpPlaneMaps";

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
    // Pass the uploaded device matrix, not the host-side KRT_mat, as the kernel argument.
    args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_oclMat.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
    args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
    args.push_back( make_pair( sizeof(cl_float), (void *)&scale));

    // One work-item per map element, launched in 32x8 work-groups.
    size_t globalThreads[3] = {static_cast<size_t>(map_x.cols), static_cast<size_t>(map_x.rows), 1};
    size_t localThreads[3] = {32, 8, 1};
    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpCylindricalMaps
// Fills map_x / map_y with the coordinate maps of a cylindrical warp by launching the
// "buildWarpCylindricalMaps" kernel of the build_warps program.
// K and R must be 3x3 CV_32F matrices; their product K * R^T is uploaded as a single
// 1-row device matrix. Both maps are (re)created as CV_32F with the size of dst_roi.
// src_size is accepted but not used here.
void cv::ocl::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,
                                       oclMat &map_x, oclMat &map_y)
{
    typedef pair<size_t, const void *> KernelArg;

    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);

    const Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());
    oclMat KR_oclMat(K_Rinv.reshape(1, 1));

    const Size mapSize = dst_roi.size();
    map_x.create(mapSize, CV_32F);
    map_y.create(mapSize, CV_32F);

    // Top-left corner of the destination ROI, handed to the kernel by address.
    int originU = dst_roi.x;
    int originV = dst_roi.y;

    Context *context = Context::getContext();

    // Argument order must match the kernel's parameter list.
    vector<KernelArg> kernelArgs;
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &map_x.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &map_y.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &KR_oclMat.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &originU));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &originV));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_x.cols));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_x.rows));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_x.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_y.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_float), &scale));

    // One work-item per map element, in 32x8 work-groups.
    size_t gridDims[3] = {static_cast<size_t>(map_x.cols), static_cast<size_t>(map_x.rows), 1};
    size_t groupDims[3] = {32, 8, 1};
    openCLExecuteKernel(context, &build_warps, string("buildWarpCylindricalMaps"),
                        gridDims, groupDims, kernelArgs, -1, -1);
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpSphericalMaps
// Fills map_x / map_y with the coordinate maps of a spherical warp by launching the
// "buildWarpSphericalMaps" kernel of the build_warps program.
// K and R must be 3x3 CV_32F matrices. Both maps are (re)created as CV_32F with the
// size of dst_roi. src_size is accepted but not used here.
void cv::ocl::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,
                                     oclMat &map_x, oclMat &map_y)
{
    typedef pair<size_t, const void *> KernelArg;

    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);

    const Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());

    // Upload K * R^T flattened to one row; this is the only matrix sent to the device.
    oclMat KR_oclMat(K_Rinv.reshape(1, 1));

    const Size mapSize = dst_roi.size();
    map_x.create(mapSize, CV_32F);
    map_y.create(mapSize, CV_32F);

    // Top-left corner of the destination ROI, handed to the kernel by address.
    int originU = dst_roi.x;
    int originV = dst_roi.y;

    Context *context = Context::getContext();

    // Argument order must match the kernel's parameter list.
    vector<KernelArg> kernelArgs;
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &map_x.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &map_y.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &KR_oclMat.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &originU));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &originV));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_x.cols));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_x.rows));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_x.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &map_y.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_float), &scale));

    // One work-item per map element, in 32x8 work-groups.
    size_t gridDims[3] = {static_cast<size_t>(map_x.cols), static_cast<size_t>(map_x.rows), 1};
    size_t groupDims[3] = {32, 8, 1};
    openCLExecuteKernel(context, &build_warps, string("buildWarpSphericalMaps"),
                        gridDims, groupDims, kernelArgs, -1, -1);
}
void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
{
CV_Assert(M.rows == 2 && M.cols == 3);
xmap.create(dsize, CV_32FC1);
ymap.create(dsize, CV_32FC1);
float coeffs[2 * 3];
Mat coeffsMat(2, 3, CV_32F, (void *)coeffs);
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
else
{
cv::Mat iM;
invertAffineTransform(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
}
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
Context *clCxt = Context::getContext();
string kernelName = "buildWarpAffineMaps";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
}
void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
{
CV_Assert(M.rows == 3 && M.cols == 3);
xmap.create(dsize, CV_32FC1);
ymap.create(dsize, CV_32FC1);
float coeffs[3 * 3];
Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
else
{
cv::Mat iM;
invert(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
}
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
Context *clCxt = Context::getContext();
string kernelName = "buildWarpPerspectiveMaps";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
}
#endif // HAVE_OPENCL

View File

@ -52,10 +52,22 @@ using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
void cv::ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) { throw_nogpu(); }
void cv::ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false){ throw_nogpu(); }
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); }
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); }
void cv::ocl::Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
{
throw_nogpu();
}
void cv::ocl::Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
{
throw_nogpu();
}
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
{
throw_nogpu();
}
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
{
throw_nogpu();
}
#else
namespace cv
@ -123,7 +135,10 @@ void cv::ocl::CannyBuf::release()
openCLFree(counter);
}
namespace cv { namespace ocl {
namespace cv
{
namespace ocl
{
namespace canny
{
void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols);
@ -139,7 +154,8 @@ namespace cv { namespace ocl {
void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
}
}}// cv::ocl
}
}// cv::ocl
namespace
{
@ -210,7 +226,8 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
dst.create(dx.size(), CV_8U);
dst.setTo(Scalar::all(0));
buf.dx = dx; buf.dy = dy;
buf.dx = dx;
buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);

View File

@ -81,9 +81,9 @@ namespace
void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
{
vector<pair<size_t , const void *> > args;
int channels = src.channels();
int channels = src.oclchannels();
char build_options[50];
//printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.channels(),bidx);
//printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx);
sprintf(build_options, "-D DEPTH_%d", src.depth());
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
@ -99,7 +99,7 @@ namespace
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
{
Size sz = src.size();
int scn = src.channels(), depth = src.depth(), bidx;
int scn = src.oclchannels(), depth = src.depth(), bidx;
CV_Assert(depth == CV_8U || depth == CV_16U);

View File

@ -53,7 +53,10 @@ using namespace std;
#if !defined(HAVE_OPENCL)
void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); }
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
{
throw_nogpu();
}
#else /*!HAVE_OPENCL */

View File

@ -52,12 +52,18 @@ using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
{
throw_nogpu();
}
#else
#include <clAmdFft.h>
namespace cv{ namespace ocl {
namespace cv
{
namespace ocl
{
enum FftType
{
C2R = 1, // complex to complex
@ -85,7 +91,8 @@ namespace cv{ namespace ocl {
// if not, bake a new one, put it into the planStore and return it.
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
};
}}
}
}
bool cv::ocl::FftPlan::started = false;
vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan *>();
clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0;

View File

@ -328,10 +328,10 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert( (src.cols == dst.cols) &&
(src.rows == dst.rows) );
CV_Assert( (src.channels() == dst.channels()) );
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
int srcStep = src.step1() / src.channels();
int dstStep = dst.step1() / dst.channels();
int srcStep = src.step1() / src.oclchannels();
int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize();
int dstOffset = dst.offset / dst.elemSize();
@ -400,10 +400,10 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert( (src.cols == dst.cols) &&
(src.rows == dst.rows) );
CV_Assert( (src.channels() == dst.channels()) );
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
int srcStep = src.step1() / src.channels();
int dstStep = dst.step1() / dst.channels();
int srcStep = src.step1() / src.oclchannels();
int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize();
int dstOffset = dst.offset / dst.elemSize();
@ -467,12 +467,12 @@ Ptr<BaseFilter_GPU> cv::ocl::getMorphologyFilter_GPU(int op, int type, const Mat
{
static const GPUMorfFilter_t GPUMorfFilter_callers[2][5] =
{
{0, GPUErode, 0, 0, GPUErode },
{0, GPUDilate, 0, 0, GPUDilate}
{0, GPUErode, 0, GPUErode, GPUErode },
{0, GPUDilate, 0, GPUDilate, GPUDilate}
};
CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
CV_Assert(type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4);
// Accept 1-, 3- and 4-channel 8-bit and 32-bit float images. The second float term used to
// repeat CV_32FC1, which left CV_32FC3 rejected even though the 3-channel slot of the
// caller table above is populated.
CV_Assert(type == CV_8UC1 || type == CV_8UC3 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC3 || type == CV_32FC4);
oclMat gpu_krnl;
normalizeKernel(kernel, gpu_krnl);
@ -670,12 +670,12 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert( (src.cols == dst.cols) &&
(src.rows == dst.rows) );
CV_Assert( (src.channels() == dst.channels()) );
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
CV_Assert( (borderType != 0) );
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
Context *clCxt = src.clCxt;
int cn = src.channels();
int cn = src.oclchannels();
int depth = src.depth();
string kernelName = "filter2D";
@ -723,9 +723,9 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType)
{
static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, 0, GPUFilter2D};
static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4) && dstType == srcType);
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
oclMat gpu_krnl;
int nDivisor;
@ -780,7 +780,7 @@ namespace
Size src_size = src.size();
int src_type = src.type();
int cn = src.channels();
int cn = src.oclchannels();
//dst.create(src_size, src_type);
dst = Scalar(0.0);
//dstBuf.create(src_size, src_type);
@ -1071,12 +1071,12 @@ void GPUFilterBox_32F_C4R(const oclMat &src, oclMat &dst,
Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int srcType, int dstType,
const Size &ksize, Point anchor, int borderType)
{
static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, 0, GPUFilterBox_8u_C4R},
{0, GPUFilterBox_32F_C1R, 0, 0, GPUFilterBox_32F_C4R}
static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, GPUFilterBox_8u_C4R, GPUFilterBox_8u_C4R},
{0, GPUFilterBox_32F_C1R, 0, GPUFilterBox_32F_C4R, GPUFilterBox_32F_C4R}
};
//Remove this check if more data types need to be supported.
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4)
&& dstType == srcType);
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 ||
srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
normalizeAnchor(anchor, ksize);
@ -1155,7 +1155,7 @@ template <typename T>
void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
{
Context *clCxt = src.clCxt;
int channels = src.channels();
int channels = src.oclchannels();
size_t localThreads[3] = {16, 16, 1};
string kernelName = "row_filter";
@ -1208,7 +1208,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
//sanity checks
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src.cols == dst.cols);
CV_Assert(src.channels() == dst.channels());
CV_Assert(src.oclchannels() == dst.oclchannels());
CV_Assert(ksize == (anchor << 1) + 1);
int src_pix_per_row, dst_pix_per_row;
int src_offset_x, src_offset_y, dst_offset_in_pixel;
@ -1283,7 +1283,7 @@ template <typename T>
void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
{
Context *clCxt = src.clCxt;
int channels = src.channels();
int channels = src.oclchannels();
size_t localThreads[3] = {16, 16, 1};
string kernelName = "col_filter";
@ -1364,7 +1364,7 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
//sanity checks
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src.cols == dst.cols);
CV_Assert(src.channels() == dst.channels());
CV_Assert(src.oclchannels() == dst.oclchannels());
CV_Assert(ksize == (anchor << 1) + 1);
int src_pix_per_row, dst_pix_per_row;
int src_offset_x, src_offset_y, dst_offset_in_pixel;

View File

@ -51,7 +51,10 @@
#include "clAmdBlas.h"
#if !defined (HAVE_OPENCL)
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
{
throw_nogpu();
}
#else
using namespace cv;

View File

@ -52,6 +52,7 @@
#include "precomp.hpp"
#include <stdio.h>
#include <string>
#ifdef EMU
#include "runCL.h"
#endif
@ -888,6 +889,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
//the Intel HD Graphics is unsupported
if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
{
cout << " Intel HD GPU device unsupported " << endl;
return NULL;
}
//double t = 0;
if( maxSize.height == 0 || maxSize.width == 0 )
{

View File

@ -51,19 +51,65 @@ using namespace std;
#if !defined (HAVE_OPENCL)
cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_nogpu(); }
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const { throw_nogpu(); return 0; }
size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); return 0; }
double cv::ocl::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; }
bool cv::ocl::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; }
void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float>&) { throw_nogpu(); }
void cv::ocl::HOGDescriptor::detect(const oclMat&, vector<Point>&, double, Size, Size) { throw_nogpu(); }
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat&, vector<Rect>&, double, Size, Size, double, int) { throw_nogpu(); }
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat&) { throw_nogpu(); }
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat&, Size, oclMat&, int) { throw_nogpu(); }
std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); }
// Stub implementations of cv::ocl::HOGDescriptor, compiled only when HAVE_OPENCL
// is not defined. Every stub does nothing but call throw_nogpu(); parameters are
// left unnamed because none of them is read.
// NOTE(review): throw_nogpu() presumably raises an error, in which case the
// return statements that follow it only exist to give non-void functions a
// return value for the compiler - confirm against its definition.
cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int)
{
throw_nogpu();
}
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
{
throw_nogpu();
return 0;
}
size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
{
throw_nogpu();
return 0;
}
double cv::ocl::HOGDescriptor::getWinSigma() const
{
throw_nogpu();
return 0;
}
bool cv::ocl::HOGDescriptor::checkDetectorSize() const
{
throw_nogpu();
return false;
}
void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float> &)
{
throw_nogpu();
}
void cv::ocl::HOGDescriptor::detect(const oclMat &, vector<Point> &, double, Size, Size)
{
throw_nogpu();
}
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &, vector<Rect> &, double, Size, Size, double, int)
{
throw_nogpu();
}
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &)
{
throw_nogpu();
}
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &, Size, oclMat &, int)
{
throw_nogpu();
}
// The three detector getters below fall back to an empty coefficient vector.
std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
{
throw_nogpu();
return std::vector<float>();
}
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
{
throw_nogpu();
return std::vector<float>();
}
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
{
throw_nogpu();
return std::vector<float>();
}
#else
@ -73,13 +119,20 @@ std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nog
#define CELLS_PER_BLOCK_Y 2
#define NTHREADS 256
namespace cv { namespace ocl
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *objdetect_hog;
}}
}
}
namespace cv { namespace ocl { namespace device
namespace cv
{
namespace ocl
{
namespace device
{
namespace hog
{
@ -122,7 +175,9 @@ namespace cv { namespace ocl { namespace device
void resize( const oclMat &src, oclMat &dst, const Size sz);
}
}}}
}
}
}
using namespace ::cv::ocl::device;
@ -386,7 +441,8 @@ std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
{
static const float detector[] = {
static const float detector[] =
{
0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f,
0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f,
@ -717,7 +773,8 @@ std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f,
-0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f,
-0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f,
-9.063785f };
-9.063785f
};
return vector<float>(detector, detector + sizeof(detector) / sizeof(detector[0]));
}
@ -726,7 +783,8 @@ std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
{
static const float detector[] = {
static const float detector[] =
{
0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
@ -1531,7 +1589,8 @@ std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
-0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f,
-0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
-0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
-0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f };
-0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f
};
return vector<float>(detector, detector + sizeof(detector) / sizeof(detector[0]));
}

View File

@ -77,7 +77,10 @@ void cv::ocl::resize(const oclMat &, oclMat &, Size, double, double, int)
{
throw_nogpu();
}
void cv::ocl::remap(const oclMat&, oclMat&, oclMat&, oclMat&, int, int ,const Scalar&) { throw_nogpu(); }
// No-OpenCL stub for cv::ocl::remap: ignores every argument and only calls
// throw_nogpu(). NOTE(review): throw_nogpu() presumably reports that OpenCL
// support is missing - confirm against its definition.
void cv::ocl::remap(const oclMat &, oclMat &, oclMat &, oclMat &, int, int , const Scalar &)
{
throw_nogpu();
}
void cv::ocl::copyMakeBorder(const oclMat &, oclMat &, int, int, int, int, const Scalar &)
{
@ -196,7 +199,7 @@ namespace cv
args.push_back( make_pair(sizeof(cl_uchar), (void *)&thresh_uchar));
args.push_back( make_pair(sizeof(cl_uchar), (void *)&max_val));
args.push_back( make_pair(sizeof(cl_int), (void *)&type));
openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
@ -233,7 +236,7 @@ namespace cv
args.push_back( make_pair(sizeof(cl_float), (void *)&thresh_f));
args.push_back( make_pair(sizeof(cl_float), (void *)&max_val));
args.push_back( make_pair(sizeof(cl_int), (void *)&type));
openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
@ -293,7 +296,7 @@ namespace cv
kernelName = "remapNNF1Constant";
}
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
int type = src.type();
size_t blkSizeX = 16, blkSizeY = 16;
@ -305,7 +308,7 @@ namespace cv
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
else if(src.type() == CV_8UC4 || src.type() == CV_32FC1)
else if(src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_32FC1)
{
cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4;
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
@ -448,7 +451,7 @@ namespace cv
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
}
openCLExecuteKernel(clCxt,&imgproc_remap,kernelName,globalThreads,localThreads,args,src.channels(),src.depth());
openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
////////////////////////////////////////////////////////////////////////////////////////////
@ -462,9 +465,9 @@ namespace cv
float ify = 1. / fy;
double ifx_d = 1. / fx;
double ify_d = 1. / fy;
int srcStep_in_pixel = src.step1() / src.channels();
int srcStep_in_pixel = src.step1() / src.oclchannels();
int srcoffset_in_pixel = src.offset / src.elemSize();
int dstStep_in_pixel = dst.step1() / dst.channels();
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
int dstoffset_in_pixel = dst.offset / dst.elemSize();
//printf("%d %d\n",src.step1() , dst.elemSize());
string kernelName;
@ -529,15 +532,15 @@ namespace cv
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
}
openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
void resize(const oclMat &src, oclMat &dst, Size dsize,
double fx, double fy, int interpolation)
{
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4
|| src.type() == CV_32FC1 || src.type() == CV_32FC4);
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
|| src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
CV_Assert( src.size().area() > 0 );
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
@ -585,10 +588,10 @@ namespace cv
return medianFilter(src1, dst, m);
}
int srcStep = src.step1() / src.channels();
int dstStep = dst.step1() / dst.channels();
int srcOffset = src.offset / src.channels() / src.elemSize1();
int dstOffset = dst.offset / dst.channels() / dst.elemSize1();
int srcStep = src.step1() / src.oclchannels();
int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.oclchannels() / src.elemSize1();
int dstOffset = dst.offset / dst.oclchannels() / dst.elemSize1();
Context *clCxt = src.clCxt;
string kernelName = "medianFilter";
@ -610,12 +613,12 @@ namespace cv
if(m == 3)
{
string kernelName = "medianFilter3";
openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
else if(m == 5)
{
string kernelName = "medianFilter5";
openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
else
{
@ -623,7 +626,7 @@ namespace cv
//string kernelName = "medianFilter";
//args.push_back( make_pair( sizeof(cl_int),(void*)&m));
//openCLExecuteKernel(clCxt,&imgproc_median,kernelName,globalThreads,localThreads,args,src.channels(),-1);
//openCLExecuteKernel(clCxt,&imgproc_median,kernelName,globalThreads,localThreads,args,src.oclchannels(),-1);
}
}
@ -632,7 +635,7 @@ namespace cv
// copyMakeBorder
void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar)
{
//CV_Assert(src.channels() != 2);
//CV_Assert(src.oclchannels() != 2);
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
{
@ -653,8 +656,8 @@ namespace cv
CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom));
}
dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
int srcStep = src.step1() / src.channels();
int dstStep = dst.step1() / dst.channels();
int srcStep = src.step1() / src.oclchannels();
int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize();
int dstOffset = dst.offset / dst.elemSize();
int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101};
@ -672,7 +675,8 @@ namespace cv
string kernelName = "copymakeborder";
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = {(dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
(dst.rows + localThreads[1]-1) / localThreads[1] * localThreads[1], 1};
(dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1
};
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
@ -705,7 +709,7 @@ namespace cv
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=uchar -D %s", borderstr[bordertype_index]);
@ -729,7 +733,7 @@ namespace cv
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=char -D %s", borderstr[bordertype_index]);
@ -748,7 +752,7 @@ namespace cv
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=ushort -D %s", borderstr[bordertype_index]);
@ -767,7 +771,7 @@ namespace cv
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=short -D %s", borderstr[bordertype_index]);
@ -786,7 +790,7 @@ namespace cv
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=int -D %s", borderstr[bordertype_index]);
@ -812,7 +816,7 @@ namespace cv
val.fval.s[1] = scalar.val[1];
val.fval.s[2] = scalar.val[2];
val.fval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=float -D %s", borderstr[bordertype_index]);
@ -831,7 +835,7 @@ namespace cv
val.dval.s[1] = scalar.val[1];
val.dval.s[2] = scalar.val[2];
val.dval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=double -D %s", borderstr[bordertype_index]);
@ -931,7 +935,7 @@ namespace cv
void warpAffine_gpu(const oclMat &src, oclMat &dst, F coeffs[2][3], int interpolation)
{
CV_Assert( (src.channels() == dst.channels()) );
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
int srcStep = src.step1();
int dstStep = dst.step1();
float float_coeffs[2][3];
@ -948,7 +952,9 @@ namespace cv
coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
openCLVerifyCall(st);
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
}else{
}
else
{
cl_int st;
for(int m = 0; m < 2; m++)
for(int n = 0; n < 3; n++)
@ -993,14 +999,14 @@ namespace cv
args.push_back(make_pair(sizeof(cl_mem), (void *)&coeffs_cm));
args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
openCLSafeCall(clReleaseMemObject(coeffs_cm));
}
void warpPerspective_gpu(const oclMat &src, oclMat &dst, double coeffs[3][3], int interpolation)
{
CV_Assert( (src.channels() == dst.channels()) );
CV_Assert( (src.oclchannels() == dst.oclchannels()) );
int srcStep = src.step1();
int dstStep = dst.step1();
float float_coeffs[3][3];
@ -1016,7 +1022,9 @@ namespace cv
coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
openCLVerifyCall(st);
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
}else{
}
else
{
cl_int st;
for(int m = 0; m < 3; m++)
for(int n = 0; n < 3; n++)
@ -1061,7 +1069,7 @@ namespace cv
args.push_back(make_pair(sizeof(cl_mem), (void *)&coeffs_cm));
args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
openCLSafeCall(clReleaseMemObject(coeffs_cm));
}
}
@ -1070,7 +1078,7 @@ namespace cv
{
int interpolation = flags & INTER_MAX;
CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.channels() != 2 && src.channels() != 3);
CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
dst.create(dsize, src.type());
@ -1092,7 +1100,7 @@ namespace cv
{
int interpolation = flags & INTER_MAX;
CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.channels() != 2 && src.channels() != 3);
CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
dst.create(dsize, src.type());
@ -1213,10 +1221,13 @@ namespace cv
if (ksize < 0)
scale *= 2.;
if (src.depth() == CV_8U){
if (src.depth() == CV_8U)
{
scale *= 255.;
scale = 1. / scale;
}else{
}
else
{
scale = 1. / scale;
}
if (ksize > 0)
@ -1355,7 +1366,7 @@ namespace cv
if( src.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
if( src.depth() != CV_8U || src.channels() != 4 )
if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
if(src.clCxt->impl->double_support == 0)
@ -1423,7 +1434,7 @@ namespace cv
if( src.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
if( src.depth() != CV_8U || src.channels() != 4 )
if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
if(src.clCxt->impl->double_support == 0)
@ -1472,7 +1483,7 @@ namespace cv
int dataWidth_bits = 4;
int mask = dataWidth - 1;
int cols = mat_src.cols * mat_src.channels();
int cols = mat_src.cols * mat_src.oclchannels();
int src_offset = mat_src.offset;
int hist_step = mat_sub_hist.step >> 2;
int left_col = 0, right_col = 0;
@ -1595,7 +1606,7 @@ oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d,
int i, j, k, maxk, radius;
Size size = src.size();
CV_Assert( (src.type() == CV_8UC1 || src.download_channels == 3) &&
CV_Assert( (src.channels() == 1 || src.channels() == 3) &&
src.type() == dst.type() && src.size() == dst.size() &&
src.data != dst.data );
@ -1623,7 +1634,9 @@ oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d,
float *color_weight = &_color_weight[0];
float *space_weight = &_space_weight[0];
int *space_ofs = &_space_ofs[0];
int dst_step_in_pixel = dst.step / dst.elemSize();
int dst_offset_in_pixel = dst.offset / dst.elemSize();
int temp_step_in_pixel = temp.step / temp.elemSize();
// initialize color-related bilateral filter coefficients
for( i = 0; i < 256 * cn; i++ )
color_weight[i] = (float)std::exp(i * i * gauss_color_coeff);
@ -1636,7 +1649,7 @@ oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d,
if( r > radius )
continue;
space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff);
space_ofs[maxk++] = (int)(i*temp.step + j*cn);
space_ofs[maxk++] = (int)(i * temp_step_in_pixel + j);
}
oclMat oclcolor_weight(1, cn * 256, CV_32FC1, color_weight);
oclMat oclspace_weight(1, d * d, CV_32FC1, space_weight);
@ -1646,7 +1659,13 @@ oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d,
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { (dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
(dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1],
1};
1
};
if((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0))
{
kernelName = "bilateral2";
globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
}
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&temp.data ));
@ -1654,15 +1673,15 @@ oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d,
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&maxk ));
args.push_back( make_pair( sizeof(cl_int), (void *)&radius ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step_in_pixel ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_in_pixel ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp_step_in_pixel ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp.cols ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&oclcolor_weight.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_weight.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_ofs.data ));
openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, dst.oclchannels(), dst.depth());
}
void bilateralFilter(const oclMat &src, oclMat &dst, int radius, double sigmaclr, double sigmaspc, int borderType)
{
@ -1694,7 +1713,7 @@ void convolve_run(const oclMat &src, const oclMat &temp1,oclMat &dst,string kern
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
int channels = dst.channels();
int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;
@ -1705,7 +1724,8 @@ void convolve_run(const oclMat &src, const oclMat &temp1,oclMat &dst,string kern
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
divUp(rows, localThreads[1]) *localThreads[1],
1};
1
};
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));

View File

@ -288,6 +288,7 @@ namespace cv
ocltmpinfo.impl->devices.push_back(devices[j]);
openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL));
ocltmpinfo.impl->devName.push_back(std::string(deviceName));
ocltmpinfo.DeviceName.push_back(std::string(deviceName));
}
delete[] devices;
oclinfo.push_back(ocltmpinfo);
@ -348,9 +349,13 @@ namespace cv
Context::setContext(oclinfo);
}
void *getoclContext()
{
return &(Context::getContext()->impl->clContext);
}
void *getoclCommandQueue()
{
return &(Context::getContext()->impl->clCmdQueue);
@ -873,6 +878,7 @@ namespace cv
//}
impl->devices.clear();
impl->devName.clear();
DeviceName.clear();
}
Info::~Info()
{
@ -895,6 +901,7 @@ namespace cv
{
impl->devices.push_back(m.impl->devices[i]);
impl->devName.push_back(m.impl->devName[i]);
DeviceName.push_back(m.DeviceName[i]);
}
return *this;
}

View File

@ -0,0 +1,315 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::ocl;
#if !defined (HAVE_OPENCL)
// No-OpenCL stub for cv::ocl::interpolateFrames, compiled only when
// HAVE_OPENCL is not defined. It does nothing but call throw_nogpu().
// The parameters are deliberately unnamed: none of them is read, and naming
// them would only produce unused-parameter warnings.
// NOTE(review): throw_nogpu() presumably reports that OpenCL support is
// missing - confirm against its definition.
void cv::ocl::interpolateFrames(const oclMat &, const oclMat &,
                                const oclMat &, const oclMat &,
                                const oclMat &, const oclMat &,
                                float, oclMat &, oclMat &)
{
    throw_nogpu();
}
#else
// Forward declarations for the frame-interpolation implementation in this file.
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
// Kernel string whose address is handed to openCLExecuteKernel by every launcher below.
extern const char *interpolate_frames;
// Host-side launchers; each is defined further down in this file.
namespace interpolate
{
//The following are ported from NPP_staging.cu
// As it is not valid to do pointer offset operations on host for default oclMat's native cl_mem pointer,
// we may have to do this on kernel
// Launches "memsetKernel". `offset` is a plane index; the definition converts it to an
// element offset (step-in-floats * height * offset) before passing it to the kernel.
void memsetKernel(float val, oclMat &img, int height, int offset);
// Launches "normalizeKernel". Both offsets are plane indices, converted to element offsets
// the same way as in memsetKernel.
void normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset);
// Launches "forwardWarpKernel". b_offset / d_offset are plane indices into `buffer`,
// scaled by the buffer's step-in-floats * src.rows in the definition.
void forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
int b_offset, int d_offset); // buffer, dst offset
//OpenCL conversion of nppiStVectorWarp_PSF2x2_32f_C1
// Runs memsetKernel, forwardWarpKernel and normalizeKernel in that order on `buffer`.
void vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
oclMat &buffer, int buf_offset, float timeScale, int dst_offset);
//OpenCL conversion of BlendFrames
// Launches "blendFramesKernel"; the two cl_mem arguments are the image objects
// created by bindImgTex for frame0 and frame1.
void blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer,
float pos, oclMat &newFrame, cl_mem &, cl_mem &);
// bind a buffer to an image
// Creates a 2D image object for `img`, copies img's buffer into it and stores the handle in `tex`.
void bindImgTex(const oclMat &img, cl_mem &tex);
}
}
}
void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,
const oclMat &fu, const oclMat &fv,
const oclMat &bu, const oclMat &bv,
float pos, oclMat &newFrame, oclMat &buf)
{
CV_Assert(frame0.type() == CV_32FC1);
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
newFrame.create(frame0.size(), frame0.type());
buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);
buf.setTo(Scalar::all(0));
size_t step = frame0.step;
CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
cl_mem tex_src0 = 0, tex_src1 = 0;
// warp flow
using namespace interpolate;
bindImgTex(frame0, tex_src0);
bindImgTex(frame1, tex_src1);
// CUDA Offsets
enum
{
cov0 = 0,
cov1,
fwdU,
fwdV,
bwdU,
bwdV
};
vectorWarp(fu, fu, fv, buf, cov0, pos, fwdU);
vectorWarp(fv, fu, fv, buf, cov0, pos, fwdV);
vectorWarp(bu, bu, bv, buf, cov1, 1.0f - pos, bwdU);
vectorWarp(bv, bu, bv, buf, cov1, 1.0f - pos, bwdU);
blendFrames(frame0, frame1, buf, pos, newFrame, tex_src0, tex_src1);
openCLFree(tex_src0);
openCLFree(tex_src1);
}
void interpolate::memsetKernel(float val, oclMat &img, int height, int offset)
{
Context *clCxt = Context::getContext();
string kernelName = "memsetKernel";
vector< pair<size_t, const void *> > args;
int step = img.step / sizeof(float);
offset = step * height * offset;
args.push_back( make_pair( sizeof(cl_float), (void *)&val));
args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&img.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
args.push_back( make_pair( sizeof(cl_int), (void *)&step));
args.push_back( make_pair( sizeof(cl_int), (void *)&offset));
size_t globalThreads[3] = {img.cols, height, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
}
// Host-side launcher for the "normalizeKernel" OpenCL kernel.
//
// buffer        - matrix whose device buffer and column count are forwarded
// height        - row count forwarded to the kernel; also the second launch dimension
// factor_offset - plane index, forwarded as an element offset (pitch * height * factor_offset)
// dst_offset    - plane index, forwarded as an element offset (pitch * height * dst_offset)
void interpolate::normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset)
{
    typedef vector< pair<size_t, const void *> > KernelArgs;

    // Row pitch counted in floats rather than bytes.
    int pitch = buffer.step / sizeof(float);
    // Plane indices become element offsets into the buffer.
    int factorStart = pitch * height * factor_offset;
    int dstStart = pitch * height * dst_offset;

    size_t block[3] = {16, 16, 1};
    size_t grid[3] = {buffer.cols, height, 1};

    // Argument order must match the kernel's parameter list.
    KernelArgs kernelArgs;
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void *)&buffer.cols));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void *)&height));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void *)&pitch));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void *)&factorStart));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void *)&dstStart));

    string kernelName = "normalizeKernel";
    Context *ctx = Context::getContext();
    openCLExecuteKernel(ctx, &interpolate_frames, kernelName, grid, block, kernelArgs, -1, -1);
}
void interpolate::forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
int b_offset, int d_offset)
{
Context *clCxt = Context::getContext();
string kernelName = "forwardWarpKernel";
vector< pair<size_t, const void *> > args;
int f_step = u.step / sizeof(float); // flow step
int b_step = buffer.step / sizeof(float);
b_offset = b_step * src.rows * b_offset;
d_offset = b_step * src.rows * d_offset;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&u.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&v.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&f_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&b_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&b_offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&d_offset));
args.push_back( make_pair( sizeof(cl_float), (void *)&time_scale));
size_t globalThreads[3] = {src.cols, src.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
}
// Warps src along the flow (u, v) into plane d_offset of buffer, in three
// kernel launches: clear plane b_offset, run the forward warp, then run the
// normalize kernel with plane b_offset as its factor plane.
void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
                             oclMat &buffer, int b_offset, float timeScale, int d_offset)
{
    // Every plane of the buffer is as tall as the source.
    const int planeRows = src.rows;

    memsetKernel(0.0f, buffer, planeRows, b_offset);
    forwardWarpKernel(src, buffer, u, v, timeScale, b_offset, d_offset);
    normalizeKernel(buffer, planeRows, b_offset, d_offset);
}
void interpolate::blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)
{
int step = buffer.step / sizeof(float);
Context *clCxt = Context::getContext();
string kernelName = "blendFramesKernel";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src0));
args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src1));
args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&newFrame.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&step));
args.push_back( make_pair( sizeof(cl_float), (void *)&pos));
size_t globalThreads[3] = {frame0.cols, frame0.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
}
void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
{
cl_image_format format;
int err;
int depth = img.depth();
int channels = img.channels();
switch(depth)
{
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
break;
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
break;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
break;
default:
throw std::exception();
break;
}
switch(channels)
{
case 1:
format.image_channel_order = CL_R;
break;
case 3:
format.image_channel_order = CL_RGB;
break;
case 4:
format.image_channel_order = CL_RGBA;
break;
default:
throw std::exception();
break;
}
if(texture)
{
openCLFree(texture);
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = img.step / img.elemSize();
desc.image_height = img.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
texture = clCreateImage2D(
Context::getContext()->impl->clContext,
CL_MEM_READ_WRITE,
&format,
img.step / img.elemSize(),
img.rows,
0,
NULL,
&err);
#endif
size_t origin[] = { 0, 0, 0 };
size_t region[] = { img.step / img.elemSize(), img.rows, 1 };
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
openCLSafeCall(err);
}
#endif//(HAVE_OPENCL)

View File

@ -70,9 +70,22 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data = vload4(0, src1 + src1_index);
uchar4 src2_data = vload4(0, src2 + src2_index);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = abs_diff(src1_data, src2_data);
@ -242,9 +255,15 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data = vload4(0, src1 + src1_index);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4_sat(abs_diff(convert_int4_sat(src1_data), src2_data));

View File

@ -71,10 +71,22 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data = vload4(0, src1 + src1_index);
uchar4 src2_data = vload4(0, src2 + src2_index);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
@ -248,10 +260,30 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data = vload4(0, src1 + src1_index);
uchar4 src2_data = vload4(0, src2 + src2_index);
uchar4 mask_data = vload4(0, mask + mask_index);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int mask_index_fix = mask_index < 0 ? 0 : mask_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
uchar4 mask_data = vload4(0, mask + mask_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(mask_index < 0)
{
uchar4 tmp;
tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);

View File

@ -65,9 +65,15 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data = vload4(0, src1 + src1_index);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
int4 tmp = convert_int4_sat(src1_data) + src2_data;

View File

@ -68,10 +68,23 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data = vload4(0, src1 + src1_index);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int mask_index_fix = mask_index < 0 ? 0 : mask_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
uchar4 mask_data = vload4(0, mask + mask_index);
uchar4 mask_data = vload4(0, mask + mask_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(mask_index < 0)
{
uchar4 tmp;
tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
int4 tmp = convert_int4_sat(src1_data) + src2_data;

View File

@ -71,9 +71,22 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
int dst_end_1 = mad24(rows - y - 1, dst_step, dst_offset + dst_step1);
int dst_index_0 = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int dst_index_1 = mad24(rows - y - 1, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src_data_0 = vload4(0, src + src_index_0);
uchar4 src_data_1 = vload4(0, src + src_index_1);
int src1_index_fix = src_index_0 < 0 ? 0 : src_index_0;
int src2_index_fix = src_index_1 < 0 ? 0 : src_index_1;
uchar4 src_data_0 = vload4(0, src + src1_index_fix);
uchar4 src_data_1 = vload4(0, src + src2_index_fix);
if(src_index_0 < 0)
{
uchar4 tmp;
tmp.xyzw = (src_index_0 == -2) ? src_data_0.zwxy:src_data_0.yzwx;
src_data_0.xyzw = (src_index_0 == -1) ? src_data_0.wxyz:tmp.xyzw;
}
if(src_index_1 < 0)
{
uchar4 tmp;
tmp.xyzw = (src_index_1 == -2) ? src_data_1.zwxy:src_data_1.yzwx;
src_data_1.xyzw = (src_index_1 == -1) ? src_data_1.wxyz:tmp.xyzw;
}
uchar4 dst_data_0 = *((__global uchar4 *)(dst + dst_index_0));
uchar4 dst_data_1 = *((__global uchar4 *)(dst + dst_index_1));

View File

@ -0,0 +1,237 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Fills map_x / map_y for a plane warp.
//
// One work-item handles destination pixel (du, dv), for du < cols and
// dv < rows. KRT holds 12 floats: a 3x3 row-major matrix (indices 0..8)
// followed by a 3-element vector (indices 9..11).
// step_x / step_y are divided by sizeof(float) before being used as row
// pitches of the float maps.
__kernel
void buildWarpPlaneMaps
(
    __global float * map_x,
    __global float * map_y,
    __constant float * KRT,
    int tl_u,
    int tl_v,
    int cols,
    int rows,
    int step_x,
    int step_y,
    float scale
)
{
    const int du = get_global_id(0);
    const int dv = get_global_id(1);
    if (du >= cols || dv >= rows)
        return;

    const int pitch_x = step_x / sizeof(float);
    const int pitch_y = step_y / sizeof(float);

    __constant float * m = KRT;      // 3x3 matrix part
    __constant float * t = KRT + 9;  // 3-vector part

    // Destination coordinate offset by the top-left corner of the ROI.
    const float u = tl_u + du;
    const float v = tl_v + dv;

    const float px = u / scale - t[0];
    const float py = v / scale - t[1];
    const float pz = 1 - t[2];

    float x = m[0] * px + m[1] * py + m[2] * pz;
    float y = m[3] * px + m[4] * py + m[5] * pz;
    const float z = m[6] * px + m[7] * py + m[8] * pz;

    // Perspective divide (not guarded against z == 0, as in the source).
    x /= z;
    y /= z;

    map_x[dv * pitch_x + du] = x;
    map_y[dv * pitch_y + du] = y;
}
// Fills map_x / map_y for a cylindrical warp.
//
// One work-item handles destination pixel (du, dv). The scaled horizontal
// coordinate is used as an angle (sin/cos), the scaled vertical coordinate
// is used directly; the resulting 3-vector is multiplied by the 3x3
// row-major matrix ck_rinv. Points with z <= 0 are written as (-1, -1).
// step_x / step_y are divided by sizeof(float) to obtain float row pitches.
__kernel
void buildWarpCylindricalMaps
(
    __global float * map_x,
    __global float * map_y,
    __constant float * ck_rinv,
    int tl_u,
    int tl_v,
    int cols,
    int rows,
    int step_x,
    int step_y,
    float scale
)
{
    const int du = get_global_id(0);
    const int dv = get_global_id(1);
    if (du >= cols || dv >= rows)
        return;

    const int pitch_x = step_x / sizeof(float);
    const int pitch_y = step_y / sizeof(float);

    float angle = tl_u + du;
    const float v = tl_v + dv;
    angle /= scale;

    const float dir_x = sin(angle);
    const float dir_y = v / scale;
    const float dir_z = cos(angle);

    float x = ck_rinv[0] * dir_x + ck_rinv[1] * dir_y + ck_rinv[2] * dir_z;
    float y = ck_rinv[3] * dir_x + ck_rinv[4] * dir_y + ck_rinv[5] * dir_z;
    const float z = ck_rinv[6] * dir_x + ck_rinv[7] * dir_y + ck_rinv[8] * dir_z;

    if (z > 0)
    {
        x /= z;
        y /= z;
    }
    else
    {
        // Not in front of the projection: mark with an invalid coordinate.
        x = -1;
        y = -1;
    }

    map_x[dv * pitch_x + du] = x;
    map_y[dv * pitch_y + du] = y;
}
// Fills map_x / map_y for a spherical warp.
//
// One work-item handles destination pixel (du, dv). Both scaled coordinates
// are used as angles to build the vector
// (sin(v) * sin(u), -cos(v), sin(v) * cos(u)), which is multiplied by the
// 3x3 row-major matrix ck_rinv. Points with z <= 0 are written as (-1, -1).
// step_x / step_y are divided by sizeof(float) to obtain float row pitches.
__kernel
void buildWarpSphericalMaps
(
    __global float * map_x,
    __global float * map_y,
    __constant float * ck_rinv,
    int tl_u,
    int tl_v,
    int cols,
    int rows,
    int step_x,
    int step_y,
    float scale
)
{
    const int du = get_global_id(0);
    const int dv = get_global_id(1);
    if (du >= cols || dv >= rows)
        return;

    const int pitch_x = step_x / sizeof(float);
    const int pitch_y = step_y / sizeof(float);

    float u = tl_u + du;
    float v = tl_v + dv;
    v /= scale;
    u /= scale;

    const float sin_v = sin(v);
    const float dir_x = sin_v * sin(u);
    const float dir_y = - cos(v);
    const float dir_z = sin_v * cos(u);

    float x = ck_rinv[0] * dir_x + ck_rinv[1] * dir_y + ck_rinv[2] * dir_z;
    float y = ck_rinv[3] * dir_x + ck_rinv[4] * dir_y + ck_rinv[5] * dir_z;
    const float z = ck_rinv[6] * dir_x + ck_rinv[7] * dir_y + ck_rinv[8] * dir_z;

    if (z > 0)
    {
        x /= z;
        y /= z;
    }
    else
    {
        // Not in front of the projection: mark with an invalid coordinate.
        x = -1;
        y = -1;
    }

    map_x[dv * pitch_x + du] = x;
    map_y[dv * pitch_y + du] = y;
}
// Fills xmap / ymap with the result of applying a 2x3 affine matrix to each
// destination pixel coordinate.
//
//   xmap, ymap     - output float maps
//   c_warpMat      - 6 floats, row-major 2x3 matrix
//   cols, rows     - extent of the maps; work-items outside it do nothing
//   step_x, step_y - row steps of the maps; divided by sizeof(float) here to
//                    obtain pitches in float elements
//
// Fix: the stores previously targeted map_x / map_y, which are not declared
// in this kernel (the parameters are xmap / ymap), so the program could not
// be built.
__kernel
void buildWarpAffineMaps
(
    __global float * xmap,
    __global float * ymap,
    __constant float * c_warpMat,
    int cols,
    int rows,
    int step_x,
    int step_y
)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    step_x /= sizeof(float);
    step_y /= sizeof(float);
    if (x < cols && y < rows)
    {
        const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
        const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
        xmap[y * step_x + x] = xcoo;
        ymap[y * step_y + x] = ycoo;
    }
}
// Fills xmap / ymap with the result of applying a 3x3 perspective matrix to
// each destination pixel coordinate.
//
//   xmap, ymap     - output float maps
//   c_warpMat      - 9 floats, row-major 3x3 matrix
//   cols, rows     - extent of the maps; work-items outside it do nothing
//   step_x, step_y - row steps of the maps; divided by sizeof(float) here to
//                    obtain pitches in float elements
//
// Fix: the stores previously targeted map_x / map_y, which are not declared
// in this kernel (the parameters are xmap / ymap), so the program could not
// be built.
__kernel
void buildWarpPerspectiveMaps
(
    __global float * xmap,
    __global float * ymap,
    __constant float * c_warpMat,
    int cols,
    int rows,
    int step_x,
    int step_y
)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    step_x /= sizeof(float);
    step_y /= sizeof(float);
    if (x < cols && y < rows)
    {
        // Reciprocal of the homogeneous coordinate (third matrix row); it is
        // not guarded against a zero denominator, as in the original code.
        const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
        const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
        const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
        xmap[y * step_x + x] = xcoo;
        ymap[y * step_y + x] = ycoo;
    }
}

View File

@ -254,6 +254,7 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
//ss = convert_uint4(src[cur_addr]);
int cur_col = clamp(startX + col, 0, src_whole_cols);
if(con)
ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
data[i] = con ? ss : 0;
@ -269,6 +270,7 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
}
@ -338,7 +340,8 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
//ss = src[cur_addr];
int cur_col = clamp(startX + col, 0, src_whole_cols);
ss = src[(startY+i)*(src_step>>2) + cur_col];
//ss = src[(startY+i)*(src_step>>2) + cur_col];
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>2) + cur_col]:0;
data[i] = con ? ss : 0.f;
}
@ -422,7 +425,8 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
//ss = src[cur_addr];
int cur_col = clamp(startX + col, 0, src_whole_cols);
ss = src[(startY+i)*(src_step>>4) + cur_col];
//ss = src[(startY+i)*(src_step>>4) + cur_col];
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>4) + cur_col]:0;
data[i] = con ? ss : (float4)(0.0,0.0,0.0,0.0);
}

View File

@ -31,84 +31,8 @@
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
//#pragma OPENCL EXTENSION cl_amd_printf :enable
// Bilateral filter over a uchar4 image: each work-item produces one output
// pixel as a weighted average of the neighbours inside a disc of the given
// radius around it.
//
//   dst, src               - destination / source pixels (uchar4)
//   rows, cols             - work-item ids are clamped to rows-1 / cols-1
//   radius                 - neighbourhood radius in pixels
//   src_step, dst_step,
//   src_offset, dst_offset - divided by 4 before indexing the uchar4 pointers
//                            (presumably byte quantities - confirm with caller)
//   sigClr                 - weight table indexed by the summed absolute
//                            difference of the x, y and z components
//   sigSpc                 - weight table indexed by (ii+radius)*(2*radius+1)+jj+radius
//
// channels, wholerows and wholecols are not used; neither are the locals
// lidx, lidy, tmp and dst_index.
// NOTE(review): neighbour coordinates gx / gy are computed in unsigned
// arithmetic and are not checked against the image extent before src is
// read - confirm the caller guarantees a sufficient border.
__kernel
void bilateral4(__global uchar4 *dst,
__global uchar4 *src,
int rows,
int cols,
int channels,
int radius,
int wholerows,
int wholecols,
int src_step,
int dst_step,
int src_offset,
int dst_offset,
__constant float *sigClr,
__constant float *sigSpc)
{
uint lidx = get_local_id(0);
uint lidy = get_local_id(1);
uint gdx = get_global_id(0);
uint gdy = get_global_id(1);
// Work-items past the image extent are redirected to the last column / row.
uint gidx = gdx >=cols?cols-1:gdx;
uint gidy = gdy >=rows?rows-1:gdy;
uchar4 p,q,tmp;
// pf accumulates weighted colours, pq the weights, pd receives the quotient.
float4 pf = 0,pq = 0,pd = 0;
float wt =0;
int r = radius;
int ij = 0;
int ct = 0;
uint index_src = src_offset/4 + gidy*src_step/4 + gidx;
uint index_dst = dst_offset/4 + gidy*dst_step/4 + gidx;
// Centre pixel against which colour differences are measured.
p = src[index_src];
uint gx,gy;
uint src_index,dst_index;
for(int ii = -r;ii<r+1;ii++)
{
for(int jj =-r;jj<r+1;jj++)
{
// Skip offsets outside the disc of the given radius.
ij = ii*ii+jj*jj;
if(ij > mul24(radius,radius)) continue;
gx = gidx + jj;
gy = gidy + ii;
src_index = src_offset/4 + gy * src_step/4 + gx;
q = src[src_index];
// Colour distance uses the x, y and z components only.
ct = abs(p.x-q.x)+abs(p.y-q.y)+abs(p.z-q.z);
wt =sigClr[ct]*sigSpc[(ii+radius)*(2*radius+1)+jj+radius];
pf.x += q.x*wt;
pf.y += q.y*wt;
pf.z += q.z*wt;
// The w component is deliberately left out of the accumulation:
// pf.w += q.w*wt;
// The scalar weight is added to every component of pq.
pq += wt;
}
}
pd = pf/pq;
dst[index_dst] = convert_uchar4_rte(pd);
}
__kernel void bilateral(__global uchar *dst,
__kernel void bilateral_C1_D0(__global uchar *dst,
__global const uchar *src,
const int dst_rows,
const int dst_cols,
@ -128,8 +52,8 @@ __kernel void bilateral(__global uchar *dst,
if((gidy<dst_rows) && (gidx<dst_cols))
{
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
int dst_addr = mad24(gidy,src_step,gidx+dst_offset);
float sum = 0, wsum = 0;
int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
float sum = 0.f, wsum = 0.f;
int val0 = (int)src[src_addr];
for(int k = 0; k < maxk; k++ )
@ -142,4 +66,73 @@ __kernel void bilateral(__global uchar *dst,
dst[dst_addr] = convert_uchar_rtz(sum/wsum+0.5f);
}
}
// Bilateral filter for single-channel 8-bit data that processes four
// horizontally adjacent pixels per work-item.
//
//   dst, src      - destination / source bytes
//   dst_rows/cols - work-items whose first column or row lies outside do nothing
//   maxk          - number of neighbourhood taps
//   radius        - added to the row and column when addressing src
//   dst_step, dst_offset, src_step - used directly as byte quantities
//   color_weight  - table indexed by the absolute intensity difference
//   space_weight  - per-tap spatial weight
//   space_ofs     - per-tap offset added to the centre source address
//
// src_rows and src_cols are not used.
// NOTE(review): four bytes are loaded and stored per work-item through a
// uchar4 access, so this presumably requires dst_cols to be a multiple of 4
// and a 4-byte aligned destination address - confirm with the host code.
__kernel void bilateral2_C1_D0(__global uchar *dst,
                               __global const uchar *src,
                               const int dst_rows,
                               const int dst_cols,
                               const int maxk,
                               const int radius,
                               const int dst_step,
                               const int dst_offset,
                               const int src_step,
                               const int src_rows,
                               const int src_cols,
                               __constant float *color_weight,
                               __constant float *space_weight,
                               __constant int *space_ofs)
{
    const int first_col = get_global_id(0) << 2;
    const int row = get_global_id(1);
    if (row >= dst_rows || first_col >= dst_cols)
        return;

    const int src_center = mad24(row + radius, src_step, first_col + radius);
    const int dst_pos = mad24(row, dst_step, first_col + dst_offset);

    const int4 center = convert_int4(vload4(0, src + src_center));

    float4 acc = (float4)(0.f);
    float4 weight_total = (float4)(0.f);

    for (int k = 0; k < maxk; k++)
    {
        const int4 neighbour = convert_int4(vload4(0, src + src_center + space_ofs[k]));

        // Per-lane range weight looked up by absolute intensity difference.
        const uint4 diff = abs(neighbour - center);
        const float4 range_w = (float4)(color_weight[diff.x],
                                        color_weight[diff.y],
                                        color_weight[diff.z],
                                        color_weight[diff.w]);

        const float4 w = (float4)(space_weight[k]) * range_w;
        acc += convert_float4(neighbour) * w;
        weight_total += w;
    }

    // +0.5 followed by truncation rounds the normalised sum to nearest.
    *(__global uchar4 *)(dst + dst_pos) = convert_uchar4_rtz(acc / weight_total + 0.5f);
}
// Bilateral filter for 4-channel 8-bit data, one uchar4 pixel per work-item.
//
//   dst, src      - destination / source pixels (uchar4)
//   dst_rows/cols - work-items outside this extent do nothing
//   maxk          - number of neighbourhood taps
//   radius        - added to the row and column when addressing src
//   color_weight  - table indexed by the summed absolute difference of the
//                   x, y and z components (w does not affect the weight)
//   space_weight  - per-tap spatial weight
//   space_ofs     - per-tap offset added to the centre source index
//
// src_rows and src_cols are not used.
// NOTE(review): dst_step, dst_offset and src_step index the uchar4 pointers
// directly, so they are presumably given in pixels - confirm with the host.
__kernel void bilateral_C4_D0(__global uchar4 *dst,
                              __global const uchar4 *src,
                              const int dst_rows,
                              const int dst_cols,
                              const int maxk,
                              const int radius,
                              const int dst_step,
                              const int dst_offset,
                              const int src_step,
                              const int src_rows,
                              const int src_cols,
                              __constant float *color_weight,
                              __constant float *space_weight,
                              __constant int *space_ofs)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    if (row >= dst_rows || col >= dst_cols)
        return;

    const int src_center = mad24(row + radius, src_step, col + radius);
    const int dst_pos = mad24(row, dst_step, col + dst_offset);

    const int4 center = convert_int4(src[src_center]);

    float4 acc = (float4)(0.f);
    float weight_total = 0.f;

    for (int k = 0; k < maxk; k++)
    {
        const int4 neighbour = convert_int4(src[src_center + space_ofs[k]]);
        const uint4 diff = abs(neighbour - center);

        // A single weight per tap, shared by all four components.
        const float w = space_weight[k] * color_weight[diff.x + diff.y + diff.z];
        acc += convert_float4(neighbour) * (float4)(w);
        weight_total += w;
    }

    // Normalise with one reciprocal, then round to nearest via +0.5 / truncate.
    const float inv_total = 1.f / weight_total;
    dst[dst_pos] = convert_uchar4_rtz(acc * (float4)(inv_total) + (float4)(0.5f));
}

View File

@ -144,16 +144,18 @@ __kernel void __attribute__((reqd_work_group_size(1,HISTOGRAM256_BIN_COUNT,1)))c
int rowIndex = mad24(gy, gn, gx);
// rowIndex &= (PARTIAL_HISTOGRAM256_COUNT - 1);
__local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE + 1];
__local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE];
subhist[lidy] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
gidx = ((gidx>=left_col) ? (gidx+cols) : gidx);
if(gidy<rows)
{
int src_index = src_offset + mad24(gidy, src_step, gidx);
barrier(CLK_LOCAL_MEM_FENCE);
int p = (int)src[src_index];
p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
// p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
atomic_inc(subhist + p);
}
barrier(CLK_LOCAL_MEM_FENCE);
globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy];

View File

@ -0,0 +1,252 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Enable the 32-bit integer atomic extensions for global and local memory;
// atomic_addf() below is built on atomic_cmpxchg() over global memory.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
// Image read mode shared by the kernels in this file: coordinates are given
// in pixels (not normalised), reads outside the image are clamped to the
// edge, and samples are linearly filtered.
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// Adds `operand` to the float at `source` atomically.
//
// The float is reinterpreted as a 32-bit integer so that atomic_cmpxchg()
// can be used: the current value is read, the sum is computed, and the
// result is stored only if the location still holds the value that was
// read; otherwise the whole step is retried.
inline void atomic_addf(volatile __global float *source, const float operand)
{
    typedef union
    {
        unsigned int intVal;
        float floatVal;
    } float_bits;

    float_bits expected;
    float_bits desired;
    unsigned int observed;

    do
    {
        expected.floatVal = *source;
        desired.floatVal = expected.floatVal + operand;
        observed = atomic_cmpxchg((volatile __global unsigned int *)source, expected.intVal, desired.intVal);
    }
    while (observed != expected.intVal);
}
// Writes `val` to every element of a width x height float region.
//
//   image  - base pointer; `offset` elements are skipped before indexing
//   step   - row pitch in elements
//
// Work-items whose ids fall outside width x height do nothing.
__kernel void memsetKernel(
    float val,
    __global float * image,
    int width,
    int height,
    int step, // in element
    int offset
)
{
    // Ids are kept as size_t so comparisons and indexing use the same
    // unsigned arithmetic as direct get_global_id() expressions would.
    const size_t gx = get_global_id(0);
    const size_t gy = get_global_id(1);

    if (gx < width && gy < height)
    {
        __global float * region = image + offset;
        region[gx + gy * step] = val;
    }
}
// Divides each element of the destination plane by the matching element of
// the factor plane; elements whose factor is exactly zero are left unchanged.
//
//   buffer   - holds both planes
//   f_offset - element offset of the factor plane inside buffer
//   d_offset - element offset of the destination plane inside buffer
//   step     - row pitch (in elements) shared by both planes
//
// Work-items outside width x height do nothing.
__kernel void normalizeKernel(
    __global float * buffer,
    int width,
    int height,
    int step,
    int f_offset,
    int d_offset
)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    if (col >= width || row >= height)
    {
        return;
    }

    __global float * weights = buffer + f_offset;
    __global float * values = buffer + d_offset;
    const int idx = step * row + col;

    const float weight = weights[idx];
    float inverse = 1.0f;
    if (weight != 0.0f)
    {
        inverse = 1.0f / weight;
    }
    values[idx] *= inverse;
}
// Adds one weighted contribution to the destination plane and the same
// weight to the normalisation plane, provided (tx, ty) lies inside w x h.
inline void forwardWarpSplat(
    volatile __global float * dst,
    volatile __global float * normalization_factor,
    const int tx,
    const int ty,
    const int w,
    const int h,
    const int image_stride,
    const float value,
    const float weight
)
{
    if (tx < 0 || tx >= w || ty < 0 || ty >= h)
    {
        return;
    }
    atomic_addf(dst + ty * image_stride + tx, value * weight);
    atomic_addf(normalization_factor + ty * image_stride + tx, weight);
}

// Forward-warps `src` along the flow (u, v) scaled by time_scale.
//
// Each work-item takes source pixel (j, i), moves it by its flow vector and
// distributes its value over the four pixels surrounding the target
// position, using the fractional parts of the position as weights. Values
// are accumulated atomically into the plane at buffer + dst_offset and the
// weights into the plane at buffer + factor_offset.
//
//   flow_stride  - row pitch (elements) of u and v
//   image_stride - row pitch (elements) of src and of both buffer planes
__kernel void forwardWarpKernel(
    __global const float * src,
    __global float * buffer,
    __global const float * u,
    __global const float * v,
    const int w,
    const int h,
    const int flow_stride,
    const int image_stride,
    const int factor_offset,
    const int dst_offset,
    const float time_scale
)
{
    const int j = get_global_id(0);
    const int i = get_global_id(1);
    if (i >= h || j >= w) return;

    volatile __global float * weight_plane = (volatile __global float *) buffer + factor_offset;
    volatile __global float * value_plane = (volatile __global float *) buffer + dst_offset;

    const int flow_idx = i * flow_stride + j;
    const int src_idx = i * image_stride + j;

    // Target position, shifted by one pixel in both directions.
    const float cx = u[flow_idx] * time_scale + (float)j + 1.0f;
    const float cy = v[flow_idx] * time_scale + (float)i + 1.0f;

    // Split into integral pixel coordinates and fractional weights.
    float whole_x;
    float whole_y;
    const float dx = modf(cx, &whole_x);
    const float dy = modf(cy, &whole_y);
    const int tx = (int) whole_x;
    const int ty = (int) whole_y;

    const float value = src[src_idx];

    // Same visiting order as a walk (tx,ty) -> (tx-1,ty) -> (tx-1,ty-1) -> (tx,ty-1).
    forwardWarpSplat(value_plane, weight_plane, tx,     ty,     w, h, image_stride, value, dx * dy);
    forwardWarpSplat(value_plane, weight_plane, tx - 1, ty,     w, h, image_stride, value, (1.0f - dx) * dy);
    forwardWarpSplat(value_plane, weight_plane, tx - 1, ty - 1, w, h, image_stride, value, (1.0f - dx) * (1.0f - dy));
    forwardWarpSplat(value_plane, weight_plane, tx,     ty - 1, w, h, image_stride, value, dx * (1.0f - dy));
}
// Plane indices inside the shared float buffer. blendFramesKernel multiplies
// each index by (h * step) to locate the start of the corresponding plane.
enum
{
    O0_OS = 0,  // plane tested against 1e-4 to decide visibility on frame 0
    O1_OS = 1,  // plane tested against 1e-4 to decide visibility on frame 1
    U_OS  = 2,  // plane read as u
    V_OS  = 3,  // plane read as v
    UR_OS = 4,  // plane read as ur
    VR_OS = 5   // plane read as vr
};
// Produces one output pixel of the interpolated frame at position `theta`.
//
//   tex_src0, tex_src1 - the two source frames, sampled through `sampler`
//   buffer             - six consecutive float planes of (h * step) elements
//                        each, addressed with the *_OS plane indices
//   out                - destination, indexed with the same row pitch `step`
//   w, h               - extent; work-items outside it do nothing
//
// A pixel whose O0 / O1 plane value exceeds 1e-4 is treated as visible on
// the first / second frame. If visible on both, the two samples are mixed
// with weights (1 - theta) and theta; otherwise only one frame is sampled.
__kernel void blendFramesKernel(
    image2d_t tex_src0,
    image2d_t tex_src1,
    __global float * buffer,
    __global float * out,
    int w,
    int h,
    int step,
    float theta
)
{
    const int plane = h * step;
    __global float * u = buffer + plane * U_OS;
    __global float * v = buffer + plane * V_OS;
    __global float * ur = buffer + plane * UR_OS;
    __global float * vr = buffer + plane * VR_OS;
    __global float * o0 = buffer + plane * O0_OS;
    __global float * o1 = buffer + plane * O1_OS;

    const int ix = get_global_id(0);
    const int iy = get_global_id(1);
    if (ix >= w || iy >= h) return;

    const int pos = ix + step * iy;

    const float flow_u = u[pos];
    const float flow_v = v[pos];
    // NOTE(review): the ur / vr planes are loaded but never used below; the
    // "second frame only" case samples with the u / v flow. Confirm whether
    // the ur / vr flow was intended there.
    const float flow_ur = ur[pos];
    const float flow_vr = vr[pos];

    // Sample at pixel centres.
    const float x = (float)ix + 0.5f;
    const float y = (float)iy + 0.5f;

    const bool visible0 = o0[pos] > 1e-4f;
    const bool visible1 = o1[pos] > 1e-4f;

    const float2 coord0 = (float2)(x - flow_u * theta, y - flow_v * theta);
    const float2 coord1 = (float2)(x + flow_u * (1.0f - theta), y + flow_v * (1.0f - theta));

    if (!visible0)
    {
        // not visible on the first frame: take the second frame only
        out[pos] = read_imagef(tex_src1, sampler, coord1).x;
    }
    else if (visible1)
    {
        // visible on both frames: blend
        out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
                   read_imagef(tex_src1, sampler, coord1).x * theta;
    }
    else
    {
        // visible on the first frame only
        out[pos] = read_imagef(tex_src0, sampler, coord0).x;
    }
}

View File

@ -52,7 +52,10 @@ using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
// Stub used when the library is built without OpenCL support
// (HAVE_OPENCL undefined): it only reports the missing backend.
// NOTE(review): the OpenCL-enabled definition of cv::ocl::matchTemplate in
// this file takes an additional `int method` parameter; confirm that this
// three-parameter signature matches a declared overload.
void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &)
{
throw_nogpu();
}
#else
//helper routines
namespace cv
@ -64,7 +67,9 @@ namespace cv
}
}
namespace cv { namespace ocl
namespace cv
{
namespace ocl
{
void matchTemplate_SQDIFF(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
@ -138,12 +143,7 @@ namespace cv { namespace ocl
integral(image.reshape(1), buf.image_sums[0]);
#if SQRSUM_FIXED
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#else
Mat sqr_mat = templ.reshape(1);
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
#endif
Context *clCxt = image.clCxt;
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
@ -172,7 +172,7 @@ namespace cv { namespace ocl
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
@ -198,7 +198,7 @@ namespace cv { namespace ocl
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
//////////////////////////////////////////////////////////////////////
@ -235,13 +235,9 @@ namespace cv { namespace ocl
buf.image_sqsums.resize(1);
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
#if SQRSUM_FIXED
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#else
oclMat templ_c1 = templ.reshape(1);
multiply(templ_c1, templ_c1, templ_c1);
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
#endif
Context *clCxt = image.clCxt;
string kernelName = "normalizeKernel";
vector< pair<size_t, const void *> > args;
@ -269,7 +265,7 @@ namespace cv { namespace ocl
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(image.channels() == templ.channels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
@ -295,7 +291,7 @@ namespace cv { namespace ocl
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
//////////////////////////////////////////////////////////////////////
// CCOFF
@ -348,7 +344,7 @@ namespace cv { namespace ocl
{
integral(buf.images[i], buf.image_sums[i]);
}
switch(image.channels())
switch(image.oclchannels())
{
case 4:
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
@ -367,7 +363,7 @@ namespace cv { namespace ocl
break;
}
}
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
void matchTemplate_CCOFF_NORMED(
@ -406,13 +402,9 @@ namespace cv { namespace ocl
float templ_sum = 0;
float templ_sqsum = 0;
templ_sum = (float)sum(templ)[0];
#if SQRSUM_FIXED
templ_sqsum = sqrSum(templ)[0];
#else
oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr);
templ_sqsum = saturate_cast<float>(sum(templ_sqr)[0]);
#endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum;
templ_sum *= scale;
@ -432,17 +424,13 @@ namespace cv { namespace ocl
split(image, buf.images);
templ_sum = sum(templ);
#if SQRSUM_FIXED
templ_sqsum = sqrSum(templ);
#else
oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr);
templ_sqsum = sum(templ_sqr);
#endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum;
float templ_sqsum_sum = 0;
for(int i = 0; i < image.channels(); i ++)
for(int i = 0; i < image.oclchannels(); i ++)
{
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
}
@ -450,12 +438,12 @@ namespace cv { namespace ocl
buf.image_sums.resize(buf.images.size());
buf.image_sqsums.resize(buf.images.size());
for(int i = 0; i < image.channels(); i ++)
for(int i = 0; i < image.oclchannels(); i ++)
{
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
}
switch(image.channels())
switch(image.oclchannels())
{
case 4:
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
@ -481,10 +469,11 @@ namespace cv { namespace ocl
break;
}
}
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
}/*ocl*/} /*cv*/
}/*ocl*/
} /*cv*/
void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
{
@ -498,7 +487,8 @@ void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& re
typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
const Caller callers[] = {
const Caller callers[] =
{
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED

View File

@ -128,7 +128,7 @@ namespace cv
// convert_C3C4
void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep)
{
int dstStep_in_pixel = dst.step1() / dst.channels();
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
int pixel_end = dst.wholecols * dst.wholerows - 1;
Context *clCxt = dst.clCxt;
string kernelName = "convertC3C4";
@ -176,7 +176,7 @@ void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep)
// convert_C4C3
void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep)
{
int srcStep_in_pixel = src.step1() / src.channels();
int srcStep_in_pixel = src.step1() / src.oclchannels();
int pixel_end = src.wholecols * src.wholerows - 1;
Context *clCxt = src.clCxt;
string kernelName = "convertC4C3";
@ -228,12 +228,12 @@ void cv::ocl::oclMat::upload(const Mat &m)
Size wholeSize;
Point ofs;
m.locateROI(wholeSize, ofs);
int type = m.type();
if(m.channels() == 3)
{
type = CV_MAKETYPE(m.depth(), 4);
}
create(wholeSize, type);
// int type = m.type();
// if(m.oclchannels() == 3)
//{
// type = CV_MAKETYPE(m.depth(), 4);
//}
create(wholeSize, m.type());
if(m.channels() == 3)
{
@ -274,20 +274,20 @@ void cv::ocl::oclMat::upload(const Mat &m)
rows = m.rows;
cols = m.cols;
offset = ofs.y * step + ofs.x * elemSize();
download_channels = m.channels();
//download_channels = m.channels();
}
void cv::ocl::oclMat::download(cv::Mat &m) const
{
CV_DbgAssert(!this->empty());
int t = type();
if(download_channels == 3)
{
t = CV_MAKETYPE(depth(), 3);
}
m.create(wholerows, wholecols, t);
// int t = type();
// if(download_channels == 3)
//{
// t = CV_MAKETYPE(depth(), 3);
//}
m.create(wholerows, wholecols, type());
if(download_channels == 3)
if(m.channels() == 3)
{
int pitch = wholecols * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072;
@ -350,7 +350,7 @@ void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, strin
{"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
};
char compile_option[32];
sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.channels()-1][dst.depth()].c_str());
sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3];
@ -432,7 +432,7 @@ void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
localThreads, args, dst.channels(), dst.depth());
localThreads, args, dst.oclchannels(), dst.depth());
}
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
{
@ -504,7 +504,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=uchar");
@ -523,7 +523,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=char");
@ -542,7 +542,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=ushort");
@ -561,7 +561,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=short");
@ -580,7 +580,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=int");
@ -606,7 +606,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.fval.s[1] = scalar.val[1];
val.fval.s[2] = scalar.val[2];
val.fval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=float");
@ -625,7 +625,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
val.dval.s[1] = scalar.val[1];
val.dval.s[2] = scalar.val[2];
val.dval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=double");
@ -696,7 +696,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=uchar");
@ -715,7 +715,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=char");
@ -734,7 +734,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=ushort");
@ -753,7 +753,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=short");
@ -772,7 +772,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=int");
@ -791,7 +791,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.fval.s[1] = scalar.val[1];
val.fval.s[2] = scalar.val[2];
val.fval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=float");
@ -810,7 +810,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
val.dval.s[1] = scalar.val[1];
val.dval.s[2] = scalar.val[2];
val.dval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=double");
@ -875,50 +875,91 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
{
if( new_rows != 0 && new_rows != rows)
{
CV_Error( CV_StsBadFunc,
"oclMat's number of rows can not be changed for current version" );
}
oclMat hdr = *this;
int cn = channels();
int cn = oclchannels();
if (new_cn == 0)
new_cn = cn;
int total_width = cols * cn;
if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
new_rows = rows * total_width / new_cn;
if (new_rows != 0 && new_rows != rows)
{
int total_size = total_width * rows;
if (!isContinuous())
CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
if ((unsigned)new_rows > (unsigned)total_size)
CV_Error(CV_StsOutOfRange, "Bad new number of rows");
total_width = total_size / new_rows;
if (total_width * new_rows != total_size)
CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
hdr.rows = new_rows;
hdr.step = total_width * elemSize1();
}
int new_width = total_width / new_cn;
if (new_width * new_cn != total_width)
CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");
hdr.cols = new_width;
hdr.wholecols = new_width;
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
return hdr;
}
@ -926,15 +967,13 @@ oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
{
clCxt = Context::getContext();
//cout << "cv::ocl::oclMat::create()." << endl;
/* core logic */
_type &= TYPE_MASK;
download_channels = CV_MAT_CN(_type);
if(download_channels==3)
{
_type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4);
}
//download_channels = CV_MAT_CN(_type);
//if(download_channels==3)
//{
// _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4);
//}
if( rows == _rows && cols == _cols && type() == _type && data )
return;
if( data )
@ -979,7 +1018,6 @@ void cv::ocl::oclMat::release()
step = rows = cols = 0;
offset = wholerows = wholecols = 0;
refcount = 0;
download_channels=0;
}
#endif /* !defined (HAVE_OPENCL) */

View File

@ -100,7 +100,7 @@ void pyrdown_run(const oclMat &src, const oclMat &dst)
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
//////////////////////////////////////////////////////////////////////////////
// pyrDown
@ -111,8 +111,6 @@ void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
dst.download_channels=src.download_channels;
pyrdown_run(src, dst);
}

View File

@ -144,7 +144,7 @@ void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
localThreads, args, dst.channels(), dst.depth(), CLFLUSH);
localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
}
void convertTo( const oclMat &src, oclMat &m, int rtype, double alpha = 1, double beta = 0 );
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
@ -157,7 +157,7 @@ void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double
if( rtype < 0 )
rtype = src.type();
else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());
int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
if( sdepth == ddepth && noScale )
@ -216,7 +216,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=uchar");
@ -235,7 +235,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=char");
@ -254,7 +254,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=ushort");
@ -273,7 +273,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=short");
@ -292,7 +292,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=int");
@ -318,7 +318,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.fval.s[1] = (float)scalar.val[1];
val.fval.s[2] = (float)scalar.val[2];
val.fval.s[3] = (float)scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=float");
@ -337,7 +337,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string
val.dval.s[1] = scalar.val[1];
val.dval.s[2] = scalar.val[2];
val.dval.s[3] = scalar.val[3];
switch(dst.channels())
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=double");
@ -489,7 +489,7 @@ void pyrdown_run_cus(const oclMat &src, const oclMat &dst)
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth(), CLFLUSH);
openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH);
}
void pyrDown_cus(const oclMat &src, oclMat &dst)
@ -679,7 +679,7 @@ void lkSparse_run(oclMat& I, oclMat& J,
size_t localThreads[3] = { 8, 32, 1 };
size_t globalThreads[3] = { 8 * ptcount, 32, 1};
int cn = I.channels();
int cn = I.oclchannels();
bool calcErr;
if (err)
@ -718,7 +718,7 @@ void lkSparse_run(oclMat& I, oclMat& J,
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH);
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
releaseTexture(ITex);
releaseTexture(JTex);
@ -738,7 +738,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
iters = std::min(std::max(iters, 0), 100);
const int cn = prevImg.channels();
const int cn = prevImg.oclchannels();
dim3 block, patch;
calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
@ -817,7 +817,7 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { I.cols, I.rows, 1};
int cn = I.channels();
int cn = I.oclchannels();
bool calcErr;
if (err)
@ -859,7 +859,7 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH);
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
releaseTexture(ITex);
releaseTexture(JTex);

View File

@ -55,16 +55,21 @@ using namespace cv::ocl;
using namespace std;
#ifndef HAVE_OPENCL
void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); }
// Stub compiled when HAVE_OPENCL is not defined: it performs no work and only
// calls throw_nogpu() (presumably reporting that OpenCL support is missing --
// confirm against throw_nogpu's definition).
//
// The parameter list mirrors the two-argument pyrUp(src, dst) defined in the
// HAVE_OPENCL branch of this file. The earlier three-argument form took a
// GpuMat, which is not an ocl type and matched no pyrUp overload visible here.
void cv::ocl::pyrUp(const oclMat &, oclMat &)
{
    throw_nogpu();
}
#else
namespace cv { namespace ocl
namespace cv
{
namespace ocl
{
extern const char *pyr_up;
void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst)
{
dst.create(src.rows * 2, src.cols * 2, src.type());
dst.download_channels=src.download_channels;
Context *clCxt = src.clCxt;
const std::string kernelName = "pyrUp";
@ -84,7 +89,9 @@ namespace cv { namespace ocl
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
}};
}
};
#endif // HAVE_OPENCL

View File

@ -114,7 +114,7 @@ namespace cv
void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
{
Context *clCxt = mat_dst.clCxt;
int channels = mat_dst.channels();
int channels = mat_dst.oclchannels();
int depth = mat_dst.depth();
string kernelName = "merge_vector";
@ -165,7 +165,7 @@ namespace cv
}
Context *clCxt = mat_dst.clCxt;
int channels = mat_dst.channels();
int channels = mat_dst.oclchannels();
int depth = mat_dst.depth();
string kernelName = "merge_vector";
@ -243,7 +243,7 @@ namespace cv
CV_Assert(depth == mat_src[i].depth());
CV_Assert(size == mat_src[i].size());
total_channels += mat_src[i].channels();
total_channels += mat_src[i].oclchannels();
}
CV_Assert(total_channels <= 4);
@ -263,7 +263,7 @@ namespace cv
void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
{
Context *clCxt = mat_src.clCxt;
int channels = mat_src.channels();
int channels = mat_src.oclchannels();
int depth = mat_src.depth();
string kernelName = "split_vector";
@ -314,7 +314,7 @@ namespace cv
}
Context *clCxt = mat_src.clCxt;
int channels = mat_src.channels();
int channels = mat_src.oclchannels();
int depth = mat_src.depth();
string kernelName = "split_vector";
@ -379,7 +379,7 @@ namespace cv
CV_Assert(mat_dst);
int depth = mat_src.depth();
int num_channels = mat_src.channels();
int num_channels = mat_src.oclchannels();
Size size = mat_src.size();
if(num_channels == 1)
@ -413,8 +413,8 @@ void cv::ocl::split(const oclMat &src, oclMat *dst)
}
void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
{
dst.resize(src.channels());
if(src.channels() > 0)
dst.resize(src.oclchannels());
if(src.oclchannels() > 0)
split_merge::split(src, &dst[0]);
}
#endif /* !defined (HAVE_OPENCL) */

View File

@ -44,7 +44,7 @@
//M*/
#include <iomanip>
#include "precomp.hpp"
#include "opencv2/highgui/highgui.hpp"
//#include "opencv2/highgui/highgui.hpp"
using namespace cv;
using namespace cv::ocl;
@ -52,25 +52,65 @@ using namespace std;
#if !defined (HAVE_OPENCL)
cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); }
cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); }
int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;}
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>&, oclMat&) { throw_nogpu(); }
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector<float>&) { throw_nogpu(); }
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); }
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, oclMat&, bool) { throw_nogpu(); }
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, vector<float>&, bool) { throw_nogpu(); }
void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); }
// Stub implementations of every SURF_OCL member, compiled only when
// HAVE_OPENCL is not defined. Each one ignores its (unnamed) arguments and
// just calls throw_nogpu(), so any use of SURF_OCL in such a build goes
// through that single error path.

// Default constructor stub.
cv::ocl::SURF_OCL::SURF_OCL()
{
throw_nogpu();
}
// Parameterised constructor stub; all six arguments are ignored.
cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool)
{
throw_nogpu();
}
// descriptorSize stub.
int cv::ocl::SURF_OCL::descriptorSize() const
{
throw_nogpu();
// Only reached if throw_nogpu() returns; presumably present to satisfy the
// non-void return type.
return 0;
}
// Keypoint upload stub.
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint> &, oclMat &)
{
throw_nogpu();
}
// Keypoint download stub.
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &, vector<KeyPoint> &)
{
throw_nogpu();
}
// Descriptor download stub.
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &, vector<float> &)
{
throw_nogpu();
}
// operator() stubs: one per overload, differing only in the argument types
// (oclMat vs. vector<KeyPoint> / vector<float> outputs, with or without the
// trailing bool).
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &)
{
throw_nogpu();
}
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &, oclMat &, bool)
{
throw_nogpu();
}
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &)
{
throw_nogpu();
}
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, oclMat &, bool)
{
throw_nogpu();
}
void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, vector<float> &, bool)
{
throw_nogpu();
}
// releaseMemory stub.
void cv::ocl::SURF_OCL::releaseMemory()
{
throw_nogpu();
}
#else /* !defined (HAVE_OPENCL) */
namespace cv { namespace ocl
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *nonfree_surf;
}}
}
}
static inline int divUp(int total, int grain)
@ -540,10 +580,12 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, i
args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows));
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = {
size_t globalThreads[3] =
{
divUp(max_samples_j, localThreads[0]) *localThreads[0],
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
1};
1
};
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
@ -580,7 +622,8 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat&
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0],
divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) *nLayers *localThreads[1],
1};
1
};
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}

View File

@ -88,7 +88,7 @@ int main(int argc, char **argv)
std::cout << "no device found\n";
return -1;
}
//setDevice(oclinfo[2]);
//setDevice(oclinfo[1]);
return RUN_ALL_TESTS();
}

View File

@ -143,6 +143,10 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
src1y = rng.uniform(0, mat1.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows);
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
@ -150,11 +154,11 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
src1y = 0;
dstx = 0;
dsty = 0;
maskx = 0;
masky = 0;
src2x = 0;
src2y = 0;
#endif
maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows);
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
@ -1525,20 +1529,20 @@ INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
Values(CV_8UC1,CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
@ -1558,7 +1562,7 @@ INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
@ -1583,19 +1587,19 @@ INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3,CV_32FC
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1,CV_32FC3, CV_32FC4), Values(false)));
Values(CV_8UC1, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false)));

View File

@ -6,7 +6,7 @@ using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#ifdef HAVE_OPENCL
template <typename T>
void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
{
@ -81,3 +81,4 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
));
#endif

View File

@ -40,8 +40,9 @@
//M*/
#include "precomp.hpp"
namespace {
#ifdef HAVE_OPENCL
namespace
{
/////////////////////////////////////////////////////////////////////////////////////////////////
// BruteForceMatcher
@ -216,4 +217,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
} // namespace
#endif

View File

@ -44,8 +44,12 @@
//M*/
#include "precomp.hpp"
#define FILTER_IMAGE "../../../samples/gpu/road.png"
#ifdef HAVE_OPENCL
#ifdef WIN32
#define FILTER_IMAGE "C:/Users/Public/Pictures/Sample Pictures/Penguins.jpg"
#else
#define FILTER_IMAGE "/Users/Test/Valve_original.PNG" // user need to specify a valid image path
#endif
#define SHOW_RESULT 0
////////////////////////////////////////////////////////
@ -106,3 +110,4 @@ TEST_P(Canny, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif

View File

@ -828,7 +828,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, C
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(1, 3)));
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
@ -840,7 +840,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
(MatType)cv::BORDER_REPLICATE)));

View File

@ -53,7 +53,13 @@ using namespace testing;
using namespace std;
using namespace cv;
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
// Function object that maps a CvAvgComp to a copy of its `rect` member.
// Stateless; the call operator is const so it can be invoked on a temporary
// or const instance.
struct getRect
{
    Rect operator ()(const CvAvgComp &comp) const
    {
        return comp.rect;
    }
};
PARAM_TEST_CASE(HaarTestBase, int, int)
{
@ -113,7 +119,8 @@ TEST_F(Haar, FaceDetect)
CV_RGB(255, 128, 0),
CV_RGB(255, 255, 0),
CV_RGB(255, 0, 0),
CV_RGB(255,0,255)} ;
CV_RGB(255, 0, 255)
} ;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));

View File

@ -491,7 +491,7 @@ TEST_P(bilateralFilter, Mat)
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
if (mat1.depth() != CV_8U || mat1.type() != dst.type())
{
cout << "Unsupported type" << endl;
EXPECT_DOUBLE_EQ(0.0, 0.0);
@ -502,47 +502,41 @@ TEST_P(bilateralFilter, Mat)
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
#ifdef RANDOMROI
if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
{
continue;
}
if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
{
dst_roi.adjustROI(radius, radius, radius, radius);
cldst_roi.adjustROI(radius, radius, radius, radius);
}
else
{
continue;
}
#endif
//if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
//{
// dst_roi.adjustROI(radius, radius, radius, radius);
// cldst_roi.adjustROI(radius, radius, radius, radius);
//}
//else
//{
// continue;
//}
cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
cv::Mat cpu_cldst;
#ifndef RANDOMROI
cldst_roi.download(cpu_cldst);
#else
cldst.download(cpu_cldst);
#endif
char sss[1024];
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,radius=%d,boredertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, radius, borderstr[i]);
#ifndef RANDOMROI
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
#else
//for(int i=0;i<dst_roi.rows;i++)
//for(int i=0;i<dst.rows;i++)
//{
// for(int j=0;j<dst_roi.cols;j++)
// for(int j=0;j<dst.cols*dst.channels();j++)
// {
// cout<< (int)dst_roi.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
// if(dst.at<uchar>(i,j)!=cpu_cldst.at<uchar>(i,j))
// cout<< i <<" "<< j <<" "<< (int)dst.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
// }
// cout<<endl;
//}
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
#endif
EXPECT_MAT_NEAR(dst, cpu_cldst, 1.0, sss);
}
}
}
@ -1661,12 +1655,19 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
// NULL_TYPE,
// NULL_TYPE,
// Values(false))); // Values(false) is the reserved parameter
// Instantiates the bilateralFilter parameterized test under the
// ImgprocTestBase prefix. Combine() generates one test case per element of
// the cartesian product of its generators, so the two Values(CV_8UC1,
// CV_8UC3) slots yield four type pairings; the NULL_TYPE slots are unused.
// NOTE(review): presumably the first slot is the source type and the third
// the destination type -- confirm against the ImgprocTestBase fixture.
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
Values(CV_8UC1, CV_8UC3),
NULL_TYPE,
Values(CV_8UC1, CV_8UC3),
NULL_TYPE,
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
Values(CV_8UC1, CV_8UC4,CV_32SC1, CV_32SC4,CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
NULL_TYPE,
Values(CV_8UC1,CV_8UC4,CV_32SC1, CV_32SC4,CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
NULL_TYPE,
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
@ -1697,21 +1698,21 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(cv::Size()),
Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));

View File

@ -44,7 +44,8 @@
#include "precomp.hpp"
#define PERF_TEST 0
#ifdef HAVE_OPENCL
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
@ -156,18 +157,18 @@ TEST_P(MatchTemplate32F, Accuracy)
#endif // PERF_TEST
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
testing::Combine(
MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(3),Channels(4)),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(3),Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
// testing::Combine(
// MTEMP_SIZES,
// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
// testing::Values(Channels(1), Channels(3), Channels(4)),
// ALL_TEMPLATE_METHODS
// )
// );
//
//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
// MTEMP_SIZES,
// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
// testing::Values(Channels(1), Channels(3), Channels(4)),
// testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif

View File

@ -497,11 +497,11 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4)));
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine(

View File

@ -130,8 +130,8 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int)
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
src3x = rng.uniform(0, mat3.cols - roicols);
src3y = rng.uniform(0, mat3.cols - roirows);
src4x = rng.uniform(0, mat4.rows - roicols);
src3y = rng.uniform(0, mat3.rows - roirows);
src4x = rng.uniform(0, mat4.cols - roicols);
src4y = rng.uniform(0, mat4.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);