mirror of https://github.com/opencv/opencv.git
Added linear filters for int and float source types to the gpu module.
Refactored the gpu module.
commit b08f60828b (parent ea040ce71a)
@@ -50,7 +50,7 @@ namespace cv
     // Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.
     // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
 
-    template<typename T = unsigned char>
+    template <typename T>
     struct DevMem2D_
     {
         typedef T elem_t;
@@ -60,16 +60,21 @@ namespace cv
         int rows;
         T* ptr;
         size_t step;
+        size_t elem_step;
 
-        DevMem2D_() : cols(0), rows(0), ptr(0), step(0) {}
+        DevMem2D_() : cols(0), rows(0), ptr(0), step(0), elem_step(0) {}
 
         DevMem2D_(int rows_, int cols_, T *ptr_, size_t step_)
-            : cols(cols_), rows(rows_), ptr(ptr_), step(step_) {}
+            : cols(cols_), rows(rows_), ptr(ptr_), step(step_), elem_step(step_ / sizeof(T)) {}
+
+        template <typename U>
+        explicit DevMem2D_(const DevMem2D_<U>& d)
+            : cols(d.cols), rows(d.rows), ptr((T*)d.ptr), step(d.step), elem_step(d.step / sizeof(T)) {}
 
         size_t elemSize() const { return elem_size; }
     };
 
-    typedef DevMem2D_<> DevMem2D;
+    typedef DevMem2D_<unsigned char> DevMem2D;
     typedef DevMem2D_<float> DevMem2Df;
    typedef DevMem2D_<int> DevMem2Di;
 }
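Note: the new `elem_step` field caches the row pitch in elements (`step / sizeof(T)`), so device code can index rows without repeating the byte-to-element division at every access. A minimal sketch of the intended indexing — a hypothetical kernel, not part of this commit:

    // Hypothetical kernel showing elem_step-based addressing (not in the commit).
    template <typename T>
    __global__ void scaleInPlace(cv::gpu::DevMem2D_<T> img, T factor)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < img.cols && y < img.rows)
            img.ptr[y * img.elem_step + x] *= factor;   // elem_step == step / sizeof(T)
    }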
@@ -636,7 +636,7 @@ namespace cv
 
         //! returns the separable filter engine with the specified filters
         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
-            const Ptr<BaseColumnFilter_GPU>& columnFilter, bool rowFilterFirst = true);
+            const Ptr<BaseColumnFilter_GPU>& columnFilter);
 
         //! returns horizontal 1D box filter
         //! supports only CV_8UC1 source type and CV_32FC1 sum type
@@ -658,7 +658,7 @@ namespace cv
         //! only MORPH_ERODE and MORPH_DILATE are supported
         //! supports CV_8UC1 and CV_8UC4 types
         //! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
-        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const GpuMat& kernel, const Size& ksize,
+        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
             Point anchor=Point(-1,-1));
 
         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
@@ -667,25 +667,24 @@ namespace cv
 
         //! returns 2D filter with the specified kernel
         //! supports CV_8UC1 and CV_8UC4 types
         //! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
-        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const GpuMat& kernel, const Size& ksize,
-            Point anchor = Point(-1, -1), int nDivisor = 1);
+        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize,
+            Point anchor = Point(-1, -1));
 
         //! returns the non-separable linear filter engine
         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
             const Point& anchor = Point(-1,-1));
 
         //! returns the primitive row filter with the specified kernel
-        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const GpuMat& rowKernel,
-            int anchor = -1, int nDivisor = 1);
+        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
+            int anchor = -1);
 
         //! returns the primitive column filter with the specified kernel
-        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const GpuMat& columnKernel,
-            int anchor = -1, int nDivisor = 1);
+        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
+            int anchor = -1);
 
         //! returns the separable linear filter engine
         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
-            const Mat& columnKernel, const Point& anchor = Point(-1,-1), bool rowFilterFirst = true);
+            const Mat& columnKernel, const Point& anchor = Point(-1,-1));
 
         //! returns filter engine for the generalized Sobel operator
         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize);
@@ -720,7 +719,7 @@ namespace cv
 
         //! applies separable 2D linear filter to the image
         CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
-            Point anchor = Point(-1,-1), bool rowFilterFirst = true);
+            Point anchor = Point(-1,-1));
 
         //! applies generalized Sobel operator to the image
         CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
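The `rowFilterFirst` flags and the fixed-point `nDivisor` arguments are gone, and filter kernels are now passed as host-side `Mat` instead of `GpuMat`. A hypothetical caller against the post-commit signatures (kernel values illustrative only, `hostImage` assumed to be a CV_32FC1 `cv::Mat`):

    // Hypothetical usage of the reworked separable-filter API.
    cv::Mat kx = (cv::Mat_<float>(1, 3) << 0.25f, 0.5f, 0.25f);   // row kernel
    cv::Mat ky = (cv::Mat_<float>(1, 3) << 0.25f, 0.5f, 0.25f);   // column kernel

    cv::gpu::GpuMat src(hostImage), dst;
    cv::gpu::sepFilter2D(src, dst, CV_32F, kx, ky);               // anchor defaults to kernel center

    // Or keep one engine around to filter many images of the same type:
    cv::Ptr<cv::gpu::FilterEngine_GPU> f =
        cv::gpu::createSeparableLinearFilter_GPU(CV_32FC1, CV_32FC1, kx, ky);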
@@ -316,9 +316,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
 ////////////////////////////////////////////////////////////////////////
 // compare
 
-namespace cv { namespace gpu { namespace matrix_operations
+namespace cv { namespace gpu { namespace mathfunc
 {
-    void compare_ne_8u(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
+    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
     void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
 }}}
 
@@ -346,7 +346,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
         }
         else
         {
-            matrix_operations::compare_ne_8u(src1, src2, dst);
+            mathfunc::compare_ne_8uc4(src1, src2, dst);
         }
     }
     else
@@ -359,7 +359,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
         }
         else
         {
-            matrix_operations::compare_ne_32f(src1, src2, dst);
+            mathfunc::compare_ne_32f(src1, src2, dst);
         }
     }
 }
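`cv::gpu::compare` keeps its dispatch shape: the `CMP_NE` branches now land in `mathfunc::compare_ne_8uc4` for 8-bit four-channel input and `mathfunc::compare_ne_32f` for float input. A hypothetical call, assuming OpenCV's usual compare semantics (`hostA`/`hostB` are placeholder host images):

    // Hypothetical usage; CMP_NE on CV_8UC4 inputs reaches compare_ne_8uc4.
    cv::gpu::GpuMat a(hostA), b(hostB), mask;
    cv::gpu::compare(a, b, mask, cv::CMP_NE);   // mask: 255 where a != b, 0 elsewhere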
@@ -1,233 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//  (standard OpenCV BSD-style license header, elided here; identical to the one at the
-//  top of the new filters.cu below)
-//M*/
-
-#include "opencv2/gpu/devmem2d.hpp"
-#include "saturate_cast.hpp"
-#include "safe_call.hpp"
-
-using namespace cv::gpu;
-
-#ifndef FLT_MAX
-#define FLT_MAX 3.402823466e+30F
-#endif
-
-namespace bf_krnls
-{
-    __constant__ float* ctable_color;
-    __constant__ float* ctable_space;
-    __constant__ size_t ctable_space_step;
-
-    __constant__ int cndisp;
-    __constant__ int cradius;
-
-    __constant__ short cedge_disc;
-    __constant__ short cmax_disc;
-}
-
-namespace cv { namespace gpu { namespace bf
-{
-    void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.ptr, sizeof(table_space.ptr)) );
-        size_t table_space_step = table_space.step / sizeof(float);
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
-
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cndisp, &ndisp, sizeof(int)) );
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cradius, &radius, sizeof(int)) );
-
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cedge_disc, &edge_disc, sizeof(short)) );
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cmax_disc, &max_disc, sizeof(short)) );
-    }
-}}}
-
-namespace bf_krnls
-{
-    template <int channels>
-    struct DistRgbMax
-    {
-        static __device__ uchar calc(const uchar* a, const uchar* b)
-        {
-            uchar x = abs(a[0] - b[0]);
-            uchar y = abs(a[1] - b[1]);
-            uchar z = abs(a[2] - b[2]);
-            return (max(max(x, y), z));
-        }
-    };
-
-    template <>
-    struct DistRgbMax<1>
-    {
-        static __device__ uchar calc(const uchar* a, const uchar* b)
-        {
-            return abs(a[0] - b[0]);
-        }
-    };
-
-    template <int channels, typename T>
-    __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
-    {
-        const int y = blockIdx.y * blockDim.y + threadIdx.y;
-        const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
-
-        T dp[5];
-
-        if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
-        {
-            dp[0] = *(disp + (y  ) * disp_step + x + 0);
-            dp[1] = *(disp + (y-1) * disp_step + x + 0);
-            dp[2] = *(disp + (y  ) * disp_step + x - 1);
-            dp[3] = *(disp + (y+1) * disp_step + x + 0);
-            dp[4] = *(disp + (y  ) * disp_step + x + 1);
-
-            if(abs(dp[1] - dp[0]) >= cedge_disc || abs(dp[2] - dp[0]) >= cedge_disc || abs(dp[3] - dp[0]) >= cedge_disc || abs(dp[4] - dp[0]) >= cedge_disc)
-            {
-                const int ymin = max(0, y - cradius);
-                const int xmin = max(0, x - cradius);
-                const int ymax = min(h - 1, y + cradius);
-                const int xmax = min(w - 1, x + cradius);
-
-                float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
-
-                const uchar* ic = img + y * img_step + channels * x;
-
-                for(int yi = ymin; yi <= ymax; yi++)
-                {
-                    const T* disp_y = disp + yi * disp_step;
-
-                    for(int xi = xmin; xi <= xmax; xi++)
-                    {
-                        const uchar* in = img + yi * img_step + channels * xi;
-
-                        uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
-
-                        const float weight = ctable_color[dist_rgb] * (ctable_space + abs(y-yi)* ctable_space_step)[abs(x-xi)];
-
-                        const T disp_reg = disp_y[xi];
-
-                        cost[0] += min(cmax_disc, abs(disp_reg - dp[0])) * weight;
-                        cost[1] += min(cmax_disc, abs(disp_reg - dp[1])) * weight;
-                        cost[2] += min(cmax_disc, abs(disp_reg - dp[2])) * weight;
-                        cost[3] += min(cmax_disc, abs(disp_reg - dp[3])) * weight;
-                        cost[4] += min(cmax_disc, abs(disp_reg - dp[4])) * weight;
-                    }
-                }
-
-                float minimum = FLT_MAX;
-                int id = 0;
-
-                if (cost[0] < minimum)
-                {
-                    minimum = cost[0];
-                    id = 0;
-                }
-                if (cost[1] < minimum)
-                {
-                    minimum = cost[1];
-                    id = 1;
-                }
-                if (cost[2] < minimum)
-                {
-                    minimum = cost[2];
-                    id = 2;
-                }
-                if (cost[3] < minimum)
-                {
-                    minimum = cost[3];
-                    id = 3;
-                }
-                if (cost[4] < minimum)
-                {
-                    minimum = cost[4];
-                    id = 4;
-                }
-
-                *(disp + y * disp_step + x) = dp[id];
-            }
-        }
-    }
-}
-
-namespace cv { namespace gpu { namespace bf
-{
-    template <typename T>
-    void bilateral_filter_caller(const DevMem2D_<T>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
-    {
-        dim3 threads(32, 8, 1);
-        dim3 grid(1, 1, 1);
-        grid.x = divUp(disp.cols, threads.x << 1);
-        grid.y = divUp(disp.rows, threads.y);
-
-        switch (channels)
-        {
-        case 1:
-            for (int i = 0; i < iters; ++i)
-            {
-                bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
-                bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
-            }
-            break;
-        case 3:
-            for (int i = 0; i < iters; ++i)
-            {
-                bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
-                bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
-            }
-            break;
-        default:
-            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
-        }
-
-        if (stream != 0)
-            cudaSafeCall( cudaThreadSynchronize() );
-    }
-
-    void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
-    {
-        bilateral_filter_caller(disp, img, channels, iters, stream);
-    }
-
-    void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
-    {
-        bilateral_filter_caller(disp, img, channels, iters, stream);
-    }
-}}}
@@ -42,6 +42,7 @@
 
 #include "cuda_shared.hpp"
 #include "saturate_cast.hpp"
+#include "vecmath.hpp"
 
 using namespace cv::gpu;
 
@@ -53,16 +54,8 @@ using namespace cv::gpu;
 #define FLT_EPSILON 1.192092896e-07F
 #endif
 
-namespace imgproc
+namespace imgproc_krnls
 {
-    template<typename T, int N> struct TypeVec {};
-    template<> struct TypeVec<uchar, 3> { typedef uchar3 vec_t; };
-    template<> struct TypeVec<uchar, 4> { typedef uchar4 vec_t; };
-    template<> struct TypeVec<ushort, 3> { typedef ushort3 vec_t; };
-    template<> struct TypeVec<ushort, 4> { typedef ushort4 vec_t; };
-    template<> struct TypeVec<float, 3> { typedef float3 vec_t; };
-    template<> struct TypeVec<float, 4> { typedef float4 vec_t; };
-
     template<typename T> struct ColorChannel {};
     template<> struct ColorChannel<uchar>
     {
@@ -106,7 +99,7 @@ namespace imgproc
 
 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     template <int SRCCN, int DSTCN, typename T>
     __global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
@@ -132,7 +125,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int SRCCN, int DSTCN>
     void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
@@ -143,7 +136,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
             dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -189,7 +182,7 @@ namespace cv { namespace gpu { namespace improc
 
 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
     template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
@@ -281,7 +274,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <int GREEN_BITS, int DSTCN>
     void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
@@ -292,7 +285,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step,
             dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -320,7 +313,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
             dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -342,7 +335,7 @@ namespace cv { namespace gpu { namespace improc
 
 ///////////////////////////////// Grayscale to Color ////////////////////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     template <int DSTCN, typename T>
     __global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
@@ -396,7 +389,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int DSTCN>
     void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
@@ -407,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
            dst.ptr, dst.step, src.rows, src.cols);
 
         if (stream == 0)
@@ -447,7 +440,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
            dst.ptr, dst.step, src.rows, src.cols);
 
         if (stream == 0)
@@ -468,7 +461,7 @@ namespace cv { namespace gpu { namespace improc
 
 ///////////////////////////////// Color to Grayscale ////////////////////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
 #undef R2Y
 #undef G2Y
@@ -550,7 +543,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int SRCCN>
     void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
@@ -561,7 +554,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
            dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -601,7 +594,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
            dst.ptr, dst.step, src.rows, src.cols);
 
         if (stream == 0)
@@ -622,7 +615,7 @@ namespace cv { namespace gpu { namespace improc
 
 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     __constant__ float cYCrCbCoeffs_f[5];
     __constant__ int cYCrCbCoeffs_i[5];
@@ -721,7 +714,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int SRCCN, int DSTCN>
     void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
@@ -732,7 +725,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
             dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -748,7 +741,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
 
         RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
     }
@@ -762,7 +755,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
 
         RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
     }
@@ -776,7 +769,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
 
         RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
     }
@@ -790,7 +783,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
             dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -806,7 +799,7 @@ namespace cv { namespace gpu { namespace improc
             {YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
 
         YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
     }
@@ -820,7 +813,7 @@ namespace cv { namespace gpu { namespace improc
             {YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
 
         YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
     }
@@ -834,7 +827,7 @@ namespace cv { namespace gpu { namespace improc
             {YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
 
         YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
     }
@@ -842,7 +835,7 @@ namespace cv { namespace gpu { namespace improc
 
 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     __constant__ float cXYZ_D65f[9];
     __constant__ int cXYZ_D65i[9];
@@ -931,7 +924,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int SRCCN, int DSTCN>
     void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
@@ -942,7 +935,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
            dst.ptr, dst.step, src.rows, src.cols);
 
         if (stream == 0)
@@ -958,7 +951,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
 
         RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
     }
@@ -972,7 +965,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
 
         RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
     }
@@ -986,7 +979,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
 
         RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
     }
@@ -1000,7 +993,7 @@ namespace cv { namespace gpu { namespace improc
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        imgproc::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+        imgproc_krnls::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
            dst.ptr, dst.step, src.rows, src.cols);
 
         if (stream == 0)
@@ -1016,7 +1009,7 @@ namespace cv { namespace gpu { namespace improc
             {XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
 
         XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
     }
@@ -1030,7 +1023,7 @@ namespace cv { namespace gpu { namespace improc
             {XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
 
         XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
     }
@@ -1044,7 +1037,7 @@ namespace cv { namespace gpu { namespace improc
             {XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
         };
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
 
         XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
    }
@@ -1052,7 +1045,7 @@ namespace cv { namespace gpu { namespace improc
 
 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     __constant__ int cHsvDivTable[256];
 
@@ -1229,7 +1222,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int SRCCN, int DSTCN>
     void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
@@ -1241,10 +1234,10 @@ namespace cv { namespace gpu { namespace improc
         grid.y = divUp(src.rows, threads.y);
 
         if (hrange == 180)
-            imgproc::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
         else
-            imgproc::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -1295,7 +1288,7 @@ namespace cv { namespace gpu { namespace improc
             4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229,
             4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096
         };
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvDivTable, div_table, sizeof(div_table)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHsvDivTable, div_table, sizeof(div_table)) );
 
         RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
     }
@@ -1323,10 +1316,10 @@ namespace cv { namespace gpu { namespace improc
         grid.y = divUp(src.rows, threads.y);
 
         if (hrange == 180)
-            imgproc::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
         else
-            imgproc::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -1345,7 +1338,7 @@ namespace cv { namespace gpu { namespace improc
         static const int sector_data[][3] =
             {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvSectorData, sector_data, sizeof(sector_data)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHsvSectorData, sector_data, sizeof(sector_data)) );
 
         HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
     }
@@ -1362,7 +1355,7 @@ namespace cv { namespace gpu { namespace improc
         static const int sector_data[][3] =
             {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvSectorData, sector_data, sizeof(sector_data)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHsvSectorData, sector_data, sizeof(sector_data)) );
 
         HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
     }
@@ -1370,7 +1363,7 @@ namespace cv { namespace gpu { namespace improc
 
 /////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
 
-namespace imgproc
+namespace imgproc_krnls
 {
     template<typename T, int HR> struct RGB2HLSConvertor;
     template<int HR> struct RGB2HLSConvertor<float, HR>
@@ -1541,7 +1534,7 @@ namespace imgproc
     }
 }
 
-namespace cv { namespace gpu { namespace improc
+namespace cv { namespace gpu { namespace imgproc
 {
     template <typename T, int SRCCN, int DSTCN>
     void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
@@ -1553,10 +1546,10 @@ namespace cv { namespace gpu { namespace improc
         grid.y = divUp(src.rows, threads.y);
 
         if (hrange == 180)
-            imgproc::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
         else
-            imgproc::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -1598,10 +1591,10 @@ namespace cv { namespace gpu { namespace improc
         grid.y = divUp(src.rows, threads.y);
 
         if (hrange == 180)
-            imgproc::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
         else
-            imgproc::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            imgproc_krnls::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
                 dst.ptr, dst.step, src.rows, src.cols, bidx);
 
         if (stream == 0)
@@ -1620,7 +1613,7 @@ namespace cv { namespace gpu { namespace improc
         static const int sector_data[][3]=
             {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHlsSectorData, sector_data, sizeof(sector_data)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHlsSectorData, sector_data, sizeof(sector_data)) );
 
         HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
     }
@@ -1637,7 +1630,7 @@ namespace cv { namespace gpu { namespace improc
         static const int sector_data[][3]=
             {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
 
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHlsSectorData, sector_data, sizeof(sector_data)) );
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHlsSectorData, sector_data, sizeof(sector_data)) );
 
         HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
     }
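The renames above establish the convention used throughout this commit: raw CUDA kernels and their `__constant__` state live in a bare `*_krnls` namespace, while the host-side launchers sit in `cv::gpu::<module>`. A minimal sketch of the pattern, distilled from the hunks above (hypothetical names, not from the commit):

    namespace example_krnls                     // device side: kernels and constants
    {
        __global__ void copy(const uchar* src, size_t sstep, uchar* dst, size_t dstep, int rows, int cols)
        {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;
            if (x < cols && y < rows)
                dst[y * dstep + x] = src[y * sstep + x];
        }
    }

    namespace cv { namespace gpu { namespace example   // host side: grid setup and launch
    {
        void copy_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
            example_krnls::copy<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.rows, src.cols);
            if (stream == 0)
                cudaSafeCall( cudaThreadSynchronize() );
        }
    }}}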
@@ -54,20 +54,18 @@ using namespace cv::gpu;
 #define SHRT_MAX 32767
 #endif
 
-template <typename T>
-struct TypeLimits {};
-
-template <>
-struct TypeLimits<short>
+namespace csbp_krnls
 {
-    static __device__ short max() {return SHRT_MAX;}
-};
-
-template <>
-struct TypeLimits<float>
-{
-    static __device__ float max() {return FLT_MAX;}
-};
+    template <typename T> struct TypeLimits;
+    template <> struct TypeLimits<short>
+    {
+        static __device__ short max() {return SHRT_MAX;}
+    };
+    template <> struct TypeLimits<float>
+    {
+        static __device__ float max() {return FLT_MAX;}
+    };
+}
 
 ///////////////////////////////////////////////////////////////
 /////////////////////// load constants ////////////////////////
 
@@ -58,19 +58,8 @@ namespace cv
 
         static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
 
-        namespace matrix_operations
-        {
-            extern "C" void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
-
-            extern "C" void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
-            extern "C" void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
-
-            extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
-        }
-
         template<class T>
-        inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }
+        static inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }
     }
 }
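Marking the header-defined `uploadConstant` as `static` gives each translation unit its own internal copy, avoiding duplicate-symbol link errors when several .cu files include `cuda_shared.hpp`. A hypothetical use, assuming the helper sits in `cv::gpu` as the surrounding braces suggest (the string-name form of `cudaMemcpyToSymbol` was still legal in this CUDA era):

    // Hypothetical: push a launch parameter into a kernel's __constant__ int by name.
    int ndisp = 64;
    cv::gpu::uploadConstant("csbp_krnls::cndisp", ndisp);   // symbol name is illustrative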
modules/gpu/src/cuda/filters.cu (new file, 455 lines)
@@ -0,0 +1,455 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//  (standard OpenCV BSD-style license header, elided here)
+//M*/
+
+#include "opencv2/gpu/devmem2d.hpp"
+#include "saturate_cast.hpp"
+#include "safe_call.hpp"
+#include "cuda_shared.hpp"
+
+using namespace cv::gpu;
+
+#ifndef FLT_MAX
+#define FLT_MAX 3.402823466e+30F
+#endif
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// Linear filters
+
+#define MAX_KERNEL_SIZE 16
+
+namespace filter_krnls
+{
+    __constant__ float cLinearKernel[MAX_KERNEL_SIZE];
+}
+
+namespace cv { namespace gpu { namespace filters
+{
+    void loadLinearKernel(const float kernel[], int ksize)
+    {
+        cudaSafeCall( cudaMemcpyToSymbol(filter_krnls::cLinearKernel, kernel, ksize * sizeof(float)) );
+    }
+}}}
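`cLinearKernel` lives in constant memory, so all threads read the taps through the constant cache, but this also caps the supported kernel length at `MAX_KERNEL_SIZE` (16) taps. A hypothetical host-side guard before uploading — the commit itself does not validate `ksize`:

    // Hypothetical guard; build a normalized box kernel and upload it.
    std::vector<float> k(ksize, 1.0f / ksize);
    CV_Assert(ksize > 0 && ksize <= 16);                 // must fit cLinearKernel
    cv::gpu::filters::loadLinearKernel(&k[0], ksize);    // copy into constant memory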
+namespace filter_krnls
+{
+    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
+    __global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
+    {
+        __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
+
+        const int blockStartX = blockDim.x * blockIdx.x;
+        const int blockStartY = blockDim.y * blockIdx.y;
+
+        const int threadX = blockStartX + threadIdx.x;
+        const int prevThreadX = threadX - blockDim.x;
+        const int nextThreadX = threadX + blockDim.x;
+
+        const int threadY = blockStartY + threadIdx.y;
+
+        T* sDataRow = smem + threadIdx.y * blockDim.x * 3;
+
+        if (threadY < height)
+        {
+            const T* rowSrc = src + threadY * src_step;
+
+            sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : 0;
+
+            sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : 0;
+
+            sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : 0;
+
+            __syncthreads();
+
+            if (threadX < width)
+            {
+                float sum = 0;
+
+                sDataRow += threadIdx.x + blockDim.x - anchor;
+
+                #pragma unroll
+                for(int i = 0; i < KERNEL_SIZE; ++i)
+                    sum += cLinearKernel[i] * sDataRow[i];
+
+                dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
+            }
+        }
+    }
+}
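The row kernel stages each tile plus a one-block apron on either side in shared memory — `smem` holds three `blockDim.x`-wide segments per row — so out-of-image taps read as zeros. As a checking aid, a CPU reference of what the kernel computes, a sketch of mine rather than code from the commit:

    // CPU reference for the row filter above (zero border); assumes unpadded buffers.
    static void rowFilterRef(const float* src, float* dst, int w, int h,
                             const float* kernel, int ksize, int anchor)
    {
        for (int y = 0; y < h; ++y)
            for (int x = 0; x < w; ++x)
            {
                float sum = 0.f;
                for (int i = 0; i < ksize; ++i)
                {
                    int xi = x - anchor + i;          // same window the GPU kernel uses
                    if (xi >= 0 && xi < w)            // out-of-range taps contribute 0
                        sum += kernel[i] * src[y * w + xi];
                }
                dst[y * w + x] = sum;
            }
    }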
+namespace cv { namespace gpu { namespace filters
+{
+    template <int KERNEL_SIZE, typename T, typename D>
+    void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
+    {
+        const int BLOCK_DIM_X = 16;
+        const int BLOCK_DIM_Y = 16;
+
+        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
+        dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
+
+        filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
+            dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
+
+        cudaSafeCall( cudaThreadSynchronize() );
+    }
+
+    template <typename T, typename D>
+    inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
+        static const caller_t callers[] =
+            {linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>,
+             linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>,
+             linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>,
+             linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>,
+             linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>,
+             linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>,
+             linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>,
+             linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>};
+
+        loadLinearKernel(kernel, ksize);
+        callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
+    }
+
+    void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearRowFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
+    }
+    void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearRowFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
+    }
+    void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearRowFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
+    }
+    void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearRowFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
+    }
+}}}
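The caller table maps a runtime `ksize` onto a compile-time `KERNEL_SIZE` template argument; slot 0 is a never-used placeholder since a real kernel has at least one tap, and `callers[ksize]` is only in bounds for `ksize <= 15` even though the constant buffer itself holds 16 taps. A hypothetical direct call, with `src` and `dst` as `DevMem2D` views of pitched CV_32FC1 device buffers:

    // Hypothetical: 3-tap row smoothing; anchor 1 centers the kernel.
    float taps[] = { 0.25f, 0.5f, 0.25f };
    cv::gpu::filters::linearRowFilter_gpu_32f32f(src, dst, taps, 3, 1);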
+namespace filter_krnls
+{
+    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
+    __global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
+    {
+        __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
+
+        const int blockStartX = blockDim.x * blockIdx.x;
+        const int blockStartY = blockDim.y * blockIdx.y;
+
+        const int threadX = blockStartX + threadIdx.x;
+
+        const int threadY = blockStartY + threadIdx.y;
+        const int prevThreadY = threadY - blockDim.y;
+        const int nextThreadY = threadY + blockDim.y;
+
+        const int smem_step = blockDim.x;
+
+        T* sDataColumn = smem + threadIdx.x;
+
+        if (threadX < width)
+        {
+            const T* colSrc = src + threadX;
+
+            sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : 0;
+
+            sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : 0;
+
+            sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : 0;
+
+            __syncthreads();
+
+            if (threadY < height)
+            {
+                float sum = 0;
+
+                sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step;
+
+                #pragma unroll
+                for(int i = 0; i < KERNEL_SIZE; ++i)
+                    sum += cLinearKernel[i] * sDataColumn[i * smem_step];
+
+                dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
+            }
+        }
+    }
+}
+namespace cv { namespace gpu { namespace filters
+{
+    template <int KERNEL_SIZE, typename T, typename D>
+    void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
+    {
+        const int BLOCK_DIM_X = 16;
+        const int BLOCK_DIM_Y = 16;
+
+        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
+        dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
+
+        filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
+            dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
+
+        cudaSafeCall( cudaThreadSynchronize() );
+    }
+
+    template <typename T, typename D>
+    inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
+        static const caller_t callers[] =
+            {linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>,
+             linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>,
+             linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>,
+             linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>,
+             linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>,
+             linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>,
+             linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>,
+             linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>};
+
+        loadLinearKernel(kernel, ksize);
+        callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
+    }
+
+    void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearColumnFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
+    }
+    void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearColumnFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
+    }
+    void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearColumnFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
+    }
+    void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
+    {
+        linearColumnFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
+    }
+}}}
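Separable filtering composes the two primitives: convolve rows into an intermediate buffer, then columns into the destination — presumably the pairing the host-side engine performs. A hypothetical composition, with `src`, `buf`, `dst` as same-sized CV_32FC1 `DevMem2D` views and illustrative kernel parameters:

    // Hypothetical separable pass built from the exported primitives above.
    cv::gpu::filters::linearRowFilter_gpu_32f32f(src, buf, kx, kxSize, kxAnchor);
    cv::gpu::filters::linearColumnFilter_gpu_32f32f(buf, dst, ky, kySize, kyAnchor);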
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Bilateral filters
|
||||
|
||||
namespace bf_krnls
|
||||
{
|
||||
__constant__ float* ctable_color;
|
||||
__constant__ float* ctable_space;
|
||||
__constant__ size_t ctable_space_step;
|
||||
|
||||
__constant__ int cndisp;
|
||||
__constant__ int cradius;
|
||||
|
||||
__constant__ short cedge_disc;
|
||||
__constant__ short cmax_disc;
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace bf
|
||||
{
|
||||
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.ptr, sizeof(table_space.ptr)) );
|
||||
size_t table_space_step = table_space.step / sizeof(float);
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cndisp, &ndisp, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cradius, &radius, sizeof(int)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cedge_disc, &edge_disc, sizeof(short)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cmax_disc, &max_disc, sizeof(short)) );
|
||||
}
|
||||
}}}
|
||||
|
||||
namespace bf_krnls
|
||||
{
|
||||
template <int channels>
|
||||
struct DistRgbMax
|
||||
{
|
||||
static __device__ uchar calc(const uchar* a, const uchar* b)
|
||||
{
|
||||
uchar x = abs(a[0] - b[0]);
|
||||
uchar y = abs(a[1] - b[1]);
|
||||
uchar z = abs(a[2] - b[2]);
|
||||
return (max(max(x, y), z));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct DistRgbMax<1>
|
||||
{
|
||||
static __device__ uchar calc(const uchar* a, const uchar* b)
|
||||
{
|
||||
return abs(a[0] - b[0]);
|
||||
}
|
||||
};
|
||||
|
||||
template <int channels, typename T>
|
||||
__global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
|
||||
{
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
||||
|
||||
T dp[5];
|
||||
|
||||
if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
|
||||
{
|
||||
dp[0] = *(disp + (y ) * disp_step + x + 0);
|
||||
dp[1] = *(disp + (y-1) * disp_step + x + 0);
|
||||
dp[2] = *(disp + (y ) * disp_step + x - 1);
|
||||
dp[3] = *(disp + (y+1) * disp_step + x + 0);
|
||||
dp[4] = *(disp + (y ) * disp_step + x + 1);
|
||||
|
||||
if(abs(dp[1] - dp[0]) >= cedge_disc || abs(dp[2] - dp[0]) >= cedge_disc || abs(dp[3] - dp[0]) >= cedge_disc || abs(dp[4] - dp[0]) >= cedge_disc)
|
||||
{
|
||||
const int ymin = max(0, y - cradius);
|
||||
const int xmin = max(0, x - cradius);
|
||||
const int ymax = min(h - 1, y + cradius);
|
||||
const int xmax = min(w - 1, x + cradius);
|
||||
|
||||
float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||
|
||||
const uchar* ic = img + y * img_step + channels * x;
|
||||
|
||||
for(int yi = ymin; yi <= ymax; yi++)
|
||||
{
|
||||
const T* disp_y = disp + yi * disp_step;
|
||||
|
||||
for(int xi = xmin; xi <= xmax; xi++)
|
||||
{
|
||||
const uchar* in = img + yi * img_step + channels * xi;
|
||||
|
||||
uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
|
||||
|
||||
const float weight = ctable_color[dist_rgb] * (ctable_space + abs(y-yi)* ctable_space_step)[abs(x-xi)];
|
||||
|
||||
const T disp_reg = disp_y[xi];
|
||||
|
||||
cost[0] += min(cmax_disc, abs(disp_reg - dp[0])) * weight;
|
||||
cost[1] += min(cmax_disc, abs(disp_reg - dp[1])) * weight;
|
||||
cost[2] += min(cmax_disc, abs(disp_reg - dp[2])) * weight;
|
||||
cost[3] += min(cmax_disc, abs(disp_reg - dp[3])) * weight;
|
||||
cost[4] += min(cmax_disc, abs(disp_reg - dp[4])) * weight;
|
||||
}
|
||||
}
|
||||
|
||||
float minimum = FLT_MAX;
|
||||
int id = 0;
|
||||
|
||||
if (cost[0] < minimum)
|
||||
{
|
||||
minimum = cost[0];
|
||||
id = 0;
|
||||
}
|
||||
if (cost[1] < minimum)
|
||||
{
|
||||
minimum = cost[1];
|
||||
id = 1;
|
||||
}
|
||||
if (cost[2] < minimum)
|
||||
{
|
||||
minimum = cost[2];
|
||||
id = 2;
|
||||
}
|
||||
if (cost[3] < minimum)
|
||||
{
|
||||
minimum = cost[3];
|
||||
id = 3;
|
||||
}
|
||||
if (cost[4] < minimum)
|
||||
{
|
||||
minimum = cost[4];
|
||||
id = 4;
|
||||
}
|
||||
|
||||
*(disp + y * disp_step + x) = dp[id];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace bf
{
    template <typename T>
    void bilateral_filter_caller(const DevMem2D_<T>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
    {
        dim3 threads(32, 8, 1);
        dim3 grid(1, 1, 1);
        grid.x = divUp(disp.cols, threads.x << 1);
        grid.y = divUp(disp.rows, threads.y);

        switch (channels)
        {
        case 1:
            for (int i = 0; i < iters; ++i)
            {
                bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
                bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
            }
            break;
        case 3:
            for (int i = 0; i < iters; ++i)
            {
                bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
                bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
            }
            break;
        default:
            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
        }

        if (stream != 0)
            cudaSafeCall( cudaThreadSynchronize() );
    }

    void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
    {
        bilateral_filter_caller(disp, img, channels, iters, stream);
    }

    void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
    {
        bilateral_filter_caller(disp, img, channels, iters, stream);
    }
}}}
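
For reference: divUp, used to size every launch grid in this commit, is OpenCV's ceiling-division helper. A minimal sketch follows; the reading that "threads.x << 1" means each thread covers two disparity columns is our assumption, not stated in the diff.

    // Hedged sketch, not part of the commit: round a length up to whole blocks.
    static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

    // Example: disp.cols = 640, threads.x = 32, two columns per thread (assumed),
    // so grid.x = divUp(640, 32 << 1) = divUp(640, 64) = 10 blocks.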

@ -45,7 +45,7 @@
using namespace cv::gpu;

/////////////////////////////////// Remap ///////////////////////////////////////////////
namespace imgproc
namespace imgproc_krnls
{
    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;

@ -123,7 +123,7 @@ namespace imgproc
    }
}

namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
    void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
    {
@ -132,15 +132,15 @@ namespace cv { namespace gpu { namespace improc
        grid.x = divUp(dst.cols, threads.x);
        grid.y = divUp(dst.rows, threads.y);

        imgproc::tex_remap.filterMode = cudaFilterModeLinear;
        imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap;
        imgproc_krnls::tex_remap.filterMode = cudaFilterModeLinear;
        imgproc_krnls::tex_remap.addressMode[0] = imgproc_krnls::tex_remap.addressMode[1] = cudaAddressModeWrap;
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
        cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );
        cudaSafeCall( cudaBindTexture2D(0, imgproc_krnls::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );

        imgproc::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
        imgproc_krnls::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);

        cudaSafeCall( cudaThreadSynchronize() );
        cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );
        cudaSafeCall( cudaUnbindTexture(imgproc_krnls::tex_remap) );
    }

    void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
@ -150,7 +150,7 @@ namespace cv { namespace gpu { namespace improc
        grid.x = divUp(dst.cols, threads.x);
        grid.y = divUp(dst.rows, threads.y);

        imgproc::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
        imgproc_krnls::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);

        cudaSafeCall( cudaThreadSynchronize() );
    }
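
The remap caller above is the legacy CUDA texture-reference sequence: configure the reference, bind the pitched image, launch, synchronize, unbind. A self-contained sketch of the same idiom, with hypothetical names and assuming the CUDA 2.x/3.x runtime API this commit targets:

    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_example; // file-scope reference

    void remap_like(const unsigned char* dev_src, size_t pitch, int cols, int rows)
    {
        tex_example.filterMode = cudaFilterModeLinear;   // bilinear fetches, as for tex_remap
        tex_example.addressMode[0] = tex_example.addressMode[1] = cudaAddressModeWrap;
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
        cudaSafeCall( cudaBindTexture2D(0, tex_example, dev_src, desc, cols, rows, pitch) );
        // ... launch a kernel that samples tex2D(tex_example, x, y) ...
        cudaSafeCall( cudaThreadSynchronize() );         // legacy sync call, as in this commit
        cudaSafeCall( cudaUnbindTexture(tex_example) );
    }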

@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace improc

/////////////////////////////////// MeanShiftFiltering ///////////////////////////////////////////////

namespace imgproc
namespace imgproc_krnls
{
    texture<uchar4, 2> tex_meanshift;

@ -254,7 +254,7 @@ namespace imgproc
    }
}

namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
    extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
    {
@ -264,11 +264,11 @@ namespace cv { namespace gpu { namespace improc
        grid.y = divUp(src.rows, threads.y);

        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
        cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
        cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );

        imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
        imgproc_krnls::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
        cudaSafeCall( cudaThreadSynchronize() );
        cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
        cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) );
    }
    extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
    {
@ -278,17 +278,17 @@ namespace cv { namespace gpu { namespace improc
        grid.y = divUp(src.rows, threads.y);

        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
        cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
        cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );

        imgproc::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
        imgproc_krnls::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
        cudaSafeCall( cudaThreadSynchronize() );
        cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
        cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) );
    }
}}}

/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////

namespace imgproc
namespace imgproc_krnls
{
    template <typename T>
    __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
@ -391,7 +391,7 @@ namespace imgproc
    }
}

namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
    void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
    {
@ -400,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
        grid.x = divUp(src.cols, threads.x << 2);
        grid.y = divUp(src.rows, threads.y);

        imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
        imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);

        if (stream == 0)
            cudaSafeCall( cudaThreadSynchronize() );
@ -413,7 +413,7 @@ namespace cv { namespace gpu { namespace improc
        grid.x = divUp(src.cols, threads.x << 1);
        grid.y = divUp(src.rows, threads.y);

        imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
        imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);

        if (stream == 0)
            cudaSafeCall( cudaThreadSynchronize() );
@ -422,7 +422,7 @@ namespace cv { namespace gpu { namespace improc

/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////

namespace imgproc
namespace imgproc_krnls
{
    __constant__ float cq[16];

@ -457,7 +457,7 @@ namespace imgproc
    }
}

namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
    template <typename T>
    inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
@ -467,9 +467,9 @@ namespace cv { namespace gpu { namespace improc
        grid.x = divUp(disp.cols, threads.x);
        grid.y = divUp(disp.rows, threads.y);

        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cq, q, 16 * sizeof(float)) );
        cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cq, q, 16 * sizeof(float)) );

        imgproc::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
        imgproc_krnls::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);

        if (stream == 0)
            cudaSafeCall( cudaThreadSynchronize() );
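
reprojectImageTo3D keeps the 4x4 reprojection matrix Q in __constant__ memory (cq) and refreshes it with cudaMemcpyToSymbol before each launch. A minimal sketch of the idiom, with a hypothetical symbol name:

    __constant__ float cq_example[16];      // hypothetical twin of imgproc_krnls::cq

    void upload_q(const float* q)           // q points at 16 host floats (row-major Q)
    {
        // Copy into the constant bank; kernels then read cq_example[] through the
        // cached constant-memory path instead of taking Q as a kernel argument.
        cudaSafeCall( cudaMemcpyToSymbol(cq_example, q, 16 * sizeof(float)) );
    }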

@ -41,6 +41,9 @@
//M*/

#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "transform.hpp"
#include "vecmath.hpp"

using namespace cv::gpu;

@ -48,6 +51,9 @@ using namespace cv::gpu;
#define CV_PI 3.1415926535897932384626433832795f
#endif

//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar

namespace mathfunc_krnls
{
    struct Nothing
@ -143,8 +149,8 @@ namespace cv { namespace gpu { namespace mathfunc
        const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;

        mathfunc_krnls::cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
            x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float),
            mag.ptr, mag.step / sizeof(float), angle.ptr, angle.step / sizeof(float), scale, x.cols, x.rows);
            x.ptr, x.elem_step, y.ptr, y.elem_step,
            mag.ptr, mag.elem_step, angle.ptr, angle.elem_step, scale, x.cols, x.rows);

        if (stream == 0)
            cudaSafeCall( cudaThreadSynchronize() );
@ -191,8 +197,8 @@ namespace cv { namespace gpu { namespace mathfunc

        const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;

        mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.step / sizeof(float),
            angle.ptr, angle.step / sizeof(float), scale, x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float), mag.cols, mag.rows);
        mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.elem_step,
            angle.ptr, angle.elem_step, scale, x.ptr, x.elem_step, y.ptr, y.elem_step, mag.cols, mag.rows);

        if (stream == 0)
            cudaSafeCall( cudaThreadSynchronize() );
@ -210,3 +216,37 @@ namespace cv { namespace gpu { namespace mathfunc
        callers[mag.ptr == 0](mag, angle, x, y, angleInDegrees, stream);
    }
}}}

//////////////////////////////////////////////////////////////////////////////////////
// Compare

namespace mathfunc_krnls
{
    template <typename T1, typename T2>
    struct NotEqual
    {
        __device__ uchar operator()(const T1& src1, const T2& src2, int, int)
        {
            return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
        }
    };
}

namespace cv { namespace gpu { namespace mathfunc
{
    template <typename T1, typename T2>
    inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
    {
        mathfunc_krnls::NotEqual<T1, T2> op;
        transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
    }

    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
    {
        compare_ne<uint, uint>(src1, src2, dst);
    }
    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
    {
        compare_ne<float, float>(src1, src2, dst);
    }
}}}
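
compare_ne shows the functor-over-transform style this refactoring introduces: the per-element rule is a tiny __device__ functor and the shared transform kernel supplies bounds checks and indexing. A hedged sketch of another functor that would plug into the same call, purely as an illustration:

    // Hypothetical functor in the style of NotEqual: per-element absolute difference.
    struct AbsDiff
    {
        __device__ uchar operator()(const uchar& a, const uchar& b, int /*x*/, int /*y*/)
        {
            return a > b ? a - b : b - a;
        }
    };
    // Usage would mirror compare_ne: transform(src1, src2, dst, AbsDiff(), 0);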

@ -47,121 +47,18 @@
#include "saturate_cast.hpp"

using namespace cv::gpu;
using namespace cv::gpu::matrix_operations;


namespace mat_operators
namespace matop_krnls
{
    __constant__ double scalar_d[4];


    template <typename T>
    class shift_and_sizeof;

    template <>
    class shift_and_sizeof<char>
    {
    public:
        enum { shift = 0 };
    };

    template <>
    class shift_and_sizeof<unsigned char>
    {
    public:
        enum { shift = 0 };
    };

    template <>
    class shift_and_sizeof<short>
    {
    public:
        enum { shift = 1 };
    };

    template <>
    class shift_and_sizeof<unsigned short>
    {
    public:
        enum { shift = 1 };
    };

    template <>
    class shift_and_sizeof<int>
    {
    public:
        enum { shift = 2 };
    };

    template <>
    class shift_and_sizeof<float>
    {
    public:
        enum { shift = 2 };
    };

    template <>
    class shift_and_sizeof<double>
    {
    public:
        enum { shift = 3 };
    };


    ///////////////////////////////////////////////////////////////////////////
    ////////////////////////////////// CopyTo /////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////

    template<typename T>
    __global__ void kernel_copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;

        if ((x < cols * channels ) && (y < rows))
            if (mask[y * step_mask + x / channels] != 0)
            {
                size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
                mat_dst[idx] = mat_src[idx];
            }
    }

    ///////////////////////////////////////////////////////////////////////////
    ////////////////////////////////// SetTo //////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////

    template<typename T>
    __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;

        if ((x < cols * channels ) && (y < rows))
        {
            size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
            mat[idx] = scalar_d[ x % channels ];
        }
    }

    template<typename T>
    __global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;

        if ((x < cols * channels ) && (y < rows))
            if (mask[y * step_mask + x / channels] != 0)
            {
                size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
                mat[idx] = scalar_d[ x % channels ];
            }
    }


    ///////////////////////////////////////////////////////////////////////////
    //////////////////////////////// ConvertTo ////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////

    template <typename T> struct shift_and_sizeof;
    template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
    template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
    template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
    template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
    template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
    template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
    template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
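
shift_and_sizeof stores log2(sizeof(T)) so a byte pitch converts to an element stride with a shift rather than a division, which is what the kernels above do with "step >> shift":

    // Sketch: step = 2048 bytes and T = float (shift = 2) gives
    //   elements per row = step >> shift_and_sizeof<float>::shift = 2048 >> 2 = 512,
    // identical to step / sizeof(float) but cheap inside a kernel.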

    template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
    struct ReadWriteTraits
    {
@ -218,9 +115,206 @@ namespace mat_operators
        typedef int2 read_type;
        typedef short2 write_type;
    };
}

///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////

namespace matop_krnls
{
    template<typename T>
    __global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;

        if ((x < cols * channels ) && (y < rows))
            if (mask[y * step_mask + x / channels] != 0)
            {
                size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
                mat_dst[idx] = mat_src[idx];
            }
    }
}

namespace cv { namespace gpu { namespace matrix_operations
{
    typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);

    template<typename T>
    void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
    {
        dim3 threadsPerBlock(16,16, 1);
        dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
        if (stream == 0)
        {
            ::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
                ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
            cudaSafeCall ( cudaThreadSynchronize() );
        }
        else
        {
            ::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
                ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
        }
    }

    void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
    {
        static CopyToFunc tab[8] =
        {
            copy_to_with_mask_run<unsigned char>,
            copy_to_with_mask_run<char>,
            copy_to_with_mask_run<unsigned short>,
            copy_to_with_mask_run<short>,
            copy_to_with_mask_run<int>,
            copy_to_with_mask_run<float>,
            copy_to_with_mask_run<double>,
            0
        };

        CopyToFunc func = tab[depth];

        if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);

        func(mat_src, mat_dst, mask, channels, stream);
    }
}}}
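
The depth-indexed table above is the dispatch pattern used throughout this file: the runtime matrix depth selects a pre-instantiated template, and a null slot reports an unsupported type. A hedged host-side sketch, assuming depth 0 is unsigned char as in the table ordering:

    // Hypothetical call: copy an 8-bit, 3-channel image under a mask on the default stream.
    void example_copy(const DevMem2D& src, DevMem2D dst, const DevMem2D& mask)
    {
        cv::gpu::matrix_operations::copy_to_with_mask(src, dst, /*depth*/ 0, mask, /*channels*/ 3, /*stream*/ 0);
    }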

///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////

namespace matop_krnls
{
    __constant__ double scalar_d[4];

    template<typename T>
    __global__ void set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;

        if ((x < cols * channels ) && (y < rows))
        {
            size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
            mat[idx] = scalar_d[ x % channels ];
        }
    }

    template<typename T>
    __global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;

        if ((x < cols * channels ) && (y < rows))
            if (mask[y * step_mask + x / channels] != 0)
            {
                size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
                mat[idx] = scalar_d[ x % channels ];
            }
    }
}

namespace cv { namespace gpu { namespace matrix_operations
{
    typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
    typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);

    template <typename T>
    void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
    {
        dim3 threadsPerBlock(32, 8, 1);
        dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

        if (stream == 0)
        {
            ::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
            cudaSafeCall ( cudaThreadSynchronize() );
        }
        else
        {
            ::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
        }

    }

    template <typename T>
    void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
    {
        dim3 threadsPerBlock(32, 8, 1);
        dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

        if (stream == 0)
        {
            matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
            cudaSafeCall ( cudaThreadSynchronize() );
        }
        else
        {
            matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
        }
    }

    void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
    {
        cudaSafeCall( cudaMemcpyToSymbol(matop_krnls::scalar_d, scalar, sizeof(double) * 4));

        static SetToFunc_without_mask tab[8] =
        {
            set_to_without_mask_run<unsigned char>,
            set_to_without_mask_run<char>,
            set_to_without_mask_run<unsigned short>,
            set_to_without_mask_run<short>,
            set_to_without_mask_run<int>,
            set_to_without_mask_run<float>,
            set_to_without_mask_run<double>,
            0
        };

        SetToFunc_without_mask func = tab[depth];

        if (func == 0)
            cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);

        func(mat, channels, stream);
    }

    void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
    {
        cudaSafeCall( cudaMemcpyToSymbol(matop_krnls::scalar_d, scalar, sizeof(double) * 4));

        static SetToFunc_with_mask tab[8] =
        {
            set_to_with_mask_run<unsigned char>,
            set_to_with_mask_run<char>,
            set_to_with_mask_run<unsigned short>,
            set_to_with_mask_run<short>,
            set_to_with_mask_run<int>,
            set_to_with_mask_run<float>,
            set_to_with_mask_run<double>,
            0
        };

        SetToFunc_with_mask func = tab[depth];

        if (func == 0)
            cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);

        func(mat, mask, channels, stream);
    }
}}}
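
setTo broadcasts the fill value through the __constant__ array scalar_d, and each thread selects its channel with "x % channels". A sketch of how a scalar reaches the kernel, under the same depth-0-is-uchar assumption and with 'mat' assumed to be a DevMem2D already on the device:

    // Fill a 4-channel 8-bit image with (10, 20, 30, 40).
    double scalar[4] = { 10, 20, 30, 40 };
    // set_to_without_mask copies 'scalar' into matop_krnls::scalar_d via
    // cudaMemcpyToSymbol; each thread then writes scalar_d[x % channels].
    cv::gpu::matrix_operations::set_to_without_mask(mat, /*depth*/ 0, scalar, /*channels*/ 4, /*stream*/ 0);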

///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////

namespace matop_krnls
{
    template <typename T, typename DT>
    __global__ static void kernel_convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
    __global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
    {
        typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
        typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
@ -253,253 +347,63 @@ namespace mat_operators
                dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
            }
        }
    }
}
}

///////////////////////////////////////////////////////////////////////////
/////////////////////////////// compare_ne ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
namespace cv { namespace gpu { namespace matrix_operations
{
    typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);

    template <typename T>
    __global__ void kernel_compare_ne(uchar* src1, size_t src1_step, uchar* src2, size_t src2_step, uchar* dst, size_t dst_step, int cols, int rows)
    template<typename T, typename DT>
    void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
    {
        const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
        const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
        const int shift = ::matop_krnls::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;

        if (x < cols && y < rows)
        dim3 block(32, 8);
        dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));

        if (stream == 0)
        {
            T src1_pix = ((T*)(src1 + y * src1_step))[x];
            T src2_pix = ((T*)(src2 + y * src2_step))[x];
            uchar res = (uchar)(src1_pix != src2_pix) * 255;
            ((dst + y * dst_step))[x] = res;
            matop_krnls::convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
            cudaSafeCall( cudaThreadSynchronize() );
        }
        else
        {
            matop_krnls::convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
        }
    }
} // namespace mat_operators

namespace cv
{
    namespace gpu
    void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
    {
        namespace matrix_operations
        static CvtFunc tab[8][8] =
        {
            {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
            cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},

///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
            {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
            cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},

            typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
            {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
            cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},

            template<typename T>
            void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
            {
                dim3 threadsPerBlock(16,16, 1);
                dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
                if (stream == 0)
                {
                    ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
                        ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
                    cudaSafeCall ( cudaThreadSynchronize() );
                }
                else
                {
                    ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
                        ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
                }
            }
            {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
            cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},

            extern "C" void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
            {
                static CopyToFunc tab[8] =
                {
                    copy_to_with_mask_run<unsigned char>,
                    copy_to_with_mask_run<char>,
                    copy_to_with_mask_run<unsigned short>,
                    copy_to_with_mask_run<short>,
                    copy_to_with_mask_run<int>,
                    copy_to_with_mask_run<float>,
                    copy_to_with_mask_run<double>,
                    0
                };
            {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
            cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},

                CopyToFunc func = tab[depth];
            {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
            cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},

                if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);
            {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
            cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},

                func(mat_src, mat_dst, mask, channels, stream);
            }
            {0,0,0,0,0,0,0,0}
        };


///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////

            typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
            typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);

            template <typename T>
            void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
            {
                dim3 threadsPerBlock(32, 8, 1);
                dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

                if (stream == 0)
                {
                    ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
                    cudaSafeCall ( cudaThreadSynchronize() );
                }
                else
                {
                    ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
                }

            }

            template <typename T>
            void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
            {
                dim3 threadsPerBlock(32, 8, 1);
                dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

                if (stream == 0)
                {
                    mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
                    cudaSafeCall ( cudaThreadSynchronize() );
                }
                else
                {
                    mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
                }
            }

            extern "C" void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
            {
                cudaSafeCall( cudaMemcpyToSymbol(mat_operators::scalar_d, scalar, sizeof(double) * 4));

                static SetToFunc_without_mask tab[8] =
                {
                    set_to_without_mask_run<unsigned char>,
                    set_to_without_mask_run<char>,
                    set_to_without_mask_run<unsigned short>,
                    set_to_without_mask_run<short>,
                    set_to_without_mask_run<int>,
                    set_to_without_mask_run<float>,
                    set_to_without_mask_run<double>,
                    0
                };

                SetToFunc_without_mask func = tab[depth];

                if (func == 0)
                    cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);

                func(mat, channels, stream);
            }


            extern "C" void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
            {
                cudaSafeCall( cudaMemcpyToSymbol(mat_operators::scalar_d, scalar, sizeof(double) * 4));

                static SetToFunc_with_mask tab[8] =
                {
                    set_to_with_mask_run<unsigned char>,
                    set_to_with_mask_run<char>,
                    set_to_with_mask_run<unsigned short>,
                    set_to_with_mask_run<short>,
                    set_to_with_mask_run<int>,
                    set_to_with_mask_run<float>,
                    set_to_with_mask_run<double>,
                    0
                };

                SetToFunc_with_mask func = tab[depth];

                if (func == 0)
                    cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);

                func(mat, mask, channels, stream);
            }


///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////

            typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);

            template<typename T, typename DT>
            void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
            {
                const int shift = ::mat_operators::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;

                dim3 block(32, 8);
                dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));

                if (stream == 0)
                {
                    mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
                    cudaSafeCall( cudaThreadSynchronize() );
                }
                else
                {
                    mat_operators::kernel_convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
                }
            }

            extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
            {
                static CvtFunc tab[8][8] =
                {
                    {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
                    cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},

                    {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
                    cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},

                    {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
                    cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},

                    {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
                    cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},

                    {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
                    cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},

                    {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
                    cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},

                    {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
                    cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},

                    {0,0,0,0,0,0,0,0}
                };

                CvtFunc func = tab[sdepth][ddepth];
                if (func == 0)
                    cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
                func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
            }

///////////////////////////////////////////////////////////////////////////
/////////////////////////////// compare_ne ////////////////////////////////
///////////////////////////////////////////////////////////////////////////

            void compare_ne_8u(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y));

                mat_operators::kernel_compare_ne<uint><<<grid, block>>>(src1.ptr, src1.step, src2.ptr, src2.step, dst.ptr, dst.step, src1.cols, src1.rows);
                cudaSafeCall( cudaThreadSynchronize() );
            }

            void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y));

                mat_operators::kernel_compare_ne<float><<<grid, block>>>(src1.ptr, src1.step, src2.ptr, src2.step, dst.ptr, dst.step, src1.cols, src1.rows);
                cudaSafeCall( cudaThreadSynchronize() );
            }
        } // namespace matrix_operations
    } // namespace gpu
} // namespace cv
        CvtFunc func = tab[sdepth][ddepth];
        if (func == 0)
            cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
        func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
    }
}}}
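
cvt_ divides the grid width by ReadWriteTraits<...>::shift because convert_to moves data through vector types (the int2/short2 typedefs above), so one thread converts several elements per access. A comment-level sketch of the arithmetic; the element count per thread is our assumption read off the shift value:

    // Sketch: with shift = 2, each thread converts two elements, so
    // grid.x = divUp(width, block.x * 2); inside the kernel the scalar
    // tail loops over (x * shift) + i for i in [0, shift), matching the
    // dst[(x * shift) + i] line shown above.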

@ -49,124 +49,206 @@ namespace cv
{
    namespace gpu
    {
        // To fix a link error: this function is already defined in another object file.
        namespace
        template<typename _Tp> static __device__ _Tp saturate_cast(uchar v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(schar v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(ushort v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(short v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(uint v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(int v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(float v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(double v) { return _Tp(v); }

        template<> static __device__ uchar saturate_cast<uchar>(schar v)
        { return (uchar)max((int)v, 0); }
        template<> static __device__ uchar saturate_cast<uchar>(ushort v)
        { return (uchar)min((uint)v, (uint)UCHAR_MAX); }
        template<> static __device__ uchar saturate_cast<uchar>(int v)
        { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
        template<> static __device__ uchar saturate_cast<uchar>(uint v)
        { return (uchar)min(v, (uint)UCHAR_MAX); }
        template<> static __device__ uchar saturate_cast<uchar>(short v)
        { return saturate_cast<uchar>((uint)v); }

        template<> static __device__ uchar saturate_cast<uchar>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }
        template<> static __device__ uchar saturate_cast<uchar>(double v)
        {
        template<typename _Tp> __device__ _Tp saturate_cast(uchar v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(schar v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(ushort v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(short v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(uint v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(int v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(float v) { return _Tp(v); }
        template<typename _Tp> __device__ _Tp saturate_cast(double v) { return _Tp(v); }

        template<> __device__ uchar saturate_cast<uchar>(schar v)
        { return (uchar)max((int)v, 0); }
        template<> __device__ uchar saturate_cast<uchar>(ushort v)
        { return (uchar)min((uint)v, (uint)UCHAR_MAX); }
        template<> __device__ uchar saturate_cast<uchar>(int v)
        { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
        template<> __device__ uchar saturate_cast<uchar>(uint v)
        { return (uchar)min(v, (uint)UCHAR_MAX); }
        template<> __device__ uchar saturate_cast<uchar>(short v)
        { return saturate_cast<uchar>((uint)v); }

        template<> __device__ uchar saturate_cast<uchar>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }
        template<> __device__ uchar saturate_cast<uchar>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);
        #else
            return saturate_cast<uchar>((float)v);
        #endif
        }

        template<> __device__ schar saturate_cast<schar>(uchar v)
        { return (schar)min((int)v, SCHAR_MAX); }
        template<> __device__ schar saturate_cast<schar>(ushort v)
        { return (schar)min((uint)v, (uint)SCHAR_MAX); }
        template<> __device__ schar saturate_cast<schar>(int v)
        {
            return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?
                v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
        }
        template<> __device__ schar saturate_cast<schar>(short v)
        { return saturate_cast<schar>((int)v); }
        template<> __device__ schar saturate_cast<schar>(uint v)
        { return (schar)min(v, (uint)SCHAR_MAX); }

        template<> __device__ schar saturate_cast<schar>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }
        template<> __device__ schar saturate_cast<schar>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<schar>(iv);
        #else
            return saturate_cast<schar>((float)v);
        #endif
        }

        template<> __device__ ushort saturate_cast<ushort>(schar v)
        { return (ushort)max((int)v, 0); }
        template<> __device__ ushort saturate_cast<ushort>(short v)
        { return (ushort)max((int)v, 0); }
        template<> __device__ ushort saturate_cast<ushort>(int v)
        { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
        template<> __device__ ushort saturate_cast<ushort>(uint v)
        { return (ushort)min(v, (uint)USHRT_MAX); }
        template<> __device__ ushort saturate_cast<ushort>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }
        template<> __device__ ushort saturate_cast<ushort>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);
        #else
            return saturate_cast<ushort>((float)v);
        #endif
        }

        template<> __device__ short saturate_cast<short>(ushort v)
        { return (short)min((int)v, SHRT_MAX); }
        template<> __device__ short saturate_cast<short>(int v)
        {
            return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?
                v : v > 0 ? SHRT_MAX : SHRT_MIN);
        }
        template<> __device__ short saturate_cast<short>(uint v)
        { return (short)min(v, (uint)SHRT_MAX); }
        template<> __device__ short saturate_cast<short>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }
        template<> __device__ short saturate_cast<short>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<short>(iv);
        #else
            return saturate_cast<short>((float)v);
        #endif
        }

        template<> __device__ int saturate_cast<int>(float v) { return __float2int_rn(v); }
        template<> __device__ int saturate_cast<int>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            return __double2int_rn(v);
        #else
            return saturate_cast<int>((float)v);
        #endif
        }

        template<> __device__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }
        template<> __device__ uint saturate_cast<uint>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            return __double2uint_rn(v);
        #else
            return saturate_cast<uint>((float)v);
        #endif
        }
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);
        #else
            return saturate_cast<uchar>((float)v);
        #endif
        }

        template<> static __device__ schar saturate_cast<schar>(uchar v)
        { return (schar)min((int)v, SCHAR_MAX); }
        template<> static __device__ schar saturate_cast<schar>(ushort v)
        { return (schar)min((uint)v, (uint)SCHAR_MAX); }
        template<> static __device__ schar saturate_cast<schar>(int v)
        {
            return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?
                v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
        }
        template<> static __device__ schar saturate_cast<schar>(short v)
        { return saturate_cast<schar>((int)v); }
        template<> static __device__ schar saturate_cast<schar>(uint v)
        { return (schar)min(v, (uint)SCHAR_MAX); }

        template<> static __device__ schar saturate_cast<schar>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }
        template<> static __device__ schar saturate_cast<schar>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<schar>(iv);
        #else
            return saturate_cast<schar>((float)v);
        #endif
        }

        template<> static __device__ ushort saturate_cast<ushort>(schar v)
        { return (ushort)max((int)v, 0); }
        template<> static __device__ ushort saturate_cast<ushort>(short v)
        { return (ushort)max((int)v, 0); }
        template<> static __device__ ushort saturate_cast<ushort>(int v)
        { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
        template<> static __device__ ushort saturate_cast<ushort>(uint v)
        { return (ushort)min(v, (uint)USHRT_MAX); }
        template<> static __device__ ushort saturate_cast<ushort>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }
        template<> static __device__ ushort saturate_cast<ushort>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);
        #else
            return saturate_cast<ushort>((float)v);
        #endif
        }

        template<> static __device__ short saturate_cast<short>(ushort v)
        { return (short)min((int)v, SHRT_MAX); }
        template<> static __device__ short saturate_cast<short>(int v)
        {
            return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?
                v : v > 0 ? SHRT_MAX : SHRT_MIN);
        }
        template<> static __device__ short saturate_cast<short>(uint v)
        { return (short)min(v, (uint)SHRT_MAX); }
        template<> static __device__ short saturate_cast<short>(float v)
        { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }
        template<> static __device__ short saturate_cast<short>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            int iv = __double2int_rn(v); return saturate_cast<short>(iv);
        #else
            return saturate_cast<short>((float)v);
        #endif
        }

        template<> static __device__ int saturate_cast<int>(float v) { return __float2int_rn(v); }
        template<> static __device__ int saturate_cast<int>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            return __double2int_rn(v);
        #else
            return saturate_cast<int>((float)v);
        #endif
        }

        template<> static __device__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }
        template<> static __device__ uint saturate_cast<uint>(double v)
        {
        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
            return __double2uint_rn(v);
        #else
            return saturate_cast<uint>((float)v);
        #endif
        }

        template<typename _Tp> static __device__ _Tp saturate_cast(uchar4 v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(char4 v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(ushort4 v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(short4 v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(uint4 v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(int4 v) { return _Tp(v); }
        template<typename _Tp> static __device__ _Tp saturate_cast(float4 v) { return _Tp(v); }

        template<> static __device__ uchar4 saturate_cast<uchar4>(char4 v)
        { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }
        template<> static __device__ uchar4 saturate_cast<uchar4>(ushort4 v)
        { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }
        template<> static __device__ uchar4 saturate_cast<uchar4>(short4 v)
        { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }
        template<> static __device__ uchar4 saturate_cast<uchar4>(uint4 v)
        { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }
        template<> static __device__ uchar4 saturate_cast<uchar4>(int4 v)
        { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }
        template<> static __device__ uchar4 saturate_cast<uchar4>(float4 v)
        { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }

        template<> static __device__ char4 saturate_cast<char4>(uchar4 v)
        { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }
        template<> static __device__ char4 saturate_cast<char4>(ushort4 v)
        { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }
        template<> static __device__ char4 saturate_cast<char4>(short4 v)
        { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }
        template<> static __device__ char4 saturate_cast<char4>(uint4 v)
        { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }
        template<> static __device__ char4 saturate_cast<char4>(int4 v)
        { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }
        template<> static __device__ char4 saturate_cast<char4>(float4 v)
        { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }

        template<> static __device__ ushort4 saturate_cast<ushort4>(uchar4 v)
        { return make_ushort4(v.x, v.y, v.z, v.w); }
        template<> static __device__ ushort4 saturate_cast<ushort4>(char4 v)
        { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }
        template<> static __device__ ushort4 saturate_cast<ushort4>(short4 v)
        { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }
        template<> static __device__ ushort4 saturate_cast<ushort4>(uint4 v)
        { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }
        template<> static __device__ ushort4 saturate_cast<ushort4>(int4 v)
        { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }
        template<> static __device__ ushort4 saturate_cast<ushort4>(float4 v)
        { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }

        template<> static __device__ short4 saturate_cast<short4>(uchar4 v)
        { return make_short4(v.x, v.y, v.z, v.w); }
        template<> static __device__ short4 saturate_cast<short4>(char4 v)
        { return make_short4(v.x, v.y, v.z, v.w); }
        template<> static __device__ short4 saturate_cast<short4>(ushort4 v)
        { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }
        template<> static __device__ short4 saturate_cast<short4>(uint4 v)
        { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }
        template<> static __device__ short4 saturate_cast<short4>(int4 v)
        { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }
        template<> static __device__ short4 saturate_cast<short4>(float4 v)
        { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }

        template<> static __device__ uint4 saturate_cast<uint4>(uchar4 v)
        { return make_uint4(v.x, v.y, v.z, v.w); }
        template<> static __device__ uint4 saturate_cast<uint4>(char4 v)
        { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }
        template<> static __device__ uint4 saturate_cast<uint4>(ushort4 v)
        { return make_uint4(v.x, v.y, v.z, v.w); }
        template<> static __device__ uint4 saturate_cast<uint4>(short4 v)
        { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }
        template<> static __device__ uint4 saturate_cast<uint4>(int4 v)
        { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }
        template<> static __device__ uint4 saturate_cast<uint4>(float4 v)
        { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }

        template<> static __device__ int4 saturate_cast<int4>(uchar4 v)
        { return make_int4(v.x, v.y, v.z, v.w); }
        template<> static __device__ int4 saturate_cast<int4>(char4 v)
        { return make_int4(v.x, v.y, v.z, v.w); }
        template<> static __device__ int4 saturate_cast<int4>(ushort4 v)
        { return make_int4(v.x, v.y, v.z, v.w); }
        template<> static __device__ int4 saturate_cast<int4>(short4 v)
        { return make_int4(v.x, v.y, v.z, v.w); }
        template<> static __device__ int4 saturate_cast<int4>(uint4 v)
        { return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y), saturate_cast<int>(v.z), saturate_cast<int>(v.w)); }
        template<> static __device__ int4 saturate_cast<int4>(float4 v)
        { return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y), saturate_cast<int>(v.z), saturate_cast<int>(v.w)); }
    }
}
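
The scalar specializations clamp rather than wrap, and the vector overloads apply the scalar rule per lane. Worked values, read directly off the definitions above:

    // saturate_cast<uchar>(300)    == 255    (clamped to UCHAR_MAX)
    // saturate_cast<uchar>(-5)     == 0      (negative clamps to 0)
    // saturate_cast<short>(40000)  == 32767  (clamped to SHRT_MAX)
    // saturate_cast<uchar>(3.7f)   == 4      (round-to-nearest via __float2int_rn)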

118  modules/gpu/src/cuda/transform.hpp  Normal file
@ -0,0 +1,118 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__

#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "vecmath.hpp"

namespace cv { namespace gpu { namespace algo_krnls
{
    template <typename T, typename D, typename UnOp>
    static __global__ void transform(const T* src, size_t src_step,
        D* dst, size_t dst_step, int width, int height, UnOp op)
    {
        const int x = blockDim.x * blockIdx.x + threadIdx.x;
        const int y = blockDim.y * blockIdx.y + threadIdx.y;

        if (x < width && y < height)
        {
            T src_data = src[y * src_step + x];
            dst[y * dst_step + x] = op(src_data, x, y);
        }
    }
    template <typename T1, typename T2, typename D, typename BinOp>
    static __global__ void transform(const T1* src1, size_t src1_step, const T2* src2, size_t src2_step,
        D* dst, size_t dst_step, int width, int height, BinOp op)
    {
        const int x = blockDim.x * blockIdx.x + threadIdx.x;
        const int y = blockDim.y * blockIdx.y + threadIdx.y;

        if (x < width && y < height)
        {
            T1 src1_data = src1[y * src1_step + x];
            T2 src2_data = src2[y * src2_step + x];
            dst[y * dst_step + x] = op(src1_data, src2_data, x, y);
        }
    }
}}}
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace gpu
|
||||
{
|
||||
template <typename T, typename D, typename UnOp>
|
||||
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(16, 16, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(src.cols, threads.x);
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
algo_krnls::transform<<<grid, threads, 0, stream>>>(src.ptr, src.elem_step,
|
||||
dst.ptr, dst.elem_step, src.cols, src.rows, op);
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
template <typename T1, typename T2, typename D, typename BinOp>
|
||||
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(16, 16, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(src1.cols, threads.x);
|
||||
grid.y = divUp(src1.rows, threads.y);
|
||||
|
||||
algo_krnls::transform<<<grid, threads, 0, stream>>>(src1.ptr, src1.elem_step,
|
||||
src2.ptr, src2.elem_step, dst.ptr, dst.elem_step, src1.cols, src1.rows, op);
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __OPENCV_GPU_TRANSFORM_HPP__
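A minimal usage sketch (not part of the diff): assuming a DevMem2Df source and destination already on the device, a unary functor plugs into the transform helper above; the helper passes each pixel value plus its (x, y) coordinates.

    // Hypothetical example functor; ScaleOp is a placeholder name.
    struct ScaleOp
    {
        float s;
        explicit ScaleOp(float s_) : s(s_) {}
        __device__ float operator()(float v, int, int) const { return v * s; }
    };

    // cv::gpu::transform(src, dst, ScaleOp(0.5f), 0);   // 0 = default stream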
126 modules/gpu/src/cuda/vecmath.hpp Normal file
@ -0,0 +1,126 @@
/*M///  (standard OpenCV license header, identical to the one in transform.hpp above)  ///M*/

#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__

#include "cuda_shared.hpp"

namespace cv
{
    namespace gpu
    {
        template<typename T, int N> struct TypeVec;
        template<typename T> struct TypeVec<T, 1> { typedef T vec_t; };
        template<> struct TypeVec<unsigned char, 2> { typedef uchar2 vec_t; };
        template<> struct TypeVec<uchar2, 2> { typedef uchar2 vec_t; };
        template<> struct TypeVec<unsigned char, 3> { typedef uchar3 vec_t; };
        template<> struct TypeVec<uchar3, 3> { typedef uchar3 vec_t; };
        template<> struct TypeVec<unsigned char, 4> { typedef uchar4 vec_t; };
        template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; };
        template<> struct TypeVec<char, 2> { typedef char2 vec_t; };
        template<> struct TypeVec<char2, 2> { typedef char2 vec_t; };
        template<> struct TypeVec<char, 3> { typedef char3 vec_t; };
        template<> struct TypeVec<char3, 3> { typedef char3 vec_t; };
        template<> struct TypeVec<char, 4> { typedef char4 vec_t; };
        template<> struct TypeVec<char4, 4> { typedef char4 vec_t; };
        template<> struct TypeVec<unsigned short, 2> { typedef ushort2 vec_t; };
        template<> struct TypeVec<ushort2, 2> { typedef ushort2 vec_t; };
        template<> struct TypeVec<unsigned short, 3> { typedef ushort3 vec_t; };
        template<> struct TypeVec<ushort3, 3> { typedef ushort3 vec_t; };
        template<> struct TypeVec<unsigned short, 4> { typedef ushort4 vec_t; };
        template<> struct TypeVec<ushort4, 4> { typedef ushort4 vec_t; };
        template<> struct TypeVec<short, 2> { typedef short2 vec_t; };
        template<> struct TypeVec<short2, 2> { typedef short2 vec_t; };
        template<> struct TypeVec<short, 3> { typedef short3 vec_t; };
        template<> struct TypeVec<short3, 3> { typedef short3 vec_t; };
        template<> struct TypeVec<short, 4> { typedef short4 vec_t; };
        template<> struct TypeVec<short4, 4> { typedef short4 vec_t; };
        template<> struct TypeVec<unsigned int, 2> { typedef uint2 vec_t; };
        template<> struct TypeVec<uint2, 2> { typedef uint2 vec_t; };
        template<> struct TypeVec<unsigned int, 3> { typedef uint3 vec_t; };
        template<> struct TypeVec<uint3, 3> { typedef uint3 vec_t; };
        template<> struct TypeVec<unsigned int, 4> { typedef uint4 vec_t; };
        template<> struct TypeVec<uint4, 4> { typedef uint4 vec_t; };
        template<> struct TypeVec<int, 2> { typedef int2 vec_t; };
        template<> struct TypeVec<int2, 2> { typedef int2 vec_t; };
        template<> struct TypeVec<int, 3> { typedef int3 vec_t; };
        template<> struct TypeVec<int3, 3> { typedef int3 vec_t; };
        template<> struct TypeVec<int, 4> { typedef int4 vec_t; };
        template<> struct TypeVec<int4, 4> { typedef int4 vec_t; };
        template<> struct TypeVec<float, 2> { typedef float2 vec_t; };
        template<> struct TypeVec<float2, 2> { typedef float2 vec_t; };
        template<> struct TypeVec<float, 3> { typedef float3 vec_t; };
        template<> struct TypeVec<float3, 3> { typedef float3 vec_t; };
        template<> struct TypeVec<float, 4> { typedef float4 vec_t; };
        template<> struct TypeVec<float4, 4> { typedef float4 vec_t; };

        static __device__ uchar4 operator+(const uchar4& a, const uchar4& b)
        {
            return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
        }
        static __device__ uchar4 operator-(const uchar4& a, const uchar4& b)
        {
            return make_uchar4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
        }
        static __device__ uchar4 operator*(const uchar4& a, const uchar4& b)
        {
            return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
        }
        static __device__ uchar4 operator/(const uchar4& a, const uchar4& b)
        {
            return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
        }
        template <typename T>
        static __device__ uchar4 operator*(const uchar4& a, T s)
        {
            return make_uchar4(a.x * s, a.y * s, a.z * s, a.w * s);
        }
        template <typename T>
        static __device__ uchar4 operator*(T s, const uchar4& a)
        {
            return a * s;
        }
    }
}

#endif // __OPENCV_GPU_VECMATH_HPP__
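A quick illustration (not part of the diff): TypeVec resolves a channel type and channel count to the matching CUDA vector type at compile time, which is what lets the filter kernels stay generic over channel count.

    // Hypothetical example: picks uchar4 for 8UC4 data, plain float for 32FC1.
    typedef cv::gpu::TypeVec<unsigned char, 4>::vec_t pix8uc4_t;  // uchar4
    typedef cv::gpu::TypeVec<float, 1>::vec_t         pix32f_t;   // float

    // The uchar4 operators above then make expressions like this valid in
    // device code: make_uchar4(1, 2, 3, 4) * 2 + make_uchar4(5, 6, 7, 8).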
@ -69,6 +69,22 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int

#include "opencv2/gpu/stream_accessor.hpp"

namespace cv
{
    namespace gpu
    {
        namespace matrix_operations
        {
            void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);

            void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
            void set_to_with_mask    (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);

            void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
        }
    }
}

struct Stream::Impl
{
    cudaStream_t stream;
@ -49,18 +49,18 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA)

Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, bool) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int, int, const Size&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const GpuMat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const GpuMat&, const Size&, Point, int) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const GpuMat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const GpuMat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, bool) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }

@ -71,7 +71,7 @@ void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nog
void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point) { throw_nogpu(); }
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, bool) { throw_nogpu(); }
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point) { throw_nogpu(); }
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }
void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double) { throw_nogpu(); }
void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }
@ -164,28 +164,10 @@ Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filt

namespace
{
    struct RowColumnFilterApply
    {
        void operator()(Ptr<BaseRowFilter_GPU>& rowFilter, Ptr<BaseColumnFilter_GPU>& columnFilter,
            GpuMat& srcROI, GpuMat& dstROI, GpuMat& dstBufROI)
        {
            (*rowFilter)(srcROI, dstBufROI);
            (*columnFilter)(dstBufROI, dstROI);
        }
    };
    struct ColumnRowFilterApply
    {
        void operator()(Ptr<BaseRowFilter_GPU>& rowFilter, Ptr<BaseColumnFilter_GPU>& columnFilter,
            GpuMat& srcROI, GpuMat& dstROI, GpuMat& dstBufROI)
        {
            (*columnFilter)(srcROI, dstBufROI);
            (*rowFilter)(dstBufROI, dstROI);
        }
    };
    class SeparableFilterEngine_GPU_base : public FilterEngine_GPU
    class SeparableFilterEngine_GPU : public FilterEngine_GPU
    {
    public:
        SeparableFilterEngine_GPU_base(const Ptr<BaseRowFilter_GPU>& rowFilter_,
        SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_,
            const Ptr<BaseColumnFilter_GPU>& columnFilter_) :
            rowFilter(rowFilter_), columnFilter(columnFilter_)
        {
@ -208,6 +190,9 @@ namespace
            srcROI = src(roi);
            dstROI = dst(roi);
            dstBufROI = dstBuf(roi);

            (*rowFilter)(srcROI, dstBufROI);
            (*columnFilter)(dstBufROI, dstROI);
        }

        Ptr<BaseRowFilter_GPU> rowFilter;
@ -219,32 +204,12 @@ namespace
        GpuMat dstROI;
        GpuMat dstBufROI;
    };
    template <typename FA>
    class SeparableFilterEngine_GPU : public SeparableFilterEngine_GPU_base
    {
    public:
        SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_,
            const Ptr<BaseColumnFilter_GPU>& columnFilter_, FA fa_) :
            SeparableFilterEngine_GPU_base(rowFilter_, columnFilter_), fa(fa_)
        {
        }

        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
        {
            SeparableFilterEngine_GPU_base::apply(src, dst, roi);
            fa(rowFilter, columnFilter, srcROI, dstROI, dstBufROI);
        }

        FA fa;
    };
}

Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
    const Ptr<BaseColumnFilter_GPU>& columnFilter, bool rowFilterFirst)
    const Ptr<BaseColumnFilter_GPU>& columnFilter)
{
    if (rowFilterFirst)
        return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU<RowColumnFilterApply>(rowFilter, columnFilter, RowColumnFilterApply()));
    return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU<ColumnRowFilterApply>(rowFilter, columnFilter, ColumnRowFilterApply()));
    return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
}
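A usage sketch under assumptions (kernels and types are placeholders): the engine now always applies the row pass into an internal buffer and then the column pass, so callers compose it without the old ordering flag.

    // Hypothetical example:
    // Ptr<BaseRowFilter_GPU>    rf = getLinearRowFilter_GPU(CV_32FC1, CV_32FC1, rowKernel, -1);
    // Ptr<BaseColumnFilter_GPU> cf = getLinearColumnFilter_GPU(CV_32FC1, CV_32FC1, colKernel, -1);
    // Ptr<FilterEngine_GPU> engine = createSeparableFilter_GPU(rf, cf);
    // engine->apply(src, dst);   // row pass -> buffer -> column pass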

////////////////////////////////////////////////////////////////////////////////////////////////////
@ -398,7 +363,7 @@ namespace
    };
}

Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const GpuMat& kernel, const Size& ksize, Point anchor)
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, Point anchor)
{
    static const nppMorfFilter_t nppMorfFilter_callers[2][5] =
    {
@ -408,11 +373,12 @@ Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Gpu

    CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
    CV_Assert(type == CV_8UC1 || type == CV_8UC4);
    CV_Assert(kernel.type() == CV_8UC1 && kernel.rows == 1 && kernel.cols == ksize.area());

    GpuMat gpu_krnl;
    normalizeKernel(kernel, gpu_krnl);
    normalizeAnchor(anchor, ksize);

    return Ptr<BaseFilter_GPU>(new NPPMorphFilter(ksize, anchor, kernel, nppMorfFilter_callers[op][CV_MAT_CN(type)]));
    return Ptr<BaseFilter_GPU>(new NPPMorphFilter(ksize, anchor, gpu_krnl, nppMorfFilter_callers[op][CV_MAT_CN(type)]));
}

namespace
@ -447,10 +413,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, cons

    Size ksize = kernel.size();

    GpuMat gpu_krnl;
    normalizeKernel(kernel, gpu_krnl);

    Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, gpu_krnl, ksize, anchor);
    Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);

    return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, iterations));
}

@ -575,27 +538,25 @@ namespace
    };
}

Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const GpuMat& kernel, const Size& ksize, Point anchor, int nDivisor)
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, Point anchor)
{
    static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};

    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
    CV_Assert(kernel.type() == CV_32SC1 && kernel.rows == 1 && kernel.cols == ksize.area());

    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);

    GpuMat gpu_krnl;
    int nDivisor;
    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
    normalizeAnchor(anchor, ksize);

    return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, kernel, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
    return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
}

Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor)
{
    Size ksize = kernel.size();

    GpuMat gpu_krnl;
    int nDivisor;
    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);

    Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, gpu_krnl, ksize, anchor, nDivisor);
    Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, ksize, anchor);

    return createFilter2D_GPU(linearFilter);
}
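A usage sketch under assumptions (names are placeholders): the caller now passes a plain Mat kernel, and normalization to CV_32S plus the nDivisor bookkeeping happens inside.

    // Hypothetical example: sharpening an 8-bit GpuMat via the NPP 2D filter path.
    // Mat kernel = (Mat_<int>(3, 3) << 0, -1, 0, -1, 5, -1, 0, -1, 0);
    // cv::gpu::filter2D(src, dst, -1, kernel);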

@ -614,11 +575,26 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter

namespace cv { namespace gpu { namespace filters
{
    void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
    void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
    void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
    void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);

    void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
    void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
    void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
    void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
}}}

namespace
{
    typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
        const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);

    typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);

    class NppLinearRowFilter : public BaseRowFilter_GPU
    {
    public:
@ -638,20 +614,64 @@ namespace
        Npp32s nDivisor;
        nppFilter1D_t func;
    };

    class GpuLinearRowFilter : public BaseRowFilter_GPU
    {
    public:
        GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) :
            BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}

        virtual void operator()(const GpuMat& src, GpuMat& dst)
        {
            func(src, dst, kernel.ptr<float>(), ksize, anchor);
        }

        Mat kernel;
        gpuFilter1D_t func;
    };
}

Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const GpuMat& rowKernel, int anchor, int nDivisor)
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor)
{
    using namespace cv::gpu::filters;
    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
    static const gpuFilter1D_t gpuFilter1D_callers[6][6] =
    {
        {0,0,0,0,0,0},
        {0,0,0,0,0,0},
        {0,0,0,0,0,0},
        {0,0,0,0,0,0},
        {0,0,0,0,linearRowFilter_gpu_32s32s, linearRowFilter_gpu_32s32f},
        {0,0,0,0,linearRowFilter_gpu_32f32s, linearRowFilter_gpu_32f32f}
    };

    if ((srcType == CV_8UC1 || srcType == CV_8UC4) && bufType == srcType)
    {
        GpuMat gpu_row_krnl;
        int nDivisor;
        normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true);

        CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && bufType == srcType);
        CV_Assert(rowKernel.type() == CV_32SC1 && rowKernel.rows == 1);
        int ksize = gpu_row_krnl.cols;
        normalizeAnchor(anchor, ksize);

        int ksize = rowKernel.cols;
        return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,
            nppFilter1D_callers[CV_MAT_CN(srcType)]));
    }
    else if ((srcType == CV_32SC1 || srcType == CV_32FC1) && (bufType == CV_32SC1 || bufType == CV_32FC1))
    {
        Mat temp(rowKernel.size(), CV_32FC1);
        rowKernel.convertTo(temp, CV_32FC1);
        Mat cont_krnl = temp.reshape(1, 1);

        normalizeAnchor(anchor, ksize);
        int ksize = cont_krnl.cols;
        normalizeAnchor(anchor, ksize);

        return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, rowKernel, nDivisor, nppFilter1D_callers[CV_MAT_CN(srcType)]));
        return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl,
            gpuFilter1D_callers[CV_MAT_DEPTH(srcType)][CV_MAT_DEPTH(bufType)]));
    }

    CV_Assert(!"Unsupported types");
    return Ptr<BaseRowFilter_GPU>(0);
}
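A usage sketch under assumptions: for CV_32FC1 data the new branch converts the kernel to CV_32FC1, flattens it to one row, and dispatches to linearRowFilter_gpu_32f32f.

    // Hypothetical example; the kernel source is a placeholder.
    // Mat rowK = getGaussianKernel(5, -1, CV_32F);
    // Ptr<BaseRowFilter_GPU> rf = getLinearRowFilter_GPU(CV_32FC1, CV_32FC1, rowK, -1);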

namespace
@ -675,49 +695,88 @@ namespace
        Npp32s nDivisor;
        nppFilter1D_t func;
    };

    class GpuLinearColumnFilter : public BaseColumnFilter_GPU
    {
    public:
        GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) :
            BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}

        virtual void operator()(const GpuMat& src, GpuMat& dst)
        {
            func(src, dst, kernel.ptr<float>(), ksize, anchor);
        }

        Mat kernel;
        gpuFilter1D_t func;
    };
}

Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const GpuMat& columnKernel, int anchor, int nDivisor)
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor)
{
    using namespace cv::gpu::filters;
    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
    static const gpuFilter1D_t gpuFilter1D_callers[6][6] =
    {
        {0,0,0,0,0,0},
        {0,0,0,0,0,0},
        {0,0,0,0,0,0},
        {0,0,0,0,0,0},
        {0,0,0,0,linearColumnFilter_gpu_32s32s, linearColumnFilter_gpu_32s32f},
        {0,0,0,0,linearColumnFilter_gpu_32f32s, linearColumnFilter_gpu_32f32f}
    };

    if ((bufType == CV_8UC1 || bufType == CV_8UC4) && dstType == bufType)
    {
        GpuMat gpu_col_krnl;
        int nDivisor;
        normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true);

        CV_Assert((bufType == CV_8UC1 || bufType == CV_8UC4) && dstType == bufType);
        CV_Assert(columnKernel.type() == CV_32SC1 && columnKernel.rows == 1);
        int ksize = gpu_col_krnl.cols;
        normalizeAnchor(anchor, ksize);

        int ksize = columnKernel.cols;
        return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,
            nppFilter1D_callers[CV_MAT_CN(bufType)]));
    }
    else if ((bufType == CV_32SC1 || bufType == CV_32FC1) && (dstType == CV_32SC1 || dstType == CV_32FC1))
    {
        Mat temp(columnKernel.size(), CV_32FC1);
        columnKernel.convertTo(temp, CV_32FC1);
        Mat cont_krnl = temp.reshape(1, 1);

        normalizeAnchor(anchor, ksize);
        int ksize = cont_krnl.cols;
        normalizeAnchor(anchor, ksize);

        return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, columnKernel, nDivisor, nppFilter1D_callers[CV_MAT_CN(bufType)]));
        return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl,
            gpuFilter1D_callers[CV_MAT_DEPTH(bufType)][CV_MAT_DEPTH(dstType)]));
    }

    CV_Assert(!"Unsupported types");
    return Ptr<BaseColumnFilter_GPU>(0);
}

Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,
    const Point& anchor, bool rowFilterFirst)
    const Point& anchor)
{
    int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
    int cn = CV_MAT_CN(srcType);
    int bdepth = std::max(sdepth, ddepth);
    int bufType = CV_MAKETYPE(bdepth, cn);

    GpuMat gpu_row_krnl, gpu_col_krnl;
    int nRowDivisor, nColDivisor;
    normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nRowDivisor, true);
    normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nColDivisor, true);
    Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x);
    Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y);

    Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, gpu_row_krnl, anchor.x, nRowDivisor);
    Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, gpu_col_krnl, anchor.y, nColDivisor);

    return createSeparableFilter_GPU(rowFilter, columnFilter, rowFilterFirst);
    return createSeparableFilter_GPU(rowFilter, columnFilter);
}

void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor, bool rowFilterFirst)
void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor)
{
    if( ddepth < 0 )
        ddepth = src.depth();

    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowFilterFirst);
    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor);
    f->apply(src, dst);
}
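A usage sketch under assumptions: sepFilter2D now drives both the 8U NPP path and the new 32S/32F GPU path through the same entry point.

    // Hypothetical example: separable Gaussian smoothing of a CV_32FC1 GpuMat.
    // Mat kx = getGaussianKernel(7, -1, CV_32F);
    // Mat ky = getGaussianKernel(7, -1, CV_32F);
    // cv::gpu::sepFilter2D(src32f, dst32f, -1, kx, ky, Point(-1,-1));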

@ -728,7 +787,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, i
{
    Mat kx, ky;
    getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
    return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), dx >= dy);
    return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1));
}

void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)
@ -746,7 +805,7 @@ void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
        ky *= scale;
    }

    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), dx >= dy);
    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));
}
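A brief note with a hypothetical call (not from the diff): since the engine applies row-then-column unconditionally, the derivative filters no longer pass the dx >= dy ordering hint.

    // cv::gpu::Sobel(src, dst, -1, 1, 0, 3);   // horizontal gradient, 3x3 kernel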

void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale)
@ -764,7 +823,7 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
        ky *= scale;
    }

    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), dx >= dy);
    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));
}

void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale)
@ -75,7 +75,7 @@ void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu();

namespace cv { namespace gpu
{
    namespace improc
    namespace imgproc
    {
        void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
        void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
@ -142,7 +142,7 @@ namespace cv { namespace gpu
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap)
{
    typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
    static const remap_gpu_t callers[] = {improc::remap_gpu_1c, 0, improc::remap_gpu_3c};
    static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};

    CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F);

@ -180,7 +180,7 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
        eps = 1.f;
    eps = (float)std::max(criteria.epsilon, 0.0);

    improc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
    imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
}

////////////////////////////////////////////////////////////////////////
@ -207,7 +207,7 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
        eps = 1.f;
    eps = (float)std::max(criteria.epsilon, 0.0);

    improc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);
    imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);
}

////////////////////////////////////////////////////////////////////////
@ -223,7 +223,7 @@ namespace
        out = dst;
        out.create(src.size(), CV_8UC4);

        improc::drawColorDisp_gpu((DevMem2D_<T>)src, out, ndisp, stream);
        imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, out, ndisp, stream);

        dst = out;
    }
@ -256,7 +256,7 @@ namespace
    void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
    {
        xyzw.create(disp.rows, disp.cols, CV_32FC4);
        improc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
        imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
    }

    typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);
@ -313,7 +313,7 @@ namespace
    case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
        static const func_t funcs[] = {improc::RGB2RGB_gpu_8u, 0, improc::RGB2RGB_gpu_16u, 0, 0, improc::RGB2RGB_gpu_32f};
        static const func_t funcs[] = {imgproc::RGB2RGB_gpu_8u, 0, imgproc::RGB2RGB_gpu_16u, 0, 0, imgproc::RGB2RGB_gpu_32f};

        CV_Assert(scn == 3 || scn == 4);

@ -338,7 +338,7 @@ namespace

        dst.create(sz, CV_8UC2);

        improc::RGB2RGB5x5_gpu(src, scn, dst, green_bits, bidx, stream);
        imgproc::RGB2RGB5x5_gpu(src, scn, dst, green_bits, bidx, stream);
        break;
    }

@ -356,14 +356,14 @@ namespace

        dst.create(sz, CV_MAKETYPE(depth, dcn));

        improc::RGB5x52RGB_gpu(src, green_bits, dst, dcn, bidx, stream);
        imgproc::RGB5x52RGB_gpu(src, green_bits, dst, dcn, bidx, stream);
        break;
    }

    case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
        static const func_t funcs[] = {improc::RGB2Gray_gpu_8u, 0, improc::RGB2Gray_gpu_16u, 0, 0, improc::RGB2Gray_gpu_32f};
        static const func_t funcs[] = {imgproc::RGB2Gray_gpu_8u, 0, imgproc::RGB2Gray_gpu_16u, 0, 0, imgproc::RGB2Gray_gpu_32f};

        CV_Assert(scn == 3 || scn == 4);

@ -383,14 +383,14 @@ namespace

        dst.create(sz, CV_8UC1);

        improc::RGB5x52Gray_gpu(src, green_bits, dst, stream);
        imgproc::RGB5x52Gray_gpu(src, green_bits, dst, stream);
        break;
    }

    case CV_GRAY2BGR: case CV_GRAY2BGRA:
    {
        typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
        static const func_t funcs[] = {improc::Gray2RGB_gpu_8u, 0, improc::Gray2RGB_gpu_16u, 0, 0, improc::Gray2RGB_gpu_32f};
        static const func_t funcs[] = {imgproc::Gray2RGB_gpu_8u, 0, imgproc::Gray2RGB_gpu_16u, 0, 0, imgproc::Gray2RGB_gpu_32f};

        if (dcn <= 0) dcn = 3;

@ -410,7 +410,7 @@ namespace

        dst.create(sz, CV_8UC2);

        improc::Gray2RGB5x5_gpu(src, dst, green_bits, stream);
        imgproc::Gray2RGB5x5_gpu(src, dst, green_bits, stream);
        break;
    }

@ -419,7 +419,7 @@ namespace
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
            const void* coeffs, cudaStream_t stream);
        static const func_t funcs[] = {improc::RGB2YCrCb_gpu_8u, 0, improc::RGB2YCrCb_gpu_16u, 0, 0, improc::RGB2YCrCb_gpu_32f};
        static const func_t funcs[] = {imgproc::RGB2YCrCb_gpu_8u, 0, imgproc::RGB2YCrCb_gpu_16u, 0, 0, imgproc::RGB2YCrCb_gpu_32f};

        if (dcn <= 0) dcn = 3;
        CV_Assert((scn == 3 || scn == 4) && (dcn == 3 || dcn == 4));
@ -456,7 +456,7 @@ namespace
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
            const void* coeffs, cudaStream_t stream);
        static const func_t funcs[] = {improc::YCrCb2RGB_gpu_8u, 0, improc::YCrCb2RGB_gpu_16u, 0, 0, improc::YCrCb2RGB_gpu_32f};
        static const func_t funcs[] = {imgproc::YCrCb2RGB_gpu_8u, 0, imgproc::YCrCb2RGB_gpu_16u, 0, 0, imgproc::YCrCb2RGB_gpu_32f};

        if (dcn <= 0) dcn = 3;

@ -485,7 +485,7 @@ namespace
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn,
            const void* coeffs, cudaStream_t stream);
        static const func_t funcs[] = {improc::RGB2XYZ_gpu_8u, 0, improc::RGB2XYZ_gpu_16u, 0, 0, improc::RGB2XYZ_gpu_32f};
        static const func_t funcs[] = {imgproc::RGB2XYZ_gpu_8u, 0, imgproc::RGB2XYZ_gpu_16u, 0, 0, imgproc::RGB2XYZ_gpu_32f};

        if (dcn <= 0) dcn = 3;

@ -534,7 +534,7 @@ namespace
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn,
            const void* coeffs, cudaStream_t stream);
        static const func_t funcs[] = {improc::XYZ2RGB_gpu_8u, 0, improc::XYZ2RGB_gpu_16u, 0, 0, improc::XYZ2RGB_gpu_32f};
        static const func_t funcs[] = {imgproc::XYZ2RGB_gpu_8u, 0, imgproc::XYZ2RGB_gpu_16u, 0, 0, imgproc::XYZ2RGB_gpu_32f};

        if (dcn <= 0) dcn = 3;

@ -584,8 +584,8 @@ namespace
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
            int hrange, cudaStream_t stream);
        static const func_t funcs_hsv[] = {improc::RGB2HSV_gpu_8u, 0, 0, 0, 0, improc::RGB2HSV_gpu_32f};
        static const func_t funcs_hls[] = {improc::RGB2HLS_gpu_8u, 0, 0, 0, 0, improc::RGB2HLS_gpu_32f};
        static const func_t funcs_hsv[] = {imgproc::RGB2HSV_gpu_8u, 0, 0, 0, 0, imgproc::RGB2HSV_gpu_32f};
        static const func_t funcs_hls[] = {imgproc::RGB2HLS_gpu_8u, 0, 0, 0, 0, imgproc::RGB2HLS_gpu_32f};

        if (dcn <= 0) dcn = 3;

@ -610,8 +610,8 @@ namespace
    {
        typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
            int hrange, cudaStream_t stream);
        static const func_t funcs_hsv[] = {improc::HSV2RGB_gpu_8u, 0, 0, 0, 0, improc::HSV2RGB_gpu_32f};
        static const func_t funcs_hls[] = {improc::HLS2RGB_gpu_8u, 0, 0, 0, 0, improc::HLS2RGB_gpu_32f};
        static const func_t funcs_hsv[] = {imgproc::HSV2RGB_gpu_8u, 0, 0, 0, 0, imgproc::HSV2RGB_gpu_32f};
        static const func_t funcs_hls[] = {imgproc::HLS2RGB_gpu_8u, 0, 0, 0, 0, imgproc::HLS2RGB_gpu_32f};

        if (dcn <= 0) dcn = 3;
@ -77,6 +77,22 @@ namespace cv

#else /* !defined (HAVE_CUDA) */

namespace cv
{
    namespace gpu
    {
        namespace matrix_operations
        {
            void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);

            void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
            void set_to_with_mask    (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);

            void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
        }
    }
}

void cv::gpu::GpuMat::upload(const Mat& m)
{
    CV_DbgAssert(!m.empty());
@ -53,7 +53,6 @@ const char* blacklist[] =
    //"GPU-NppImageMeanStdDev",      // different precision
    //"GPU-NppImageExp",             // different precision
    //"GPU-NppImageLog",             // different precision
    //"GPU-NppImageMagnitude",       // different precision

    "GPU-NppImageCanny",             // NPP_TEXTURE_BIND_ERROR
    //"GPU-NppImageResize",          // different precision
@ -61,8 +60,8 @@ const char* blacklist[] =
    //"GPU-NppImageWarpPerspective", // different precision
    //"GPU-NppImageIntegral",        // different precision

    //"GPU-NppImageSobel",           // ???
    //"GPU-NppImageScharr",          // ???
    //"GPU-NppImageSobel",           // sign error
    //"GPU-NppImageScharr",          // sign error
    //"GPU-NppImageGaussianBlur",    // different precision
    0
};