Reduce warp instantiations for tiny build

This commit is contained in:
Vladislav Vinogradov 2015-03-04 16:15:14 +03:00
parent 3004f5fa30
commit e7b55f4982
2 changed files with 63 additions and 4 deletions

View File

@ -278,6 +278,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[3][5] =
{
{
@ -285,25 +286,55 @@ namespace cv { namespace gpu { namespace device
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
0/*WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call*/,
},
{
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
0/*WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call*/,
},
{
0/*WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call*/,
0/*WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call*/,
0/*WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call*/,
0/*WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call*/,
0/*WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call*/,
}
};
#else
static const func_t funcs[3][5] =
{
{
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call,
},
{
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call,
},
{
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call,
}
};
#endif
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
const func_t func = funcs[interpolation][borderMode];
if (!func)
cv::gpu::error("Unsupported input parameters for warp_caller", __FILE__, __LINE__, "");
func(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
}
@ -320,6 +351,7 @@ namespace cv { namespace gpu { namespace device
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
#ifndef OPENCV_TINY_GPU_MODULE
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
@ -339,6 +371,7 @@ namespace cv { namespace gpu { namespace device
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
#endif
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
@ -358,6 +391,7 @@ namespace cv { namespace gpu { namespace device
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
#ifndef OPENCV_TINY_GPU_MODULE
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
@ -377,6 +411,7 @@ namespace cv { namespace gpu { namespace device
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
#endif
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

View File

@ -277,6 +277,17 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
#ifdef OPENCV_TINY_GPU_MODULE
// Tiny-build dispatch table for warpAffine.
// It is indexed as funcs[src.depth()][src.channels() - 1] by the lookup that
// follows the #endif, so each row corresponds to one depth value and each
// column to a channel count of 1..4.
// A 0 entry means no kernel is available for that combination; the lookup
// result is checked with CV_Assert(func != 0) before it is used.
static const func_t funcs[6][4] =
{
// Row 0: uchar kernels; the 2-channel variant is disabled.
{warpAffine_gpu<uchar> , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3> , warpAffine_gpu<uchar4> },
// Rows 1-4: no kernels are referenced in the tiny build.
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
// Row 5: float kernels; the 2-channel variant is disabled.
{warpAffine_gpu<float> , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3> , warpAffine_gpu<float4> }
};
#else
static const func_t funcs[6][4] =
{
{warpAffine_gpu<uchar> , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3> , warpAffine_gpu<uchar4> },
@ -286,6 +297,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
{0 /*warpAffine_gpu<int>*/ , 0 /*warpAffine_gpu<int2>*/ , 0 /*warpAffine_gpu<int3>*/ , 0 /*warpAffine_gpu<int4>*/ },
{warpAffine_gpu<float> , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3> , warpAffine_gpu<float4> }
};
#endif
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
@ -415,6 +427,17 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
// Signature shared by the warpPerspective_gpu<T> entries stored in the
// dispatch table that follows.
// The coefficient parameter is spelled float[3 * 3] to agree with the
// nine-element parameter of warpPerspective_gpu. An array parameter is adjusted
// to float*, so the resulting function-pointer type is exactly the same as with
// the previous float[2 * 3] spelling; only the documented size is corrected.
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
#ifdef OPENCV_TINY_GPU_MODULE
// Tiny-build dispatch table for warpPerspective.
// It is indexed as funcs[src.depth()][src.channels() - 1] by the lookup that
// follows the #endif, so each row corresponds to one depth value and each
// column to a channel count of 1..4.
// A 0 entry means no kernel is available for that combination; the lookup
// result is checked with CV_Assert(func != 0) before it is used.
static const func_t funcs[6][4] =
{
// Row 0: uchar kernels; the 2-channel variant is disabled.
{warpPerspective_gpu<uchar> , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3> , warpPerspective_gpu<uchar4> },
// Rows 1-4: no kernels are referenced in the tiny build.
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
// Row 5: float kernels; the 2-channel variant is disabled.
{warpPerspective_gpu<float> , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3> , warpPerspective_gpu<float4> }
};
#else
static const func_t funcs[6][4] =
{
{warpPerspective_gpu<uchar> , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3> , warpPerspective_gpu<uchar4> },
@ -424,6 +447,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
{0 /*warpPerspective_gpu<int>*/ , 0 /*warpPerspective_gpu<int2>*/ , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
{warpPerspective_gpu<float> , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3> , warpPerspective_gpu<float4> }
};
#endif
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert(func != 0);