Reduce separable filter instantiations for tiny build

This commit is contained in:
Vladislav Vinogradov 2015-03-04 14:18:49 +03:00
parent cde697dd14
commit 00c36e88ef
21 changed files with 470 additions and 2 deletions

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float, unsigned short>.
template void linearColumn<float, unsigned short>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float3, ushort3>.
template void linearColumn<float3, ushort3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float4, ushort4>.
template void linearColumn<float4, ushort4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float3, int3>.
template void linearColumn<float3, int3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float4, int4>.
template void linearColumn<float4, int4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float3, short3>.
template void linearColumn<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float, int>.
template void linearColumn<float, int>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float, short>.
template void linearColumn<float, short>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "column_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearColumn<float4, short4>.
template void linearColumn<float4, short4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -183,6 +183,186 @@ namespace filter
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
#ifdef OPENCV_TINY_GPU_MODULE
// Tiny-build dispatch table, looked up below as callers[brd_type][ksize].
// Only kernel sizes 3, 5 and 7 have a launcher; every other slot is a null
// pointer. Each row lists entries up to index 7 only: the remaining elements
// of an aggregate-initialized array are zero-initialized, so slots 8..32 are
// null exactly as if they had been spelled out.
static const caller_t callers[5][33] =
{
    // row 0: BrdColReflect101
    {
        0, 0, 0,
        column_filter::caller< 3, T, D, BrdColReflect101>,
        0,
        column_filter::caller< 5, T, D, BrdColReflect101>,
        0,
        column_filter::caller< 7, T, D, BrdColReflect101>
    },
    // row 1: BrdColReplicate
    {
        0, 0, 0,
        column_filter::caller< 3, T, D, BrdColReplicate>,
        0,
        column_filter::caller< 5, T, D, BrdColReplicate>,
        0,
        column_filter::caller< 7, T, D, BrdColReplicate>
    },
    // row 2: BrdColConstant
    {
        0, 0, 0,
        column_filter::caller< 3, T, D, BrdColConstant>,
        0,
        column_filter::caller< 5, T, D, BrdColConstant>,
        0,
        column_filter::caller< 7, T, D, BrdColConstant>
    },
    // row 3: BrdColReflect
    {
        0, 0, 0,
        column_filter::caller< 3, T, D, BrdColReflect>,
        0,
        column_filter::caller< 5, T, D, BrdColReflect>,
        0,
        column_filter::caller< 7, T, D, BrdColReflect>
    },
    // row 4: no launcher for any kernel size in the tiny build
    {
        0
    }
};
#else
static const caller_t callers[5][33] =
{
{
@ -361,12 +541,17 @@ namespace filter
column_filter::caller<32, T, D, BrdColWrap>
}
};
#endif
// Pick the launcher for this border type and kernel size. A null slot means
// the combination is not available in this build (the tiny build leaves most
// of the table empty), so report it instead of calling through the pointer.
// NOTE(review): this relies on cv::gpu::error not returning - confirm.
const caller_t caller = callers[brd_type][ksize];
if (!caller)
    cv::gpu::error("Unsupported input parameters for column_filter", __FILE__, __LINE__, "");

// Copy the ksize filter coefficients into the device symbol
// column_filter::c_kernel (device-to-device). A non-default stream uses the
// asynchronous variant so the copy is issued on that stream.
if (stream == 0)
    cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
    cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );

// Run the filter exactly once, through the pointer checked above. The older
// direct callers[brd_type][ksize](...) call was dropped: keeping both would
// launch the filter twice.
caller((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
}

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<unsigned short, float>.
template void linearRow<unsigned short, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<ushort3, float3>.
template void linearRow<ushort3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<ushort4, float4>.
template void linearRow<ushort4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<int3, float3>.
template void linearRow<int3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<int4, float4>.
template void linearRow<int4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<short3, float3>.
template void linearRow<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<int, float>.
template void linearRow<int, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<short, float>.
template void linearRow<short, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -44,9 +44,13 @@
#include "row_filter.h"
// Compiled only when OPENCV_TINY_GPU_MODULE is not defined, so the tiny build
// does not emit this instantiation.
#ifndef OPENCV_TINY_GPU_MODULE
namespace filter
{
// Explicit instantiation of filter::linearRow<short4, float4>.
template void linearRow<short4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif
#endif /* CUDA_DISABLER */

View File

@ -182,6 +182,186 @@ namespace filter
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
#ifdef OPENCV_TINY_GPU_MODULE
// Tiny-build dispatch table, looked up below as callers[brd_type][ksize].
// Only kernel sizes 3, 5 and 7 have a launcher; every other slot is a null
// pointer. Each row lists entries up to index 7 only: the remaining elements
// of an aggregate-initialized array are zero-initialized, so slots 8..32 are
// null exactly as if they had been spelled out.
static const caller_t callers[5][33] =
{
    // row 0: BrdRowReflect101
    {
        0, 0, 0,
        row_filter::caller< 3, T, D, BrdRowReflect101>,
        0,
        row_filter::caller< 5, T, D, BrdRowReflect101>,
        0,
        row_filter::caller< 7, T, D, BrdRowReflect101>
    },
    // row 1: BrdRowReplicate
    {
        0, 0, 0,
        row_filter::caller< 3, T, D, BrdRowReplicate>,
        0,
        row_filter::caller< 5, T, D, BrdRowReplicate>,
        0,
        row_filter::caller< 7, T, D, BrdRowReplicate>
    },
    // row 2: BrdRowConstant
    {
        0, 0, 0,
        row_filter::caller< 3, T, D, BrdRowConstant>,
        0,
        row_filter::caller< 5, T, D, BrdRowConstant>,
        0,
        row_filter::caller< 7, T, D, BrdRowConstant>
    },
    // row 3: BrdRowReflect
    {
        0, 0, 0,
        row_filter::caller< 3, T, D, BrdRowReflect>,
        0,
        row_filter::caller< 5, T, D, BrdRowReflect>,
        0,
        row_filter::caller< 7, T, D, BrdRowReflect>
    },
    // row 4: no launcher for any kernel size in the tiny build
    {
        0
    }
};
#else
static const caller_t callers[5][33] =
{
{
@ -360,12 +540,17 @@ namespace filter
row_filter::caller<32, T, D, BrdRowWrap>
}
};
#endif
// Pick the launcher for this border type and kernel size. A null slot means
// the combination is not available in this build (the tiny build leaves most
// of the table empty), so report it instead of calling through the pointer.
// NOTE(review): this relies on cv::gpu::error not returning - confirm.
const caller_t caller = callers[brd_type][ksize];
if (!caller)
    cv::gpu::error("Unsupported input parameters for row_filter", __FILE__, __LINE__, "");

// Copy the ksize filter coefficients into the device symbol
// row_filter::c_kernel (device-to-device). A non-default stream uses the
// asynchronous variant so the copy is issued on that stream.
if (stream == 0)
    cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
    cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );

// Run the filter exactly once, through the pointer checked above. The older
// direct callers[brd_type][ksize](...) call was dropped: keeping both would
// launch the filter twice.
caller((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
}

View File

@ -893,6 +893,18 @@ namespace
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
{
#ifdef OPENCV_TINY_GPU_MODULE
// Tiny-build table of filter::linearRow entry points. Only row 0 (uchar
// sources) and row 5 (float sources) are populated; all other slots are null.
// NOTE(review): rows look like OpenCV depth codes (0 = CV_8U, 5 = CV_32F) and
// columns like channel count minus one - confirm against the lookup later in
// this function, which is not visible here.
static const gpuFilter1D_t funcs[7][4] =
{
{filter::linearRow<uchar, float>, 0, filter::linearRow<uchar3, float3>, filter::linearRow<uchar4, float4>},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{filter::linearRow<float, float>, 0, filter::linearRow<float3, float3>, filter::linearRow<float4, float4>},
{0, 0, 0, 0}
};
#else
static const gpuFilter1D_t funcs[7][4] =
{
{filter::linearRow<uchar, float>, 0, filter::linearRow<uchar3, float3>, filter::linearRow<uchar4, float4>},
@ -903,6 +915,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
{filter::linearRow<float, float>, 0, filter::linearRow<float3, float3>, filter::linearRow<float4, float4>},
{0, 0, 0, 0}
};
#endif
static const nppFilter1D_t npp_funcs[] =
{
0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R
@ -998,6 +1011,18 @@ namespace
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
{
#ifdef OPENCV_TINY_GPU_MODULE
// Tiny-build table of filter::linearColumn entry points. Only row 0 (uchar
// destinations) and row 5 (float destinations) are populated; all other slots
// are null.
// NOTE(review): rows look like OpenCV depth codes (0 = CV_8U, 5 = CV_32F) and
// columns like channel count minus one - confirm against the lookup later in
// this function, which is not visible here.
static const gpuFilter1D_t funcs[7][4] =
{
{filter::linearColumn<float, uchar>, 0, filter::linearColumn<float3, uchar3>, filter::linearColumn<float4, uchar4>},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{filter::linearColumn<float, float>, 0, filter::linearColumn<float3, float3>, filter::linearColumn<float4, float4>},
{0, 0, 0, 0}
};
#else
static const gpuFilter1D_t funcs[7][4] =
{
{filter::linearColumn<float, uchar>, 0, filter::linearColumn<float3, uchar3>, filter::linearColumn<float4, uchar4>},
@ -1008,6 +1033,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
{filter::linearColumn<float, float>, 0, filter::linearColumn<float3, float3>, filter::linearColumn<float4, float4>},
{0, 0, 0, 0}
};
#endif
static const nppFilter1D_t npp_funcs[] =
{
0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R