Merge branch 'master' of git://code.opencv.org/opencv

This commit is contained in:
Oleg Sklyarov 2012-10-10 17:05:29 +04:00
commit 2f4e8ee73e
5 changed files with 46 additions and 39 deletions

View File

@ -146,7 +146,7 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
} }
GPU_SANITY_CHECK(d_vertex); GPU_SANITY_CHECK(d_vertex);
GPU_SANITY_CHECK(d_colors) GPU_SANITY_CHECK(d_colors);
} }
else else
{ {

View File

@ -58,9 +58,12 @@ namespace cv { namespace gpu { namespace device
__constant__ float c_kernel[MAX_KERNEL_SIZE]; __constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float kernel[], int ksize) void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{ {
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) ); if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
} }
template <int KSIZE, typename T, typename D, typename B> template <int KSIZE, typename T, typename D, typename B>
@ -185,7 +188,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T, typename D> template <typename T, typename D>
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream) void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{ {
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream); typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
@ -368,18 +371,18 @@ namespace cv { namespace gpu { namespace device
} }
}; };
loadKernel(kernel, ksize); loadKernel(kernel, ksize, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream); callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
} }
template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace column_filter } // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device }}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */ #endif /* CUDA_DISABLER */

View File

@ -986,7 +986,10 @@ namespace cv { namespace gpu { namespace device
Filter2DCaller<T, D, BrdWrap>::call Filter2DCaller<T, D, BrdWrap>::call
}; };
cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
} }
@ -1001,4 +1004,4 @@ namespace cv { namespace gpu { namespace device
}}} // namespace cv { namespace gpu { namespace device { }}} // namespace cv { namespace gpu { namespace device {
#endif /* CUDA_DISABLER */ #endif /* CUDA_DISABLER */

View File

@ -58,9 +58,12 @@ namespace cv { namespace gpu { namespace device
__constant__ float c_kernel[MAX_KERNEL_SIZE]; __constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float kernel[], int ksize) void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{ {
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) ); if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
} }
template <int KSIZE, typename T, typename D, typename B> template <int KSIZE, typename T, typename D, typename B>
@ -184,7 +187,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T, typename D> template <typename T, typename D>
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream) void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{ {
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream); typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
@ -367,18 +370,18 @@ namespace cv { namespace gpu { namespace device
} }
}; };
loadKernel(kernel, ksize); loadKernel(kernel, ksize, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream); callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
} }
template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace row_filter } // namespace row_filter
}}} // namespace cv { namespace gpu { namespace device }}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */ #endif /* CUDA_DISABLER */

View File

@ -835,13 +835,13 @@ namespace cv { namespace gpu { namespace device
namespace row_filter namespace row_filter
{ {
template <typename T, typename D> template <typename T, typename D>
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} }
namespace column_filter namespace column_filter
{ {
template <typename T, typename D> template <typename T, typename D>
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} }
}}} }}}
@ -881,7 +881,7 @@ namespace
struct GpuLinearRowFilter : public BaseRowFilter_GPU struct GpuLinearRowFilter : public BaseRowFilter_GPU
{ {
GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) : GpuLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, gpuFilter1D_t func_, int brd_type_) :
BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {} BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -891,7 +891,7 @@ namespace
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s)); func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
} }
Mat kernel; GpuMat kernel;
gpuFilter1D_t func; gpuFilter1D_t func;
int brd_type; int brd_type;
}; };
@ -926,11 +926,10 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType)); CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
Mat temp(rowKernel.size(), CV_32FC1); GpuMat gpu_row_krnl;
rowKernel.convertTo(temp, CV_32FC1); normalizeKernel(rowKernel, gpu_row_krnl, CV_32F);
Mat cont_krnl = temp.reshape(1, 1);
int ksize = cont_krnl.cols; int ksize = gpu_row_krnl.cols;
CV_Assert(ksize > 0 && ksize <= 32); CV_Assert(ksize > 0 && ksize <= 32);
@ -957,7 +956,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
break; break;
} }
return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl, func, gpuBorderType)); return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, gpuBorderType));
} }
namespace namespace
@ -991,7 +990,7 @@ namespace
struct GpuLinearColumnFilter : public BaseColumnFilter_GPU struct GpuLinearColumnFilter : public BaseColumnFilter_GPU
{ {
GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) : GpuLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, gpuFilter1D_t func_, int brd_type_) :
BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {} BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -1004,7 +1003,7 @@ namespace
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s)); func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
} }
Mat kernel; GpuMat kernel;
gpuFilter1D_t func; gpuFilter1D_t func;
int brd_type; int brd_type;
}; };
@ -1039,11 +1038,10 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType)); CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
Mat temp(columnKernel.size(), CV_32FC1); GpuMat gpu_col_krnl;
columnKernel.convertTo(temp, CV_32FC1); normalizeKernel(columnKernel, gpu_col_krnl, CV_32F);
Mat cont_krnl = temp.reshape(1, 1);
int ksize = cont_krnl.cols; int ksize = gpu_col_krnl.cols;
CV_Assert(ksize > 0 && ksize <= 32); CV_Assert(ksize > 0 && ksize <= 32);
@ -1070,7 +1068,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
break; break;
} }
return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, func, gpuBorderType)); return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, gpuBorderType));
} }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,