rewrite core/cuda/vec_math.hpp file

The old version does not compile with CUDA 5.5.
The new version no longer depends on functional.hpp.
This commit is contained in:
Vladislav Vinogradov 2013-06-07 14:56:37 +04:00
parent 869a35faef
commit 14e4b3adde
8 changed files with 860 additions and 262 deletions

File diff suppressed because it is too large Load Diff

View File

@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{ {
I d = a - b; I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x && return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y && lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z; lo.z <= d.z && d.z <= hi.z;
@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{ {
I d = a - b; I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x && return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y && lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z && lo.z <= d.z && d.z <= hi.z &&

View File

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"

View File

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"

View File

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"

View File

@ -46,6 +46,7 @@
#include "opencv2/core/cuda/saturate_cast.hpp" #include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"

View File

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/utility.hpp" #include "opencv2/core/cuda/utility.hpp"

View File

@ -48,6 +48,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp" #include "opencv2/core/cuda/dynamic_smem.hpp"
@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev
const int ind = ::atomicAdd(r_sizes + n, 1); const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize) if (ind < maxSize)
r_table(n, ind) = p - templCenter; r_table(n, ind) = saturate_cast<short2>(p - templCenter);
} }
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev
for (int j = 0; j < r_row_size; ++j) for (int j = 0; j < r_row_size; ++j)
{ {
short2 c = p - r_row[j]; int2 c = p - r_row[j];
c.x = __float2int_rn(c.x * idp); c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp); c.y = __float2int_rn(c.y * idp);