From 029dfbc89de765ccad40345a37f6dd2c0aa61b18 Mon Sep 17 00:00:00 2001 From: Rok Mandeljc Date: Fri, 1 Jan 2016 23:48:57 +0100 Subject: [PATCH] opencv_core: CUDA: check if __CUDA_ARCH__ is defined before comparing it Changed statements of type "#if __CUDA_ARCH__ >= 200" to "#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200" in order to avoid warnings about __CUDA_ARCH__ being undefined. --- .../opencv2/core/cuda/detail/reduce.hpp | 6 +++--- .../opencv2/core/cuda/saturate_cast.hpp | 8 ++++---- .../include/opencv2/core/cuda/warp_shuffle.hpp | 18 +++++++++--------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda/detail/reduce.hpp b/modules/core/include/opencv2/core/cuda/detail/reduce.hpp index 0c35eaba65..44400c8e1d 100644 --- a/modules/core/include/opencv2/core/cuda/detail/reduce.hpp +++ b/modules/core/include/opencv2/core/cuda/detail/reduce.hpp @@ -275,7 +275,7 @@ namespace cv { namespace cuda { namespace device template static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 (void) smem; (void) tid; @@ -298,7 +298,7 @@ namespace cv { namespace cuda { namespace device { const unsigned int laneId = Warp::laneId(); - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize); if (laneId == 0) @@ -321,7 +321,7 @@ namespace cv { namespace cuda { namespace device if (tid < 32) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 Unroll::loopShfl(val, op, M); #else Unroll::loop(smem, val, tid, op); diff --git a/modules/core/include/opencv2/core/cuda/saturate_cast.hpp b/modules/core/include/opencv2/core/cuda/saturate_cast.hpp index e7633c76a3..f55ae4f4a7 100644 --- a/modules/core/include/opencv2/core/cuda/saturate_cast.hpp +++ b/modules/core/include/opencv2/core/cuda/saturate_cast.hpp @@ -101,7 +101,7 @@ namespace cv { namespace cuda { namespace device } template<> __device__ __forceinline__ uchar saturate_cast(double v) { - #if __CUDA_ARCH__ >= 130 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 uint res = 0; asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v)); return res; @@ -149,7 +149,7 @@ namespace cv { namespace cuda { namespace device } template<> __device__ __forceinline__ schar saturate_cast(double v) { - #if __CUDA_ARCH__ >= 130 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 uint res = 0; asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v)); return res; @@ -191,7 +191,7 @@ namespace cv { namespace cuda { namespace device } template<> __device__ __forceinline__ ushort saturate_cast(double v) { - #if __CUDA_ARCH__ >= 130 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 ushort res = 0; asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v)); return res; @@ -226,7 +226,7 @@ namespace cv { namespace cuda { namespace device } template<> __device__ __forceinline__ short saturate_cast(double v) { - #if __CUDA_ARCH__ >= 130 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 short res = 0; asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v)); return res; diff --git a/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp b/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp index 5cf42ec41d..256fc2a684 100644 --- a/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp +++ b/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp @@ -54,7 +54,7 @@ namespace cv { namespace cuda { namespace device template __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 return __shfl(val, srcLane, width); #else return T(); @@ -62,7 +62,7 @@ namespace cv { namespace cuda { namespace device } __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 return (unsigned int) __shfl((int) val, srcLane, width); #else return 0; @@ -70,7 +70,7 @@ namespace cv { namespace cuda { namespace device } __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 int lo = __double2loint(val); int hi = __double2hiint(val); @@ -86,7 +86,7 @@ namespace cv { namespace cuda { namespace device template __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 return __shfl_down(val, delta, width); #else return T(); @@ -94,7 +94,7 @@ namespace cv { namespace cuda { namespace device } __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 return (unsigned int) __shfl_down((int) val, delta, width); #else return 0; @@ -102,7 +102,7 @@ namespace cv { namespace cuda { namespace device } __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 int lo = __double2loint(val); int hi = __double2hiint(val); @@ -118,7 +118,7 @@ namespace cv { namespace cuda { namespace device template __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 return __shfl_up(val, delta, width); #else return T(); @@ -126,7 +126,7 @@ namespace cv { namespace cuda { namespace device } __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 return (unsigned int) __shfl_up((int) val, delta, width); #else return 0; @@ -134,7 +134,7 @@ namespace cv { namespace cuda { namespace device } __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize) { - #if __CUDA_ARCH__ >= 300 + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 int lo = __double2loint(val); int hi = __double2hiint(val);