Merge pull request #1083 from jet47:cuda-device-layer

This commit is contained in:
Roman Donchenko 2013-07-05 13:30:11 +04:00 committed by OpenCV Buildbot
commit 2f8be2a4b1
93 changed files with 20508 additions and 0 deletions

View File

@ -0,0 +1,36 @@
# Build script for the "cudev" module (CUDA device layer).
# The module is disabled when CUDA is not available. Otherwise it is registered
# with a custom target that only lists the public headers as its sources.
if(NOT HAVE_CUDA)
  ocv_module_disable(cudev)
endif()

set(the_description "CUDA device layer")

ocv_warnings_disable(CMAKE_CXX_FLAGS
  /wd4189
  /wd4505
  -Wundef
  -Wmissing-declarations
  -Wunused-function
  -Wunused-variable
)

ocv_add_module(cudev)

file(GLOB_RECURSE lib_hdrs "include/opencv2/*.hpp")

add_custom_target(${the_module} SOURCES ${lib_hdrs})

if(ENABLE_SOLUTION_FOLDERS)
  set_target_properties(${the_module} PROPERTIES FOLDER "modules")
endif()

# Strip host-compiler flags that are known to cause trouble for CUDA sources:
#   /EHa                        - generates warnings under windows
#   -ggdb3                      - leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
#   -Wsign-promo                - generates warnings under linux
#   -fvisibility-inlines-hidden - it's used for C++ compiler, but NVCC uses C compiler by default
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
  foreach(flag "/EHa" "-ggdb3" "-Wsign-promo" "-fvisibility-inlines-hidden")
    string(REPLACE "${flag}" "" ${var} "${${var}}")
  endforeach()
endforeach()

if(BUILD_TESTS)
  add_subdirectory(test)
endif()

View File

@ -0,0 +1,112 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_HPP__
#define __OPENCV_CUDEV_HPP__
#include "cudev/common.hpp"
#include "cudev/util/atomic.hpp"
#include "cudev/util/limits.hpp"
#include "cudev/util/saturate_cast.hpp"
#include "cudev/util/simd_functions.hpp"
#include "cudev/util/tuple.hpp"
#include "cudev/util/type_traits.hpp"
#include "cudev/util/vec_math.hpp"
#include "cudev/util/vec_traits.hpp"
#include "cudev/functional/color_cvt.hpp"
#include "cudev/functional/functional.hpp"
#include "cudev/functional/tuple_adapter.hpp"
#include "cudev/warp/reduce.hpp"
#include "cudev/warp/scan.hpp"
#include "cudev/warp/shuffle.hpp"
#include "cudev/warp/warp.hpp"
#include "cudev/block/block.hpp"
#include "cudev/block/dynamic_smem.hpp"
#include "cudev/block/reduce.hpp"
#include "cudev/block/scan.hpp"
#include "cudev/block/vec_distance.hpp"
#include "cudev/grid/copy.hpp"
#include "cudev/grid/glob_reduce.hpp"
#include "cudev/grid/histogram.hpp"
#include "cudev/grid/integral.hpp"
#include "cudev/grid/pyramids.hpp"
#include "cudev/grid/reduce_to_vec.hpp"
#include "cudev/grid/split_merge.hpp"
#include "cudev/grid/transform.hpp"
#include "cudev/grid/transpose.hpp"
#include "cudev/ptr2d/constant.hpp"
#include "cudev/ptr2d/deriv.hpp"
#include "cudev/ptr2d/extrapolation.hpp"
#include "cudev/ptr2d/glob.hpp"
#include "cudev/ptr2d/gpumat.hpp"
#include "cudev/ptr2d/interpolation.hpp"
#include "cudev/ptr2d/lut.hpp"
#include "cudev/ptr2d/mask.hpp"
#include "cudev/ptr2d/remap.hpp"
#include "cudev/ptr2d/resize.hpp"
#include "cudev/ptr2d/texture.hpp"
#include "cudev/ptr2d/traits.hpp"
#include "cudev/ptr2d/transform.hpp"
#include "cudev/ptr2d/warping.hpp"
#include "cudev/ptr2d/zip.hpp"
#include "cudev/expr/binary_func.hpp"
#include "cudev/expr/binary_op.hpp"
#include "cudev/expr/color.hpp"
#include "cudev/expr/deriv.hpp"
#include "cudev/expr/expr.hpp"
#include "cudev/expr/per_element_func.hpp"
#include "cudev/expr/reduction.hpp"
#include "cudev/expr/unary_func.hpp"
#include "cudev/expr/unary_op.hpp"
#include "cudev/expr/warping.hpp"
#endif

View File

@ -0,0 +1,127 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_BLOCK_HPP__
#define __OPENCV_CUDEV_BLOCK_BLOCK_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
// Static helpers that flatten CUDA's built-in three-component block and thread
// coordinates into single linear values.
struct Block
{
    // Linear index of the calling thread's block inside the grid
    // (x varies fastest, then y, then z).
    __device__ __forceinline__ static uint blockId()
    {
        const unsigned int row = blockIdx.z * gridDim.y + blockIdx.y;
        return row * gridDim.x + blockIdx.x;
    }

    // Total number of threads in the block.
    __device__ __forceinline__ static uint blockSize()
    {
        const unsigned int slice = blockDim.x * blockDim.y;
        return slice * blockDim.z;
    }

    // Linear index of the calling thread inside its block
    // (x varies fastest, then y, then z).
    __device__ __forceinline__ static uint threadLineId()
    {
        const unsigned int row = threadIdx.z * blockDim.y + threadIdx.y;
        return row * blockDim.x + threadIdx.x;
    }
};
// Assigns `value` to elements of [beg, end). The calling thread handles the
// elements at offsets threadLineId(), threadLineId() + blockSize(), ...
template <class It, typename T>
__device__ __forceinline__ static void blockFill(It beg, It end, const T& value)
{
    uint step = Block::blockSize();
    It cur = beg + Block::threadLineId();

    while (cur < end)
    {
        *cur = value;
        cur += step;
    }
}
// Writes an increasing sequence into [beg, end): the element at offset i
// receives `value + i`. The calling thread handles the offsets
// threadLineId(), threadLineId() + blockSize(), ...
template <class OutIt, typename T>
__device__ __forceinline__ static void blockYota(OutIt beg, OutIt end, T value)
{
    uint step = Block::blockSize();
    uint first = Block::threadLineId();

    // Value that belongs to this thread's first element.
    value += first;

    OutIt cur = beg + first;
    while (cur < end)
    {
        *cur = value;
        cur += step;
        value += step;
    }
}
// Copies [beg, end) to the range starting at `out`. The calling thread handles
// the elements at offsets threadLineId(), threadLineId() + blockSize(), ...
template <class InIt, class OutIt>
__device__ __forceinline__ static void blockCopy(InIt beg, InIt end, OutIt out)
{
    uint step = Block::blockSize();

    InIt src = beg + Block::threadLineId();
    OutIt dst = out + (src - beg);   // same offset in the destination

    while (src < end)
    {
        *dst = *src;
        src += step;
        dst += step;
    }
}
// Unary transform: writes op(x) for every x of [beg, end) to the matching
// position of the range starting at `out`. The calling thread handles the
// elements at offsets threadLineId(), threadLineId() + blockSize(), ...
template <class InIt, class OutIt, class UnOp>
__device__ __forceinline__ static void blockTransfrom(InIt beg, InIt end, OutIt out, const UnOp& op)
{
    uint step = Block::blockSize();

    InIt src = beg + Block::threadLineId();
    OutIt dst = out + (src - beg);   // same offset in the destination

    while (src < end)
    {
        *dst = op(*src);
        src += step;
        dst += step;
    }
}
// Binary transform: writes op(a, b) to the range starting at `out`, where a
// comes from [beg1, end1) and b from the same offset of the range starting at
// `beg2`. The calling thread handles the elements at offsets threadLineId(),
// threadLineId() + blockSize(), ...
template <class InIt1, class InIt2, class OutIt, class BinOp>
__device__ __forceinline__ static void blockTransfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, const BinOp& op)
{
    uint step = Block::blockSize();

    InIt1 lhs = beg1 + Block::threadLineId();
    InIt2 rhs = beg2 + Block::threadLineId();
    OutIt dst = out + (lhs - beg1);   // same offset in the destination

    while (lhs < end1)
    {
        *dst = op(*lhs, *rhs);
        lhs += step;
        rhs += step;
        dst += step;
    }
}
}}
#endif

View File

@ -0,0 +1,392 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDEV_BLOCK_REDUCE_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
#include "../../util/type_traits.hpp"
#include "../../warp/warp.hpp"
#include "../../warp/shuffle.hpp"
namespace cv { namespace cudev {
namespace block_reduce_detail
{
// GetType
// Maps a pointer, volatile pointer or reference type to the type it refers to.
// The primary template is declared but not defined, so only the
// specializations below can be used.
template <typename T> struct GetType;

template <typename T> struct GetType<T*>          { typedef T type; };
template <typename T> struct GetType<volatile T*> { typedef T type; };
template <typename T> struct GetType<T&>          { typedef T type; };
// For
// Compile-time recursion over tuple elements: each member handles element I
// and then forwards to For<I + 1, N>.
template <int I, int N> struct For
{
    // Stores element I of `val` into slot `tid` of element I of `smem`.
    template <class PointerTuple, class ValTuple>
    __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
    {
        typedef For<I + 1, N> Next;

        get<I>(smem)[tid] = get<I>(val);
        Next::loadToSmem(smem, val, tid);
    }

    // Reads slot `tid` of element I of `smem` into element I of `val`.
    template <class PointerTuple, class ValTuple>
    __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
    {
        typedef For<I + 1, N> Next;

        get<I>(val) = get<I>(smem)[tid];
        Next::loadFromSmem(smem, val, tid);
    }

    // Combines element I of `val` with slot `tid + delta` of element I of
    // `smem` using element I of `op`; the result goes to both `val` and slot `tid`.
    template <class PointerTuple, class ValTuple, class OpTuple>
    __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
    {
        typedef typename tuple_element<I, PointerTuple>::type PointerType;
        typedef typename GetType<PointerType>::type ValueType;
        typedef For<I + 1, N> Next;

        ValueType other = get<I>(smem)[tid + delta];
        get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), other);

        Next::merge(smem, val, tid, delta, op);
    }

#if CV_CUDEV_ARCH >= 300
    // Same combination as merge(), but the partner value is obtained with
    // shfl_down(value, delta, width) instead of being read from `smem`.
    template <class ValTuple, class OpTuple>
    __device__ static void mergeShfl(const ValTuple& val, uint delta, uint width, const OpTuple& op)
    {
        typedef typename tuple_element<I, ValTuple>::type ReferenceType;
        typedef typename GetType<ReferenceType>::type ValueType;
        typedef For<I + 1, N> Next;

        ValueType other = shfl_down(get<I>(val), delta, width);
        get<I>(val) = get<I>(op)(get<I>(val), other);

        Next::mergeShfl(val, delta, width, op);
    }
#endif
};
// Terminating case of the For<I, N> recursion: every operation is a no-op.
template <int N> struct For<N, N>
{
    template <class PointerTuple, class ValTuple>
    __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ValTuple&, uint) {}

    template <class PointerTuple, class ValTuple>
    __device__ __forceinline__ static void loadFromSmem(const PointerTuple&, const ValTuple&, uint) {}

    template <class PointerTuple, class ValTuple, class OpTuple>
    __device__ __forceinline__ static void merge(const PointerTuple&, const ValTuple&, uint, uint, const OpTuple&) {}

#if CV_CUDEV_ARCH >= 300
    template <class ValTuple, class OpTuple>
    __device__ __forceinline__ static void mergeShfl(const ValTuple&, uint, uint, const OpTuple&) {}
#endif
};
// loadToSmem / loadFromSmem
// Scalar overloads: move a single value between the caller's variable and
// slot `tid` of `smem`.

// Writes `val` into smem[tid].
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid)
{
    *(smem + tid) = val;
}

// Reads smem[tid] into `val`.
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, uint tid)
{
    val = *(smem + tid);
}
// Tuple overload of loadToSmem: stores every element of `val` into slot `tid`
// of the matching pointer in `smem`, element by element via For<>.
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                           const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                           uint tid)
{
    typedef tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::loadToSmem(smem, val, tid);
}

// Tuple overload of loadFromSmem: reads slot `tid` of every pointer in `smem`
// into the matching element of `val`, element by element via For<>.
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadFromSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                             const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                             uint tid)
{
    typedef tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::loadFromSmem(smem, val, tid);
}
// merge
// Scalar overload: combines `val` with smem[tid + delta] using `op`; the
// result is stored both in `val` and in smem[tid].
template <typename T, class Op>
__device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op)
{
    T other = smem[tid + delta];

    val = op(val, other);
    smem[tid] = val;
}
// Tuple overload of merge: applies the scalar merge step to every element,
// pairing element I of `smem`, `val` and `op`, via For<>.
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                      const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                      uint tid,
                                      uint delta,
                                      const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::merge(smem, val, tid, delta, op);
}
// mergeShfl
// Shuffle-based counterparts of merge(); only compiled when CV_CUDEV_ARCH >= 300.
#if CV_CUDEV_ARCH >= 300

// Scalar overload: combines `val` with the value returned by
// shfl_down(val, delta, width) using `op` and stores the result in `val`.
template <typename T, class Op>
__device__ __forceinline__ void mergeShfl(T& val, uint delta, uint width, const Op& op)
{
    T other = shfl_down(val, delta, width);
    val = op(val, other);
}

// Tuple overload: applies the scalar step to every element, pairing element I
// of `val` with element I of `op`, via For<>.
template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void mergeShfl(const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                          uint delta,
                                          uint width,
                                          const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> ReferenceTuple;

    For<0, tuple_size<ReferenceTuple>::value>::mergeShfl(val, delta, width, op);
}

#endif
// Generic
// Reduction strategy built as a halving tree over the scratch storage `smem`.
// Each stage is guarded by a comparison against the compile-time count N, so
// only the stages that N is large enough for take effect.
// Dispatcher selects this strategy when N is not a power of two or exceeds 1024.
template <int N> struct Generic
{
// smem: scratch storage indexed by thread id (a pointer or a tuple of pointers).
// val:  the calling thread's value(s); updated in place by every merge it performs.
// tid:  index of the calling thread.
// op:   binary operation (or tuple of operations) that combines two values.
template <typename Pointer, typename Reference, class Op>
__device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
{
// Publish this thread's value so that other threads can read it.
loadToSmem(smem, val, tid);
// The barrier is skipped for N < 32.
// NOTE(review): presumably because such a block fits into a single warp - confirm.
if (N >= 32)
__syncthreads();
// Halving stages: threads with tid < delta fold in the element `delta` slots
// above their own; a barrier follows each of these stages.
if (N >= 2048)
{
if (tid < 1024)
merge(smem, val, tid, 1024, op);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(smem, val, tid, 512, op);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(smem, val, tid, 256, op);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(smem, val, tid, 128, op);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(smem, val, tid, 64, op);
__syncthreads();
}
// From here on no barrier is issued, neither after the 32-wide stage nor
// between the merges of the final stage.
// NOTE(review): this looks like it relies on warp-synchronous execution - confirm
// that this still holds for the targeted architectures.
if (N >= 64)
{
if (tid < 32)
merge(smem, val, tid, 32, op);
}
// Final stage: executed for every N, with deltas 16, 8, 4, 2, 1.
// NOTE(review): for N < 32 these merges still read smem[tid + 16] and so on -
// confirm that the storage is always large enough and initialized.
if (tid < 16)
{
merge(smem, val, tid, 16, op);
merge(smem, val, tid, 8, op);
merge(smem, val, tid, 4, op);
merge(smem, val, tid, 2, op);
merge(smem, val, tid, 1, op);
}
}
};
// Unroll
// Compile-time unrolled sequence of merge steps with deltas I, I / 2, I / 4, ...
// The recursion ends at the Unroll<0, ...> specialization.
template <int I, typename Pointer, typename Reference, class Op> struct Unroll
{
    // Scratch-storage variant: merge(smem, val, tid, I, op), then continue with I / 2.
    __device__ static void loop(Pointer smem, Reference val, uint tid, Op op)
    {
        typedef Unroll<I / 2, Pointer, Reference, Op> Next;

        merge(smem, val, tid, I, op);
        Next::loop(smem, val, tid, op);
    }

#if CV_CUDEV_ARCH >= 300
    // Shuffle variant: mergeShfl(val, I, N, op), then continue with I / 2.
    // N is passed through unchanged as the `width` argument.
    __device__ static void loopShfl(Reference val, Op op, uint N)
    {
        typedef Unroll<I / 2, Pointer, Reference, Op> Next;

        mergeShfl(val, I, N, op);
        Next::loopShfl(val, op, N);
    }
#endif
};

// End of the recursion: nothing left to merge.
template <typename Pointer, typename Reference, class Op> struct Unroll<0, Pointer, Reference, Op>
{
    __device__ __forceinline__ static void loop(Pointer, Reference, uint, Op) {}

#if CV_CUDEV_ARCH >= 300
    __device__ __forceinline__ static void loopShfl(Reference, Op, uint) {}
#endif
};
// WarpOptimized
// Reduction strategy that Dispatcher selects for power-of-two N <= 32.
// When CV_CUDEV_ARCH >= 300 it works purely on `val` through the shuffle-based
// steps and ignores `smem` and `tid`; otherwise it stores `val` to `smem` and
// lets the threads with tid < N / 2 run the unrolled merge steps.
template <int N> struct WarpOptimized
{
    template <typename Pointer, typename Reference, class Op>
    __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
    {
        typedef Unroll<N / 2, Pointer, Reference, Op> Steps;

    #if CV_CUDEV_ARCH >= 300
        // Not needed on this path; the casts silence unused-parameter warnings.
        (void) smem;
        (void) tid;

        Steps::loopShfl(val, op, N);
    #else
        loadToSmem(smem, val, tid);

        if (tid < N / 2)
            Steps::loop(smem, val, tid, op);
    #endif
    }
};
// GenericOptimized32
// Two-level reduction strategy that Dispatcher selects for power-of-two N
// with 32 < N <= 1024:
//   1. every group of 32 consecutive threads reduces its own values;
//   2. the thread with laneId == 0 of each group stores the group's result at smem[tid / 32];
//   3. the threads with tid < 32 reduce those M = N / 32 partial results.
template <int N> struct GenericOptimized32
{
// Number of 32-thread groups, i.e. number of partial results in step 3.
enum { M = N / 32 };
// smem: scratch storage indexed by thread id (a pointer or a tuple of pointers).
// val:  the calling thread's value(s); updated in place.
// tid:  index of the calling thread.
// op:   binary operation (or tuple of operations) that combines two values.
template <typename Pointer, typename Reference, class Op>
__device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
{
const uint laneId = Warp::laneId();
#if CV_CUDEV_ARCH >= 300
// Step 1 via shuffles (deltas 16, 8, 4, 2, 1), then step 2.
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#else
// Step 1 through the scratch storage: every thread publishes its value and
// lanes 0..15 run the unrolled merge steps.
loadToSmem(smem, val, tid);
if (laneId < 16)
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
// Barrier before slots [0, M) are overwritten with the per-group results.
__syncthreads();
// Step 2.
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#endif
// Make the per-group results visible before they are read back.
__syncthreads();
// Every thread reloads smem[tid]; only the values of threads with tid < M are
// per-group results.
loadFromSmem(smem, val, tid);
// Step 3.
if (tid < 32)
{
#if CV_CUDEV_ARCH >= 300
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
#else
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
}
};
// Dispatcher<N>::reductor names the reduction strategy used for N elements.
// Assuming SelectIf<C, A, B>::type is A when C is true and B otherwise
// (TODO confirm against util/type_traits.hpp), the choice is:
//   - WarpOptimized<N>       for power-of-two N <= 32;
//   - GenericOptimized32<N>  for other power-of-two N <= 1024;
//   - Generic<N>             for everything else.
template <int N> struct Dispatcher
{
    typedef typename SelectIf<
        (N <= 32) && IsPowerOf2<N>::value, WarpOptimized<N>,
        typename SelectIf<(N <= 1024) && IsPowerOf2<N>::value, GenericOptimized32<N>, Generic<N> >::type
    >::type reductor;
};
}
}}
#endif

View File

@ -0,0 +1,394 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_REDUCE_KEY_VAL_DETAIL_HPP__
#define __OPENCV_CUDEV_BLOCK_REDUCE_KEY_VAL_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
#include "../../util/type_traits.hpp"
#include "../../warp/warp.hpp"
namespace cv { namespace cudev {
namespace block_reduce_key_val_detail
{
// GetType
// Maps a pointer, volatile pointer or reference type to the type it refers to.
// The primary template is declared but not defined, so only the
// specializations below can be used.
template <typename T> struct GetType;

template <typename T> struct GetType<T*>          { typedef T type; };
template <typename T> struct GetType<volatile T*> { typedef T type; };
template <typename T> struct GetType<T&>          { typedef T type; };
// For
// Compile-time recursion over tuple elements: each member handles element I
// and then forwards to For<I + 1, N>.
template <int I, int N> struct For
{
    // Stores element I of `data` into slot `tid` of element I of `smem`.
    template <class PointerTuple, class ReferenceTuple>
    __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
    {
        typedef For<I + 1, N> Next;

        get<I>(smem)[tid] = get<I>(data);
        Next::loadToSmem(smem, data, tid);
    }

    // Reads slot `tid` of element I of `smem` into element I of `data`.
    template <class PointerTuple, class ReferenceTuple>
    __device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
    {
        typedef For<I + 1, N> Next;

        get<I>(data) = get<I>(smem)[tid];
        Next::loadFromSmem(smem, data, tid);
    }

    // Copies slot `tid + delta` of element I of `svals` into both element I
    // of `val` and slot `tid`.
    template <class PointerTuple, class ReferenceTuple>
    __device__ static void copy(const PointerTuple& svals, const ReferenceTuple& val, uint tid, uint delta)
    {
        typedef For<I + 1, N> Next;

        get<I>(svals)[tid] = get<I>(val) = get<I>(svals)[tid + delta];
        Next::copy(svals, val, tid, delta);
    }

    // Per-element key/value merge: when element I of `cmp` accepts the key at
    // slot `tid + delta` over the current key, that key and the value stored
    // next to it replace the current pair (in the references and in slot `tid`).
    template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
    __device__ static void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                 const ValPointerTuple& svals, const ValReferenceTuple& val,
                                 const CmpTuple& cmp,
                                 uint tid, uint delta)
    {
        typedef typename tuple_element<I, KeyPointerTuple>::type KeyPointerType;
        typedef typename GetType<KeyPointerType>::type KeyType;
        typedef For<I + 1, N> Next;

        KeyType candidate = get<I>(skeys)[tid + delta];

        if (get<I>(cmp)(candidate, get<I>(key)))
        {
            get<I>(skeys)[tid] = get<I>(key) = candidate;
            get<I>(svals)[tid] = get<I>(val) = get<I>(svals)[tid + delta];
        }

        Next::merge(skeys, key, svals, val, cmp, tid, delta);
    }
};
// Terminating case of the For<I, N> recursion: every operation is a no-op.
template <int N> struct For<N, N>
{
    template <class PointerTuple, class ReferenceTuple>
    __device__ static void loadToSmem(const PointerTuple&, const ReferenceTuple&, uint) {}

    template <class PointerTuple, class ReferenceTuple>
    __device__ static void loadFromSmem(const PointerTuple&, const ReferenceTuple&, uint) {}

    template <class PointerTuple, class ReferenceTuple>
    __device__ static void copy(const PointerTuple&, const ReferenceTuple&, uint, uint) {}

    template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
    __device__ static void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
                                 const ValPointerTuple&, const ValReferenceTuple&,
                                 const CmpTuple&,
                                 uint, uint) {}
};
// loadToSmem / loadFromSmem
// Scalar overloads: move a single value between the caller's variable and
// slot `tid` of `smem`.

// Writes `data` into smem[tid].
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
{
    *(smem + tid) = data;
}

// Reads smem[tid] into `data`.
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid)
{
    data = *(smem + tid);
}
// Tuple overload of loadToSmem: stores every element of `data` into slot `tid`
// of the matching pointer in `smem`, element by element via For<>.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                           const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                           uint tid)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::loadToSmem(smem, data, tid);
}

// Tuple overload of loadFromSmem: reads slot `tid` of every pointer in `smem`
// into the matching element of `data`, element by element via For<>.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadFromSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                             const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                             uint tid)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::loadFromSmem(smem, data, tid);
}
// copyVals
// Takes over the value stored `delta` slots above `tid`: it is copied into
// the caller's `val` and into slot `tid`.

// Scalar overload.
template <typename V>
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, uint tid, uint delta)
{
    val = svals[tid + delta];
    svals[tid] = val;
}

// Tuple overload: performs the same copy for every element via For<>::copy.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyVals(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                         const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                         uint tid, uint delta)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::copy(svals, val, tid, delta);
}
// merge
// Scalar key, scalar value: if `cmp` accepts the key stored `delta` slots
// above `tid` over the current `key`, that key and its value replace the
// current pair, both in the caller's variables and in slot `tid`.
template <typename K, typename V, class Cmp>
__device__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, uint tid, uint delta)
{
    K candidate = skeys[tid + delta];

    // Keep the current pair unless the comparator accepts the candidate.
    if (!cmp(candidate, key))
        return;

    skeys[tid] = key = candidate;
    copyVals(svals, val, tid, delta);
}
// Scalar key, tuple of values: if `cmp` accepts the key stored `delta` slots
// above `tid` over the current `key`, that key replaces the current one and
// all values are taken over from slot `tid + delta` through copyVals().
template <typename K,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ void merge(volatile K* skeys, K& key,
                      const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                      const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                      const Cmp& cmp, uint tid, uint delta)
{
    K candidate = skeys[tid + delta];

    // Keep the current pair unless the comparator accepts the candidate.
    if (!cmp(candidate, key))
        return;

    skeys[tid] = key = candidate;
    copyVals(svals, val, tid, delta);
}
// Tuple of keys, tuple of values: runs the per-element key/value merge of
// For<>::merge, pairing element I of the keys, values and comparators.
// The element count is taken from the value-pointer tuple.
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void merge(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                      const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                      const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                      const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                      const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                      uint tid, uint delta)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> ValPointerTuple;

    For<0, tuple_size<ValPointerTuple>::value>::merge(skeys, key, svals, val, cmp, tid, delta);
}
// Generic
// Key/value reduction strategy built as a halving tree over the scratch
// storage `skeys` / `svals`. Each stage is guarded by a comparison against the
// compile-time count N, so only the stages that N is large enough for take effect.
// Dispatcher selects this strategy when N is not a power of two or exceeds 1024.
template <int N> struct Generic
{
    // skeys, svals: scratch storage for keys and values, indexed by thread id
    //               (pointers or tuples of pointers).
    // key, val:     the calling thread's key and value(s); updated in place
    //               whenever a merge accepts the partner's key.
    // tid:          index of the calling thread.
    // cmp:          comparator (or tuple of comparators); cmp(candidate, current)
    //               being true makes the candidate pair replace the current one.
    template <class KP, class KR, class VP, class VR, class Cmp>
    __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
    {
        // Publish this thread's key and value(s). loadToSmem has scalar and
        // tuple overloads, so the same call serves both the keys and the values.
        loadToSmem(skeys, key, tid);
        loadToSmem(svals, val, tid);

        // The barrier is skipped for N < 32.
        // NOTE(review): presumably because such a block fits into a single warp - confirm.
        if (N >= 32)
            __syncthreads();

        // Halving stages: threads with tid < delta merge with the pair `delta`
        // slots above their own; a barrier follows each of these stages.
        if (N >= 2048)
        {
            if (tid < 1024)
                merge(skeys, key, svals, val, cmp, tid, 1024);

            __syncthreads();
        }
        if (N >= 1024)
        {
            if (tid < 512)
                merge(skeys, key, svals, val, cmp, tid, 512);

            __syncthreads();
        }
        if (N >= 512)
        {
            if (tid < 256)
                merge(skeys, key, svals, val, cmp, tid, 256);

            __syncthreads();
        }
        if (N >= 256)
        {
            if (tid < 128)
                merge(skeys, key, svals, val, cmp, tid, 128);

            __syncthreads();
        }
        if (N >= 128)
        {
            if (tid < 64)
                merge(skeys, key, svals, val, cmp, tid, 64);

            __syncthreads();
        }

        // From here on no barrier is issued, neither after the 32-wide stage
        // nor between the merges of the final stage.
        // NOTE(review): this looks like it relies on warp-synchronous execution -
        // confirm that this still holds for the targeted architectures.
        if (N >= 64)
        {
            if (tid < 32)
                merge(skeys, key, svals, val, cmp, tid, 32);
        }

        // Final stage: executed for every N, with deltas 16, 8, 4, 2, 1.
        if (tid < 16)
        {
            merge(skeys, key, svals, val, cmp, tid, 16);
            merge(skeys, key, svals, val, cmp, tid, 8);
            merge(skeys, key, svals, val, cmp, tid, 4);
            merge(skeys, key, svals, val, cmp, tid, 2);
            merge(skeys, key, svals, val, cmp, tid, 1);
        }
    }
};
// Unroll
// Compile-time unrolled sequence of key/value merge steps with deltas
// I, I / 2, I / 4, ... The recursion ends at the Unroll<0, ...> specialization.
template <int I, class KP, class KR, class VP, class VR, class Cmp> struct Unroll
{
    // merge(..., tid, I), then continue with I / 2.
    __device__ static void loop(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
    {
        typedef Unroll<I / 2, KP, KR, VP, VR, Cmp> Next;

        merge(skeys, key, svals, val, cmp, tid, I);
        Next::loop(skeys, key, svals, val, tid, cmp);
    }
};

// End of the recursion: nothing left to merge.
template <class KP, class KR, class VP, class VR, class Cmp> struct Unroll<0, KP, KR, VP, VR, Cmp>
{
    __device__ __forceinline__ static void loop(KP, KR, VP, VR, uint, Cmp) {}
};
// WarpOptimized
// Key/value reduction strategy that Dispatcher selects for power-of-two
// N <= 32: every thread stores its key and value(s) to the scratch storage,
// then the threads with tid < N / 2 run the unrolled merge steps.
template <int N> struct WarpOptimized
{
    template <class KP, class KR, class VP, class VR, class Cmp>
    __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
    {
        typedef Unroll<N / 2, KP, KR, VP, VR, Cmp> Steps;

        loadToSmem(skeys, key, tid);
        loadToSmem(svals, val, tid);

        if (tid < N / 2)
        {
            Steps::loop(skeys, key, svals, val, tid, cmp);
        }
    }
};
// GenericOptimized32
// Two-stage key/value reduction for an N-thread block: every warp first reduces its
// own slots, lane 0 of each warp then stores its result at slot tid / 32, and finally
// the first 32 threads reduce those per-warp results.
template <uint N> struct GenericOptimized32
{
// M = N / 32; presumably the number of warps in the block (assumes 32-thread warps - TODO confirm).
enum { M = N / 32 };
template <class KP, class KR, class VP, class VR, class Cmp>
__device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
{
const uint laneId = Warp::laneId();
// Publish this thread's key and value(s) at index tid.
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
// Stage 1: lanes 0..15 run the unrolled merge sequence with strides 16, 8, 4, 2, 1.
if (laneId < 16)
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
// Barrier before the low slots are overwritten with the per-warp results below.
__syncthreads();
// Stage 2: lane 0 stores its key and value(s) at slot tid / 32.
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
// Barrier so that the slots written above are visible to the threads reading them next.
__syncthreads();
// Only the key is re-read from slot tid; val keeps the value the thread already holds.
loadFromSmem(skeys, key, tid);
// Stage 3: the first 32 threads merge the stored results, starting at stride M / 2.
if (tid < 32)
{
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
}
}
};
template <int N> struct Dispatcher
{
typedef typename SelectIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename SelectIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}
#endif

View File

@ -0,0 +1,86 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_DYNAMIC_SMEM_HPP__
#define __OPENCV_CUDEV_BLOCK_DYNAMIC_SMEM_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
/**
 * Typed view of the kernel's dynamically sized shared memory.
 *
 * An object of this type converts implicitly to T* (or const T*); the pointer
 * refers to the start of the extern __shared__ array declared below.
 */
template <class T> struct DynamicSharedMem
{
    __device__ __forceinline__ operator T*()
    {
        extern __shared__ int __smem[];

        T* const base = (T*) __smem;
        return base;
    }

    __device__ __forceinline__ operator const T*() const
    {
        extern __shared__ int __smem[];

        const T* const base = (T*) __smem;
        return base;
    }
};
// specialize for double to avoid unaligned memory access compile errors
/**
 * double specialization: the extern __shared__ array is declared as double[]
 * instead of int[], so no pointer cast is involved.
 */
template <> struct DynamicSharedMem<double>
{
    __device__ __forceinline__ operator double*()
    {
        extern __shared__ double __smem_d[];

        // The array decays to double* on return.
        return __smem_d;
    }

    __device__ __forceinline__ operator const double*() const
    {
        extern __shared__ double __smem_d[];

        return __smem_d;
    }
};
}}
#endif

View File

@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_REDUCE_HPP__
#define __OPENCV_CUDEV_BLOCK_REDUCE_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
#include "../warp/reduce.hpp"
#include "detail/reduce.hpp"
#include "detail/reduce_key_val.hpp"
namespace cv { namespace cudev {
// blockReduce
/**
 * Block-wide reduction of one value per thread.
 *
 * N    : compile-time block size used to select the reduction strategy
 * smem : shared-memory buffer used by the reduction
 * val  : per-thread value, passed by reference to the selected reductor
 * tid  : index of the calling thread inside the block
 * op   : binary operation combining two values
 */
template <int N, typename T, class Op>
__device__ __forceinline__ void blockReduce(volatile T* smem, T& val, uint tid, const Op& op)
{
    typedef typename block_reduce_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
}
/**
 * Block-wide reduction of several values per thread at once.
 *
 * smem, val and op are tuples (up to ten elements each) holding the shared-memory
 * buffers, the per-thread values and the operations; they are forwarded unchanged
 * to the reductor selected for block size N.
 */
template <int N,
          typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void blockReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                            const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                            uint tid,
                                            const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem_ref;
    typedef const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val_ref;
    typedef const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op_ref;
    typedef typename block_reduce_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<smem_ref, val_ref, op_ref>(smem, val, tid, op);
}
// blockReduceKeyVal
/**
 * Block-wide key/value reduction with one key and one value per thread.
 *
 * skeys / svals : shared-memory buffers for keys / values
 * key / val     : per-thread key / value, passed by reference to the reductor
 * tid           : index of the calling thread inside the block
 * cmp           : comparison applied to keys
 */
template <int N, typename K, typename V, class Cmp>
__device__ __forceinline__ void blockReduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, uint tid, const Cmp& cmp)
{
    typedef typename block_reduce_key_val_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
/**
 * Block-wide key/value reduction with one key and a tuple of values per thread.
 *
 * svals and val are tuples (up to ten elements each) of shared-memory buffers and
 * per-thread values; all arguments are forwarded unchanged to the reductor selected
 * for block size N.
 */
template <int N,
          typename K,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ __forceinline__ void blockReduceKeyVal(volatile K* skeys, K& key,
                                                  const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                  const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  uint tid, const Cmp& cmp)
{
    typedef const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals_ref;
    typedef const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val_ref;
    typedef typename block_reduce_key_val_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<volatile K*, K&, svals_ref, val_ref, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
/**
 * Block-wide key/value reduction with tuples of keys, values and comparisons.
 *
 * skeys / key / svals / val / cmp are tuples of up to ten elements each; they are
 * forwarded unchanged to the reductor selected for block size N.
 */
template <int N,
          typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void blockReduceKeyVal(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                                  const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                  const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                  const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  uint tid,
                                                  const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
{
    typedef const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys_ref;
    typedef const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key_ref;
    typedef const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals_ref;
    typedef const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val_ref;
    typedef const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp_ref;
    typedef typename block_reduce_key_val_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<skeys_ref, key_ref, svals_ref, val_ref, cmp_ref>(skeys, key, svals, val, tid, cmp);
}
}}
#endif

View File

@ -0,0 +1,101 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_SCAN_HPP__
#define __OPENCV_CUDEV_BLOCK_SCAN_HPP__
#include "../common.hpp"
#include "../warp/scan.hpp"
namespace cv { namespace cudev {
/**
 * Inclusive scan (prefix sum using operator+) across a block of THREADS_NUM threads.
 *
 * data : value contributed by the calling thread
 * smem : shared-memory scratch buffer, also used to exchange per-warp totals
 * tid  : index of the calling thread inside the block
 *
 * Returns the scan result for the calling thread.
 */
template <int THREADS_NUM, typename T>
__device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
{
    // A block that fits into one warp needs nothing beyond the warp-level scan.
    if (THREADS_NUM <= WARP_SIZE)
        return warpScanInclusive(data, smem, tid);

    const uint warpIdx = tid >> LOG_WARP_SIZE;
    const uint lastLane = WARP_SIZE - 1;

    // Bottom level: inclusive scan inside each warp.
    T warpResult = warpScanInclusive(data, smem, tid);

    // Wait until every warp scan has completed, because smem is overwritten next.
    __syncthreads();

    // The last lane of each warp stores the warp total at the warp's index.
    if ((tid & lastLane) == lastLane)
    {
        smem[warpIdx] = warpResult;
    }

    __syncthreads();

    // Top level: the first THREADS_NUM / WARP_SIZE threads scan the warp totals.
    if (tid < (THREADS_NUM / WARP_SIZE))
    {
        // Fetch the total of warp number tid.
        T warpTotal = smem[tid];

        // Exclusive scan of the totals, written back to shared memory.
        smem[tid] = warpScanExclusive(warpTotal, smem, tid);
    }

    __syncthreads();

    // Combine the in-warp result with the offset computed for this warp.
    return warpResult + smem[warpIdx];
}
/**
 * Exclusive scan across a block of THREADS_NUM threads, obtained by subtracting
 * the thread's own contribution from the inclusive scan result.
 */
template <int THREADS_NUM, typename T>
__device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid)
{
    const T inclusive = blockScanInclusive<THREADS_NUM>(data, smem, tid);

    return inclusive - data;
}
}}
#endif

View File

@ -0,0 +1,184 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_VEC_DISTANCE_HPP__
#define __OPENCV_CUDEV_BLOCK_VEC_DISTANCE_HPP__
#include "../common.hpp"
#include "../functional/functional.hpp"
#include "../warp/reduce.hpp"
#include "reduce.hpp"
namespace cv { namespace cudev {
// NormL1
/**
 * L1 distance accumulator for the generic (non-float) case: operands are taken
 * as int and the running sum of absolute differences is kept as uint.
 */
template <typename T> struct NormL1
{
    typedef int  value_type;
    typedef uint result_type;

    result_type mySum;

    // Start with an empty sum.
    __device__ __forceinline__ NormL1() : mySum(0) {}

    // Accumulate one pair of elements: __sad adds |val1 - val2| to the third argument.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        mySum = __sad(val1, val2, mySum);
    }

    // Sum the per-thread results via warpReduce.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> add;
        warpReduce(smem, mySum, tid, add);
    }

    // Sum the per-thread results via blockReduce for a block of THREAD_DIM threads.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> add;
        blockReduce<THREAD_DIM>(smem, mySum, tid, add);
    }

    // The accumulated sum is the distance.
    __device__ __forceinline__ operator result_type() const
    {
        return mySum;
    }
};
/**
 * L1 distance accumulator for float data: sums fabsf(val1 - val2) in a float.
 */
template <> struct NormL1<float>
{
    typedef float value_type;
    typedef float result_type;

    result_type mySum;

    // Start with an empty sum.
    __device__ __forceinline__ NormL1() : mySum(0.0f) {}

    // Accumulate the absolute difference of one pair of elements.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const float delta = val1 - val2;
        mySum += ::fabsf(delta);
    }

    // Sum the per-thread results via warpReduce.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> add;
        warpReduce(smem, mySum, tid, add);
    }

    // Sum the per-thread results via blockReduce for a block of THREAD_DIM threads.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> add;
        blockReduce<THREAD_DIM>(smem, mySum, tid, add);
    }

    // The accumulated sum is the distance.
    __device__ __forceinline__ operator result_type() const
    {
        return mySum;
    }
};
// NormL2
/**
 * L2 (Euclidean) distance accumulator: sums squared differences in a float and
 * applies the square root only when the result is read.
 */
struct NormL2
{
    typedef float value_type;
    typedef float result_type;

    result_type mySum;

    // Start with an empty sum.
    __device__ __forceinline__ NormL2() : mySum(0.0f) {}

    // Accumulate the squared difference of one pair of elements.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const float delta = val1 - val2;
        mySum += delta * delta;
    }

    // Sum the per-thread results via warpReduce.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> add;
        warpReduce(smem, mySum, tid, add);
    }

    // Sum the per-thread results via blockReduce for a block of THREAD_DIM threads.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> add;
        blockReduce<THREAD_DIM>(smem, mySum, tid, add);
    }

    // The distance is the square root of the accumulated sum of squares.
    __device__ __forceinline__ operator result_type() const
    {
        return ::sqrtf(mySum);
    }
};
// NormHamming
/**
 * Hamming distance accumulator: counts the bits in which pairs of int operands differ.
 */
struct NormHamming
{
    typedef int value_type;
    typedef int result_type;

    result_type mySum;

    // Start with an empty count.
    __device__ __forceinline__ NormHamming() : mySum(0) {}

    // XOR marks the differing bits; __popc counts them.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const int differingBits = __popc(val1 ^ val2);
        mySum += differingBits;
    }

    // Sum the per-thread counts via warpReduce.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> add;
        warpReduce(smem, mySum, tid, add);
    }

    // Sum the per-thread counts via blockReduce for a block of THREAD_DIM threads.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> add;
        blockReduce<THREAD_DIM>(smem, mySum, tid, add);
    }

    // The accumulated count is the distance.
    __device__ __forceinline__ operator result_type() const
    {
        return mySum;
    }
};
}}
#endif

View File

@ -0,0 +1,93 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_COMMON_HPP__
#define __OPENCV_CUDEV_COMMON_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/gpu.hpp"
#include "opencv2/core/gpu_stream_accessor.hpp"
namespace cv { namespace cudev {
using namespace cv::gpu;
// CV_CUDEV_ARCH
// CV_CUDEV_ARCH mirrors __CUDA_ARCH__ when that macro is defined and is 0 otherwise,
// so it can be used in expressions even where __CUDA_ARCH__ does not exist.
#ifdef __CUDA_ARCH__
#  define CV_CUDEV_ARCH __CUDA_ARCH__
#else
#  define CV_CUDEV_ARCH 0
#endif
// CV_CUDEV_SAFE_CALL
/**
 * Reports a failed CUDA runtime call through cv::error.
 *
 * err              : status returned by the CUDA call
 * file, line, func : location of the call, forwarded to cv::error
 *
 * Does nothing when err equals cudaSuccess.
 */
__host__ __forceinline__ void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
{
    if (err == cudaSuccess)
        return;

    const char* const message = cudaGetErrorString(err);
    cv::error(cv::Error::GpuApiCallError, message, func, file, line);
}
// CV_CUDEV_SAFE_CALL(expr) passes the result of expr to checkCudaError together with
// the call site. The function name is supplied via __func__ only when __GNUC__ is
// defined; otherwise an empty string is passed.
#ifndef __GNUC__
#  define CV_CUDEV_SAFE_CALL(expr) cv::cudev::checkCudaError((expr), __FILE__, __LINE__, "")
#else
#  define CV_CUDEV_SAFE_CALL(expr) cv::cudev::checkCudaError((expr), __FILE__, __LINE__, __func__)
#endif
// divUp
/**
 * Integer division rounded up: adds grain - 1 to total before dividing by grain,
 * which yields the number of grain-sized chunks needed to cover total.
 */
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
    const int padded = total + grain - 1;

    return padded / grain;
}
// math constants
// Single-precision versions of CV_PI and CV_LOG2 (both expected to be defined by
// the headers included above).
#define CV_PI_F (static_cast<float>(CV_PI))
#define CV_LOG2_F (static_cast<float>(CV_LOG2))
}}
#endif

View File

@ -0,0 +1,75 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_BINARY_FUNC_HPP__
#define __OPENCV_CUDEV_EXPR_BINARY_FUNC_HPP__
#include "../common.hpp"
#include "../util/type_traits.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/transform.hpp"
#include "../functional/functional.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// CV_CUDEV_EXPR_BINARY_FUNC(name) defines a host function template `name_`(src1, src2)
// that builds a lazy expression: the two sources are combined through transformPtr
// with the functor `name_func`, instantiated for the LargerType of the two source
// value types, and the result is wrapped with makeExpr.
#define CV_CUDEV_EXPR_BINARY_FUNC(name) \
template <class SrcPtr1, class SrcPtr2> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, name ## _func<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > > \
name ## _(const SrcPtr1& src1, const SrcPtr2& src2) \
{ \
return makeExpr(transformPtr(src1, src2, name ## _func<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type>())); \
}
// Generates hypot_, magnitude_, atan2_ and absdiff_.
CV_CUDEV_EXPR_BINARY_FUNC(hypot)
CV_CUDEV_EXPR_BINARY_FUNC(magnitude)
CV_CUDEV_EXPR_BINARY_FUNC(atan2)
CV_CUDEV_EXPR_BINARY_FUNC(absdiff)
// The helper macro is only needed for the instantiations above.
#undef CV_CUDEV_EXPR_BINARY_FUNC
}}
#endif

View File

@ -0,0 +1,235 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_BINARY_OP_HPP__
#define __OPENCV_CUDEV_EXPR_BINARY_OP_HPP__
#include "../common.hpp"
#include "../util/type_traits.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/transform.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/texture.hpp"
#include "../ptr2d/glob.hpp"
#include "../functional/functional.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// Binary Operations
#define CV_CUDEV_EXPR_BINOP_INST(op, functor) \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
operator op(const GpuMat_<T>& src1, const GpuMat_<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
operator op(const GpuMat_<T>& src1, const GlobPtrSz<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
operator op(const GlobPtrSz<T>& src1, const GpuMat_<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
operator op(const GpuMat_<T>& src1, const Texture<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
operator op(const Texture<T>& src1, const GpuMat_<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T, class Body> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<Body>::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
operator op(const GpuMat_<T>& src1, const Expr<Body>& src2) \
{ \
return makeExpr(transformPtr(src1, src2.body, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
} \
template <typename T, class Body> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
operator op(const Expr<Body>& src1, const GpuMat_<T>& src2) \
{ \
return makeExpr(transformPtr(src1.body, src2, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
} \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, Binder2nd< functor<T> > > > \
operator op(const GpuMat_<T>& src, T val) \
{ \
return makeExpr(transformPtr(src, bind2nd(functor<T>(), val))); \
} \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, Binder1st< functor<T> > > > \
operator op(T val, const GpuMat_<T>& src) \
{ \
return makeExpr(transformPtr(src, bind1st(functor<T>(), val))); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
operator op(const GlobPtrSz<T>& src1, const GlobPtrSz<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
operator op(const GlobPtrSz<T>& src1, const Texture<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
operator op(const Texture<T>& src1, const GlobPtrSz<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T, class Body> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<Body>::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
operator op(const GlobPtrSz<T>& src1, const Expr<Body>& src2) \
{ \
return makeExpr(transformPtr(src1, src2.body, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
} \
template <typename T, class Body> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
operator op(const Expr<Body>& src1, const GlobPtrSz<T>& src2) \
{ \
return makeExpr(transformPtr(src1.body, src2, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
} \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, Binder2nd< functor<T> > > > \
operator op(const GlobPtrSz<T>& src, T val) \
{ \
return makeExpr(transformPtr(src, bind2nd(functor<T>(), val))); \
} \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, Binder1st< functor<T> > > > \
operator op(T val, const GlobPtrSz<T>& src) \
{ \
return makeExpr(transformPtr(src, bind1st(functor<T>(), val))); \
} \
template <typename T> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
operator op(const Texture<T>& src1, const Texture<T>& src2) \
{ \
return makeExpr(transformPtr(src1, src2, functor<T>())); \
} \
template <typename T, class Body> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<Body>::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
operator op(const Texture<T>& src1, const Expr<Body>& src2) \
{ \
return makeExpr(transformPtr(src1, src2.body, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
} \
template <typename T, class Body> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
operator op(const Expr<Body>& src1, const Texture<T>& src2) \
{ \
return makeExpr(transformPtr(src1.body, src2, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
} \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, Binder2nd< functor<T> > > > \
operator op(const Texture<T>& src, T val) \
{ \
return makeExpr(transformPtr(src, bind2nd(functor<T>(), val))); \
} \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, Binder1st< functor<T> > > > \
operator op(T val, const Texture<T>& src) \
{ \
return makeExpr(transformPtr(src, bind1st(functor<T>(), val))); \
} \
template <class Body1, class Body2> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body1>::ptr_type, typename PtrTraits<Body2>::ptr_type, functor<typename LargerType<typename PtrTraits<Body1>::value_type, typename PtrTraits<Body2>::value_type>::type> > > \
operator op(const Expr<Body1>& a, const Expr<Body2>& b) \
{ \
return makeExpr(transformPtr(a.body, b.body, functor<typename LargerType<typename PtrTraits<Body1>::value_type, typename PtrTraits<Body2>::value_type>::type>())); \
} \
template <class Body> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, Binder2nd< functor<typename Body::value_type> > > > \
operator op(const Expr<Body>& a, typename Body::value_type val) \
{ \
return makeExpr(transformPtr(a.body, bind2nd(functor<typename Body::value_type>(), val))); \
} \
template <class Body> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, Binder1st< functor<typename Body::value_type> > > > \
operator op(typename Body::value_type val, const Expr<Body>& a) \
{ \
return makeExpr(transformPtr(a.body, bind1st(functor<typename Body::value_type>(), val))); \
}
// Expand CV_CUDEV_EXPR_BINOP_INST once per (operator token, functor template)
// pair. Each expansion defines the full set of `operator op` overloads
// declared by the macro, using `functor<...>` as the element-wise operation.

// arithmetic operators
CV_CUDEV_EXPR_BINOP_INST(+, plus)
CV_CUDEV_EXPR_BINOP_INST(-, minus)
CV_CUDEV_EXPR_BINOP_INST(*, multiplies)
CV_CUDEV_EXPR_BINOP_INST(/, divides)
CV_CUDEV_EXPR_BINOP_INST(%, modulus)
// comparison operators
CV_CUDEV_EXPR_BINOP_INST(==, equal_to)
CV_CUDEV_EXPR_BINOP_INST(!=, not_equal_to)
CV_CUDEV_EXPR_BINOP_INST(>, greater)
CV_CUDEV_EXPR_BINOP_INST(<, less)
CV_CUDEV_EXPR_BINOP_INST(>=, greater_equal)
CV_CUDEV_EXPR_BINOP_INST(<=, less_equal)
// logical operators
CV_CUDEV_EXPR_BINOP_INST(&&, logical_and)
CV_CUDEV_EXPR_BINOP_INST(||, logical_or)
// bitwise and shift operators
CV_CUDEV_EXPR_BINOP_INST(&, bit_and)
CV_CUDEV_EXPR_BINOP_INST(|, bit_or)
CV_CUDEV_EXPR_BINOP_INST(^, bit_xor)
CV_CUDEV_EXPR_BINOP_INST(<<, bit_lshift)
CV_CUDEV_EXPR_BINOP_INST(>>, bit_rshift)
// The generator macro is only needed inside this header; undefine it so it
// does not leak into translation units that include this file.
#undef CV_CUDEV_EXPR_BINOP_INST
}}
#endif

View File

@ -0,0 +1,282 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_COLOR_HPP__
#define __OPENCV_CUDEV_EXPR_COLOR_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/transform.hpp"
#include "../functional/color_cvt.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// CV_CUDEV_EXPR_CVTCOLOR_INST(name)
//
// Defines a host function template `name_` (note the trailing underscore)
// taking a single source `src`. The function value-initializes the functor
// `name_func<E>`, where E is the VecTraits element type of the source's
// value_type, passes it together with `src` to transformPtr, and wraps the
// result with makeExpr.
#define CV_CUDEV_EXPR_CVTCOLOR_INST(name) \
    template <class SrcPtr> \
    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, name ## _func<typename VecTraits<typename PtrTraits<SrcPtr>::value_type>::elem_type> > > \
    name ## _(const SrcPtr& src) \
    { \
        typedef typename VecTraits<typename PtrTraits<SrcPtr>::value_type>::elem_type elem_type; \
        typedef name ## _func<elem_type> cvt_func_type; \
        return makeExpr(transformPtr(src, cvt_func_type())); \
    }
// One expansion of CV_CUDEV_EXPR_CVTCOLOR_INST per conversion name. Each line
// defines the function `<name>_`, which wraps the functor `<name>_func`.
// NOTE(review): the functors come from functional/color_cvt.hpp; the exact
// colour math (and the meaning of the "4", "_FULL" and leading "L" name
// variants) is defined there, not in this header - confirm before relying
// on the group labels below.

// BGR / BGRA channel reordering and alpha add/remove
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_RGBA)
// to / from GRAY
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_GRAY)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_GRAY)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_GRAY)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_GRAY)
CV_CUDEV_EXPR_CVTCOLOR_INST(GRAY_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(GRAY_to_BGRA)
// to YUV / YUV4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YUV)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YUV)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YUV4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YUV4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YUV)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YUV)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YUV4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YUV4)
// from YUV / YUV4
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_BGRA)
// to YCrCb / YCrCb4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YCrCb)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YCrCb)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YCrCb4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YCrCb4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YCrCb)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YCrCb)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YCrCb4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YCrCb4)
// from YCrCb / YCrCb4
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_BGRA)
// to XYZ / XYZ4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_XYZ)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_XYZ)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_XYZ4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_XYZ4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_XYZ)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_XYZ)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_XYZ4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_XYZ4)
// from XYZ / XYZ4
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_BGRA)
// to HSV / HSV4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV4)
// to HSV / HSV4, _FULL variants
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV4_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV4_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV4_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV4_FULL)
// from HSV / HSV4
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGRA)
// from HSV / HSV4, _FULL variants
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGB_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGBA_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGB_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGBA_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGR_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGRA_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGR_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGRA_FULL)
// to HLS / HLS4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS4)
// to HLS / HLS4, _FULL variants
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS4_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS4_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS4_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS4_FULL)
// from HLS / HLS4
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGRA)
// from HLS / HLS4, _FULL variants
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGB_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGBA_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGB_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGBA_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGR_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGRA_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGR_FULL)
CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGRA_FULL)
// to Lab / Lab4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Lab4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Lab4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Lab4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Lab4)
// to Lab / Lab4 from the L-prefixed sources
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Lab4)
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Lab4)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Lab)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Lab4)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Lab4)
// from Lab / Lab4
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_BGRA)
// from Lab / Lab4 to the L-prefixed targets
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LRGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LRGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LRGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LRGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LBGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LBGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LBGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LBGRA)
// to Luv / Luv4
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Luv4)
CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Luv4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Luv4)
CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Luv4)
// to Luv / Luv4 from the L-prefixed sources
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Luv4)
CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Luv4)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Luv)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Luv4)
CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Luv4)
// from Luv / Luv4
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_RGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_RGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_BGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_BGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_BGRA)
// from Luv / Luv4 to the L-prefixed targets
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LRGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LRGB)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LRGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LRGBA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LBGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LBGR)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LBGRA)
CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LBGRA)
// The generator macro is only needed inside this header; undefine it so it
// does not leak into translation units that include this file.
#undef CV_CUDEV_EXPR_CVTCOLOR_INST
}}
#endif

View File

@ -0,0 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_DERIV_HPP__
#define __OPENCV_CUDEV_EXPR_DERIV_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/deriv.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// derivX
//
// Builds an expression whose body is the object returned by derivXPtr(src).
//   src     - source object, forwarded unchanged to derivXPtr.
//   returns - Expr wrapping a DerivXPtrSz over the source's PtrTraits ptr_type.
template <class SrcPtr>
__host__ Expr<DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
derivX_(const SrcPtr& src)
{
    typedef DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = derivXPtr(src);
    return makeExpr(body);
}
// derivY
//
// Builds an expression whose body is the object returned by derivYPtr(src).
//   src     - source object, forwarded unchanged to derivYPtr.
//   returns - Expr wrapping a DerivYPtrSz over the source's PtrTraits ptr_type.
template <class SrcPtr>
__host__ Expr<DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
derivY_(const SrcPtr& src)
{
    typedef DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = derivYPtr(src);
    return makeExpr(body);
}
// sobelX
//
// Builds an expression whose body is the object returned by sobelXPtr(src).
//   src     - source object, forwarded unchanged to sobelXPtr.
//   returns - Expr wrapping a SobelXPtrSz over the source's PtrTraits ptr_type.
template <class SrcPtr>
__host__ Expr<SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
sobelX_(const SrcPtr& src)
{
    typedef SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = sobelXPtr(src);
    return makeExpr(body);
}
// sobelY
//
// Builds an expression whose body is the object returned by sobelYPtr(src).
//   src     - source object, forwarded unchanged to sobelYPtr.
//   returns - Expr wrapping a SobelYPtrSz over the source's PtrTraits ptr_type.
template <class SrcPtr>
__host__ Expr<SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
sobelY_(const SrcPtr& src)
{
    typedef SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = sobelYPtr(src);
    return makeExpr(body);
}
// scharrX
//
// Builds an expression whose body is the object returned by scharrXPtr(src).
//   src     - source object, forwarded unchanged to scharrXPtr.
//   returns - Expr wrapping a ScharrXPtrSz over the source's PtrTraits ptr_type.
template <class SrcPtr>
__host__ Expr<ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
scharrX_(const SrcPtr& src)
{
    typedef ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = scharrXPtr(src);
    return makeExpr(body);
}
// scharrY
//
// Builds an expression whose body is the object returned by scharrYPtr(src).
//   src     - source object, forwarded unchanged to scharrYPtr.
//   returns - Expr wrapping a ScharrYPtrSz over the source's PtrTraits ptr_type.
template <class SrcPtr>
__host__ Expr<ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
scharrY_(const SrcPtr& src)
{
    typedef ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = scharrYPtr(src);
    return makeExpr(body);
}
// laplacian
//
// Builds an expression whose body is the object returned by
// laplacianPtr<ksize>(src).
//   ksize   - compile-time value passed through to laplacianPtr and
//             LaplacianPtrSz; must be given explicitly by the caller.
//             NOTE(review): presumably the kernel aperture size - confirm in
//             ptr2d/deriv.hpp.
//   src     - source object, forwarded unchanged to laplacianPtr.
//   returns - Expr wrapping a LaplacianPtrSz<ksize, ...> over the source's
//             PtrTraits ptr_type.
template <int ksize, class SrcPtr>
__host__ Expr<LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> >
laplacian_(const SrcPtr& src)
{
    typedef LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> body_type;

    const body_type body = laplacianPtr<ksize>(src);
    return makeExpr(body);
}
}}
#endif

View File

@ -0,0 +1,92 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_EXPR_HPP__
#define __OPENCV_CUDEV_EXPR_EXPR_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
namespace cv { namespace cudev {
// Expr<Body>
//
// Thin wrapper that tags a value of type Body as an expression. The wrapped
// object is stored by value in the public member `body`; the struct declares
// no constructors and adds no behaviour of its own.
template <class Body>
struct Expr
{
    Body body; // the wrapped object
};
// makeExpr
//
// Wraps a copy of `body` in an Expr<Body>; Body is deduced from the argument.
//   body    - object to wrap; it is copied into the result.
//   returns - Expr<Body> whose `body` member is a copy of the argument.
template <class Body>
__host__ Expr<Body> makeExpr(const Body& body)
{
    // Expr<Body> is an aggregate, so its member can be copy-initialized
    // directly. This avoids default-constructing the body and then assigning
    // over it, and therefore no longer requires Body to be
    // default-constructible or assignable.
    Expr<Body> e = { body };
    return e;
}
// PtrTraits specialization for Expr<Body>.
//
// Every query is answered by PtrTraits<Body> applied to the wrapped
// `expr.body`, so an expression is described by the traits of its body.
template <class Body>
struct PtrTraits< Expr<Body> >
{
    // ptr_sz_type is the expression type itself.
    typedef Expr<Body> ptr_sz_type;
    // Type returned by shrinkPtr; taken from the body's traits.
    typedef typename PtrTraits<Body>::ptr_type ptr_type;
    // Element type, read from the body's ptr_type.
    typedef typename ptr_type::value_type value_type;

    // Returns PtrTraits<Body>::getRows for the wrapped body.
    __host__ static int getRows(const Expr<Body>& expr)
    {
        return PtrTraits<Body>::getRows(expr.body);
    }

    // Returns PtrTraits<Body>::getCols for the wrapped body.
    __host__ static int getCols(const Expr<Body>& expr)
    {
        return PtrTraits<Body>::getCols(expr.body);
    }

    // Returns PtrTraits<Body>::shrinkPtr for the wrapped body.
    __host__ static ptr_type shrinkPtr(const Expr<Body>& expr)
    {
        return PtrTraits<Body>::shrinkPtr(expr.body);
    }
};
}}
#endif

View File

@ -0,0 +1,132 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_PER_ELEMENT_FUNC_HPP__
#define __OPENCV_CUDEV_EXPR_PER_ELEMENT_FUNC_HPP__
#include "../common.hpp"
#include "../util/type_traits.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/transform.hpp"
#include "../ptr2d/lut.hpp"
#include "../functional/functional.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// min/max

// min_
//
// Builds a binary transform expression over src1 and src2 using the
// `minimum` functor. The functor is instantiated for
// LargerType<value_type1, value_type2>::type, where the value types are the
// PtrTraits value_type of each source.
//   src1, src2 - the two sources, forwarded unchanged to transformPtr.
//   returns    - Expr wrapping the object returned by transformPtr.
template <class SrcPtr1, class SrcPtr2>
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, minimum<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > >
min_(const SrcPtr1& src1, const SrcPtr2& src2)
{
    typedef typename PtrTraits<SrcPtr1>::value_type value_type1;
    typedef typename PtrTraits<SrcPtr2>::value_type value_type2;
    typedef typename LargerType<value_type1, value_type2>::type work_type;

    return makeExpr(transformPtr(src1, src2, minimum<work_type>()));
}
// max_
//
// Builds a binary transform expression over src1 and src2 using the
// `maximum` functor. The functor is instantiated for
// LargerType<value_type1, value_type2>::type, where the value types are the
// PtrTraits value_type of each source.
//   src1, src2 - the two sources, forwarded unchanged to transformPtr.
//   returns    - Expr wrapping the object returned by transformPtr.
template <class SrcPtr1, class SrcPtr2>
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, maximum<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > >
max_(const SrcPtr1& src1, const SrcPtr2& src2)
{
    typedef typename PtrTraits<SrcPtr1>::value_type value_type1;
    typedef typename PtrTraits<SrcPtr2>::value_type value_type2;
    typedef typename LargerType<value_type1, value_type2>::type work_type;

    return makeExpr(transformPtr(src1, src2, maximum<work_type>()));
}
// threshold

// threshBinary_
//
// Builds a unary transform expression over src using the functor returned by
// thresh_binary_func(thresh, maxVal).
//   src     - source object, forwarded unchanged to transformPtr.
//   thresh  - first argument of thresh_binary_func; has the source's value_type.
//   maxVal  - second argument of thresh_binary_func; has the source's value_type.
//   returns - Expr wrapping the object returned by transformPtr.
// NOTE(review): the thresholding rule itself lives in ThreshBinaryFunc
// (functional/functional.hpp) - confirm its semantics there.
template <class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshBinaryFunc<typename PtrTraits<SrcPtr>::value_type> > >
threshBinary_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh, typename PtrTraits<SrcPtr>::value_type maxVal)
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;

    const ThreshBinaryFunc<value_type> op = thresh_binary_func(thresh, maxVal);
    return makeExpr(transformPtr(src, op));
}
// threshBinaryInv_
//
// Builds a unary transform expression over src using the functor returned by
// thresh_binary_inv_func(thresh, maxVal).
//   src     - source object, forwarded unchanged to transformPtr.
//   thresh  - first argument of thresh_binary_inv_func; has the source's value_type.
//   maxVal  - second argument of thresh_binary_inv_func; has the source's value_type.
//   returns - Expr wrapping the object returned by transformPtr.
// NOTE(review): the thresholding rule itself lives in ThreshBinaryInvFunc
// (functional/functional.hpp) - confirm its semantics there.
template <class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshBinaryInvFunc<typename PtrTraits<SrcPtr>::value_type> > >
threshBinaryInv_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh, typename PtrTraits<SrcPtr>::value_type maxVal)
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;

    const ThreshBinaryInvFunc<value_type> op = thresh_binary_inv_func(thresh, maxVal);
    return makeExpr(transformPtr(src, op));
}
// threshTrunc_
//
// Builds a unary transform expression over src using the functor returned by
// thresh_trunc_func(thresh).
//   src     - source object, forwarded unchanged to transformPtr.
//   thresh  - argument of thresh_trunc_func; has the source's value_type.
//   returns - Expr wrapping the object returned by transformPtr.
// NOTE(review): the thresholding rule itself lives in ThreshTruncFunc
// (functional/functional.hpp) - confirm its semantics there.
template <class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshTruncFunc<typename PtrTraits<SrcPtr>::value_type> > >
threshTrunc_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh)
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;

    const ThreshTruncFunc<value_type> op = thresh_trunc_func(thresh);
    return makeExpr(transformPtr(src, op));
}
// threshToZero_
//
// Builds a unary transform expression over src using the functor returned by
// thresh_to_zero_func(thresh).
//   src     - source object, forwarded unchanged to transformPtr.
//   thresh  - argument of thresh_to_zero_func; has the source's value_type.
//   returns - Expr wrapping the object returned by transformPtr.
// NOTE(review): the thresholding rule itself lives in ThreshToZeroFunc
// (functional/functional.hpp) - confirm its semantics there.
template <class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshToZeroFunc<typename PtrTraits<SrcPtr>::value_type> > >
threshToZero_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh)
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;

    const ThreshToZeroFunc<value_type> op = thresh_to_zero_func(thresh);
    return makeExpr(transformPtr(src, op));
}
// threshToZeroInv_
//
// Builds a unary transform expression over src using the functor returned by
// thresh_to_zero_inv_func(thresh).
//   src     - source object, forwarded unchanged to transformPtr.
//   thresh  - argument of thresh_to_zero_inv_func; has the source's value_type.
//   returns - Expr wrapping the object returned by transformPtr.
// NOTE(review): the thresholding rule itself lives in ThreshToZeroInvFunc
// (functional/functional.hpp) - confirm its semantics there.
template <class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshToZeroInvFunc<typename PtrTraits<SrcPtr>::value_type> > >
threshToZeroInv_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh)
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;

    const ThreshToZeroInvFunc<value_type> op = thresh_to_zero_inv_func(thresh);
    return makeExpr(transformPtr(src, op));
}
// cvt
//
// Builds a unary transform expression over src using
// saturate_cast_func<S, D>, where S is the source's PtrTraits value_type.
//   D       - destination element type; must be given explicitly by the caller.
//   src     - source object, forwarded unchanged to transformPtr.
//   returns - Expr wrapping the object returned by transformPtr.
template <typename D, class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, saturate_cast_func<typename PtrTraits<SrcPtr>::value_type, D> > >
cvt_(const SrcPtr& src)
{
    typedef typename PtrTraits<SrcPtr>::value_type src_type;
    typedef saturate_cast_func<src_type, D> cast_op;

    return makeExpr(transformPtr(src, cast_op()));
}
// lut
//
// Builds an expression whose body is the object returned by lutPtr(src, tbl).
//   src     - source object, forwarded unchanged to lutPtr.
//   tbl     - table object, forwarded unchanged to lutPtr.
//   returns - Expr wrapping a LutPtrSz over the PtrTraits ptr_type of the
//             source and of the table.
template <class SrcPtr, class TablePtr>
__host__ Expr<LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> >
lut_(const SrcPtr& src, const TablePtr& tbl)
{
    typedef LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> body_type;

    const body_type body = lutPtr(src, tbl);
    return makeExpr(body);
}
}}
#endif

View File

@ -0,0 +1,259 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_REDUCTION_HPP__
#define __OPENCV_CUDEV_EXPR_REDUCTION_HPP__
#include "../common.hpp"
#include "../grid/glob_reduce.hpp"
#include "../grid/histogram.hpp"
#include "../grid/integral.hpp"
#include "../grid/reduce_to_vec.hpp"
#include "../ptr2d/traits.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// sum

// SumExprBody
//
// Expression body that keeps a copy of the source in `src`. It must stay an
// aggregate (no user-declared constructors) because sum_ brace-initializes it.
template <class SrcPtr> struct SumExprBody
{
    SrcPtr src;

    // Calls gridCalcSum(src, dst, stream).
    //   dst    - destination matrix, passed through to gridCalcSum.
    //   stream - stream passed through to gridCalcSum; defaults to Stream::Null().
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridCalcSum(src, dst, stream);
    }
};

// sum_
//
// Wraps a copy of `src` in a SumExprBody and returns it as an expression.
//   src     - source object; copied into the body.
//   returns - Expr whose body is the SumExprBody holding that copy.
template <class SrcPtr>
__host__ Expr<SumExprBody<SrcPtr> >
sum_(const SrcPtr& src)
{
    // Copy-initialize the member directly instead of default-constructing the
    // body and assigning to it; SrcPtr then does not need a default
    // constructor or an assignment operator.
    SumExprBody<SrcPtr> body = { src };
    return makeExpr(body);
}
// minVal

// FindMinValExprBody
//
// Expression body that keeps a copy of the source in `src`. It must stay an
// aggregate (no user-declared constructors) because minVal_ brace-initializes it.
template <class SrcPtr> struct FindMinValExprBody
{
    SrcPtr src;

    // Calls gridFindMinVal(src, dst, stream).
    //   dst    - destination matrix, passed through to gridFindMinVal.
    //   stream - stream passed through to gridFindMinVal; defaults to Stream::Null().
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridFindMinVal(src, dst, stream);
    }
};

// minVal_
//
// Wraps a copy of `src` in a FindMinValExprBody and returns it as an expression.
//   src     - source object; copied into the body.
//   returns - Expr whose body is the FindMinValExprBody holding that copy.
template <class SrcPtr>
__host__ Expr<FindMinValExprBody<SrcPtr> >
minVal_(const SrcPtr& src)
{
    // Copy-initialize the member directly instead of default-constructing the
    // body and assigning to it; SrcPtr then does not need a default
    // constructor or an assignment operator.
    FindMinValExprBody<SrcPtr> body = { src };
    return makeExpr(body);
}
// maxVal

// FindMaxValExprBody
//
// Expression body that keeps a copy of the source in `src`. It must stay an
// aggregate (no user-declared constructors) because maxVal_ brace-initializes it.
template <class SrcPtr> struct FindMaxValExprBody
{
    SrcPtr src;

    // Calls gridFindMaxVal(src, dst, stream).
    //   dst    - destination matrix, passed through to gridFindMaxVal.
    //   stream - stream passed through to gridFindMaxVal; defaults to Stream::Null().
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridFindMaxVal(src, dst, stream);
    }
};

// maxVal_
//
// Wraps a copy of `src` in a FindMaxValExprBody and returns it as an expression.
//   src     - source object; copied into the body.
//   returns - Expr whose body is the FindMaxValExprBody holding that copy.
template <class SrcPtr>
__host__ Expr<FindMaxValExprBody<SrcPtr> >
maxVal_(const SrcPtr& src)
{
    // Copy-initialize the member directly instead of default-constructing the
    // body and assigning to it; SrcPtr then does not need a default
    // constructor or an assignment operator.
    FindMaxValExprBody<SrcPtr> body = { src };
    return makeExpr(body);
}
// minMaxVal

// FindMinMaxValExprBody
//
// Expression body that keeps a copy of the source in `src`. It must stay an
// aggregate (no user-declared constructors) because minMaxVal_
// brace-initializes it.
template <class SrcPtr> struct FindMinMaxValExprBody
{
    SrcPtr src;

    // Calls gridFindMinMaxVal(src, dst, stream).
    //   dst    - destination matrix, passed through to gridFindMinMaxVal.
    //   stream - stream passed through to gridFindMinMaxVal; defaults to Stream::Null().
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridFindMinMaxVal(src, dst, stream);
    }
};

// minMaxVal_
//
// Wraps a copy of `src` in a FindMinMaxValExprBody and returns it as an expression.
//   src     - source object; copied into the body.
//   returns - Expr whose body is the FindMinMaxValExprBody holding that copy.
template <class SrcPtr>
__host__ Expr<FindMinMaxValExprBody<SrcPtr> >
minMaxVal_(const SrcPtr& src)
{
    // Copy-initialize the member directly instead of default-constructing the
    // body and assigning to it; SrcPtr then does not need a default
    // constructor or an assignment operator.
    FindMinMaxValExprBody<SrcPtr> body = { src };
    return makeExpr(body);
}
// countNonZero

// CountNonZeroExprBody
//
// Expression body that keeps a copy of the source in `src`. It must stay an
// aggregate (no user-declared constructors) because countNonZero_
// brace-initializes it.
template <class SrcPtr> struct CountNonZeroExprBody
{
    SrcPtr src;

    // Calls gridCountNonZero(src, dst, stream).
    //   dst    - destination matrix, passed through to gridCountNonZero.
    //   stream - stream passed through to gridCountNonZero; defaults to Stream::Null().
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridCountNonZero(src, dst, stream);
    }
};

// countNonZero_
//
// Wraps a copy of `src` in a CountNonZeroExprBody and returns it as an expression.
//   src     - source object; copied into the body.
//   returns - Expr whose body is the CountNonZeroExprBody holding that copy.
template <class SrcPtr>
__host__ Expr<CountNonZeroExprBody<SrcPtr> >
countNonZero_(const SrcPtr& src)
{
    // Copy-initialize the member directly instead of default-constructing the
    // body and assigning to it; SrcPtr then does not need a default
    // constructor or an assignment operator.
    CountNonZeroExprBody<SrcPtr> body = { src };
    return makeExpr(body);
}
// reduceToRow
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridReduceToRow<Reductor>(src, dst, stream).
// Reductor is passed through unchanged as the explicit template argument.
template <class Reductor, class SrcPtr> struct ReduceToRowBody
{
    // Source operand; filled in by reduceToRow_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridReduceToRow<Reductor>.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridReduceToRow<Reductor>(src, dst, stream);
    }
};

// Stores src in a ReduceToRowBody and returns it wrapped by makeExpr().
// Reductor must be given explicitly by the caller; SrcPtr is deduced from src.
template <class Reductor, class SrcPtr>
__host__ Expr<ReduceToRowBody<Reductor, SrcPtr> >
reduceToRow_(const SrcPtr& src)
{
    typedef ReduceToRowBody<Reductor, SrcPtr> body_type;

    body_type rowBody;
    rowBody.src = src;

    return makeExpr(rowBody);
}
// reduceToColumn
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridReduceToColumn<Reductor>(src, dst, stream).
// Reductor is passed through unchanged as the explicit template argument.
template <class Reductor, class SrcPtr> struct ReduceToColumnBody
{
    // Source operand; filled in by reduceToColumn_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridReduceToColumn<Reductor>.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridReduceToColumn<Reductor>(src, dst, stream);
    }
};

// Stores src in a ReduceToColumnBody and returns it wrapped by makeExpr().
// Reductor must be given explicitly by the caller; SrcPtr is deduced from src.
template <class Reductor, class SrcPtr>
__host__ Expr<ReduceToColumnBody<Reductor, SrcPtr> >
reduceToColumn_(const SrcPtr& src)
{
    typedef ReduceToColumnBody<Reductor, SrcPtr> body_type;

    body_type columnBody;
    columnBody.src = src;

    return makeExpr(columnBody);
}
// histogram
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridHistogram<BIN_COUNT>(src, dst, stream).
// BIN_COUNT is a compile-time constant passed through unchanged.
template <int BIN_COUNT, class SrcPtr> struct HistogramBody
{
    // Source operand; filled in by histogram_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridHistogram<BIN_COUNT>.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridHistogram<BIN_COUNT>(src, dst, stream);
    }
};

// Stores src in a HistogramBody and returns it wrapped by makeExpr().
// BIN_COUNT must be given explicitly by the caller; SrcPtr is deduced from src.
template <int BIN_COUNT, class SrcPtr>
__host__ Expr<HistogramBody<BIN_COUNT, SrcPtr> >
histogram_(const SrcPtr& src)
{
    typedef HistogramBody<BIN_COUNT, SrcPtr> body_type;

    body_type histBody;
    histBody.src = src;

    return makeExpr(histBody);
}
// integral
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridIntegral(src, dst, stream).
template <class SrcPtr> struct IntegralBody
{
    // Source operand; filled in by integral_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridIntegral.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridIntegral(src, dst, stream);
    }
};

// Stores src in an IntegralBody and returns it wrapped by makeExpr().
// gridIntegral is not called here; it runs only from assignTo.
template <class SrcPtr>
__host__ Expr<IntegralBody<SrcPtr> >
integral_(const SrcPtr& src)
{
    typedef IntegralBody<SrcPtr> body_type;

    body_type integralBody;
    integralBody.src = src;

    return makeExpr(integralBody);
}
}}
#endif

View File

@ -0,0 +1,98 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_UNARY_FUNC_HPP__
#define __OPENCV_CUDEV_EXPR_UNARY_FUNC_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/transform.hpp"
#include "../functional/functional.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// For a functor template `<fname>_func`, generates the host-side factory
// `<fname>_(src)`. The factory default-constructs the functor for
// PtrTraits<SrcPtr>::value_type, passes it with src to transformPtr() and wraps
// the result with makeExpr().
// (Only block comments may be used inside the macro body because of the line
// continuations, so the documentation lives here.)
#define CV_CUDEV_EXPR_UNARY_FUNC(fname) \
    template <class SrcPtr> \
    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, fname ## _func<typename PtrTraits<SrcPtr>::value_type> > > \
    fname ## _(const SrcPtr& src) \
    { \
        typedef fname ## _func<typename PtrTraits<SrcPtr>::value_type> functor_type; \
        return makeExpr(transformPtr(src, functor_type())); \
    }

// One factory per functor: abs_(), sqr_(), sqrt_(), ...
CV_CUDEV_EXPR_UNARY_FUNC(abs)
CV_CUDEV_EXPR_UNARY_FUNC(sqr)
CV_CUDEV_EXPR_UNARY_FUNC(sqrt)
CV_CUDEV_EXPR_UNARY_FUNC(exp)
CV_CUDEV_EXPR_UNARY_FUNC(exp2)
CV_CUDEV_EXPR_UNARY_FUNC(exp10)
CV_CUDEV_EXPR_UNARY_FUNC(log)
CV_CUDEV_EXPR_UNARY_FUNC(log2)
CV_CUDEV_EXPR_UNARY_FUNC(log10)
CV_CUDEV_EXPR_UNARY_FUNC(sin)
CV_CUDEV_EXPR_UNARY_FUNC(cos)
CV_CUDEV_EXPR_UNARY_FUNC(tan)
CV_CUDEV_EXPR_UNARY_FUNC(asin)
CV_CUDEV_EXPR_UNARY_FUNC(acos)
CV_CUDEV_EXPR_UNARY_FUNC(atan)
CV_CUDEV_EXPR_UNARY_FUNC(sinh)
CV_CUDEV_EXPR_UNARY_FUNC(cosh)
CV_CUDEV_EXPR_UNARY_FUNC(tanh)
CV_CUDEV_EXPR_UNARY_FUNC(asinh)
CV_CUDEV_EXPR_UNARY_FUNC(acosh)
CV_CUDEV_EXPR_UNARY_FUNC(atanh)

// The generator is an implementation detail of this header.
#undef CV_CUDEV_EXPR_UNARY_FUNC
// Builds a power expression: binds `power` as the second argument of
// pow_func<value_type> via bind2nd(), passes the bound functor with src to
// transformPtr() and wraps the result with makeExpr().
// The exponent is always taken as float, whatever the source value type is.
template <class SrcPtr>
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, Binder2nd<pow_func<typename PtrTraits<SrcPtr>::value_type> > > >
pow_(const SrcPtr& src, float power)
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef pow_func<value_type> pow_op;

    return makeExpr(transformPtr(src, bind2nd(pow_op(), power)));
}
}}
#endif

View File

@ -0,0 +1,94 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_UNARY_OP_HPP__
#define __OPENCV_CUDEV_EXPR_UNARY_OP_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/transform.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/texture.hpp"
#include "../ptr2d/glob.hpp"
#include "../functional/functional.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// Generates four overloads of the unary operator `unop`, one each for
// GpuMat_<T>, GlobPtrSz<T>, Texture<T> and Expr<Body>. Every overload
// default-constructs `unop_functor<...>`, passes it with the operand to
// transformPtr() and wraps the result with makeExpr().
// For the first three the functor is instantiated on T; for Expr<Body> it is
// instantiated on Body::value_type and transformPtr() receives src.body.
#define CV_CUDEV_EXPR_UNOP_INST(unop, unop_functor) \
    template <typename T> \
    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, unop_functor<T> > > \
    operator unop(const GpuMat_<T>& src) \
    { \
        typedef unop_functor<T> functor_type; \
        return makeExpr(transformPtr(src, functor_type())); \
    } \
    \
    template <typename T> \
    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, unop_functor<T> > > \
    operator unop(const GlobPtrSz<T>& src) \
    { \
        typedef unop_functor<T> functor_type; \
        return makeExpr(transformPtr(src, functor_type())); \
    } \
    \
    template <typename T> \
    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, unop_functor<T> > > \
    operator unop(const Texture<T>& src) \
    { \
        typedef unop_functor<T> functor_type; \
        return makeExpr(transformPtr(src, functor_type())); \
    } \
    \
    template <class Body> \
    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, unop_functor<typename Body::value_type> > > \
    operator unop(const Expr<Body>& src) \
    { \
        typedef unop_functor<typename Body::value_type> functor_type; \
        return makeExpr(transformPtr(src.body, functor_type())); \
    }

// operator- -> negate, operator! -> logical_not, operator~ -> bit_not
CV_CUDEV_EXPR_UNOP_INST(-, negate)
CV_CUDEV_EXPR_UNOP_INST(!, logical_not)
CV_CUDEV_EXPR_UNOP_INST(~, bit_not)

// The generator is an implementation detail of this header.
#undef CV_CUDEV_EXPR_UNOP_INST
}}
#endif

View File

@ -0,0 +1,171 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_EXPR_WARPING_HPP__
#define __OPENCV_CUDEV_EXPR_WARPING_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/resize.hpp"
#include "../ptr2d/remap.hpp"
#include "../ptr2d/warping.hpp"
#include "../grid/pyramids.hpp"
#include "../grid/transpose.hpp"
#include "expr.hpp"
namespace cv { namespace cudev {
// resize
//
// Builds a resize expression: forwards src, fx and fy to resizePtr() and wraps
// the resulting pointer object with makeExpr().
// fx / fy are presumably the horizontal / vertical scale factors -- confirm in
// ptr2d/resize.hpp.
template <class SrcPtr>
__host__ Expr<ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
resize_(const SrcPtr& src, float fx, float fy)
{
    typedef ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> resized_type;

    const resized_type resized = resizePtr(src, fx, fy);
    return makeExpr(resized);
}
// remap
//
// Single-map overload: forwards src and map to remapPtr() and wraps the
// resulting RemapPtr1Sz with makeExpr().
template <class SrcPtr, class MapPtr>
__host__ Expr<RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapPtr>::ptr_type> >
remap_(const SrcPtr& src, const MapPtr& map)
{
    typedef typename PtrTraits<SrcPtr>::ptr_type src_ptr_type;
    typedef typename PtrTraits<MapPtr>::ptr_type map_ptr_type;
    typedef RemapPtr1Sz<src_ptr_type, map_ptr_type> remapped_type;

    const remapped_type remapped = remapPtr(src, map);
    return makeExpr(remapped);
}
// Two-map overload of remap_: forwards src, mapx and mapy to remapPtr() and
// wraps the resulting RemapPtr2Sz with makeExpr().
template <class SrcPtr, class MapXPtr, class MapYPtr>
__host__ Expr<RemapPtr2Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapXPtr>::ptr_type, typename PtrTraits<MapYPtr>::ptr_type> >
remap_(const SrcPtr& src, const MapXPtr& mapx, const MapYPtr& mapy)
{
    typedef typename PtrTraits<SrcPtr>::ptr_type  src_ptr_type;
    typedef typename PtrTraits<MapXPtr>::ptr_type mapx_ptr_type;
    typedef typename PtrTraits<MapYPtr>::ptr_type mapy_ptr_type;
    typedef RemapPtr2Sz<src_ptr_type, mapx_ptr_type, mapy_ptr_type> remapped_type;

    const remapped_type remapped = remapPtr(src, mapx, mapy);
    return makeExpr(remapped);
}
// warpAffine
//
// Forwards src, dstSize and warpMat to warpAffinePtr() and wraps the resulting
// RemapPtr1Sz<..., AffineMapPtr> with makeExpr().
// The layout expected for warpMat is defined by warpAffinePtr -- see
// ptr2d/warping.hpp.
template <class SrcPtr>
__host__ Expr<RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, AffineMapPtr> >
warpAffine_(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
{
    typedef RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, AffineMapPtr> warped_type;

    const warped_type warped = warpAffinePtr(src, dstSize, warpMat);
    return makeExpr(warped);
}
// warpPerspective
//
// Forwards src, dstSize and warpMat to warpPerspectivePtr() and wraps the
// resulting RemapPtr1Sz<..., PerspectiveMapPtr> with makeExpr().
// The layout expected for warpMat is defined by warpPerspectivePtr -- see
// ptr2d/warping.hpp.
template <class SrcPtr>
__host__ Expr<RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, PerspectiveMapPtr> >
warpPerspective_(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
{
    typedef RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, PerspectiveMapPtr> warped_type;

    const warped_type warped = warpPerspectivePtr(src, dstSize, warpMat);
    return makeExpr(warped);
}
// pyrDown
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridPyrDown(src, dst, stream).
template <class SrcPtr> struct PyrDownBody
{
    // Source operand; filled in by pyrDown_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridPyrDown.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridPyrDown(src, dst, stream);
    }
};

// Stores src in a PyrDownBody and returns it wrapped by makeExpr().
// gridPyrDown is not called here; it runs only from assignTo.
template <class SrcPtr>
__host__ Expr<PyrDownBody<SrcPtr> >
pyrDown_(const SrcPtr& src)
{
    typedef PyrDownBody<SrcPtr> body_type;

    body_type downBody;
    downBody.src = src;

    return makeExpr(downBody);
}
// pyrUp
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridPyrUp(src, dst, stream).
template <class SrcPtr> struct PyrUpBody
{
    // Source operand; filled in by pyrUp_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridPyrUp.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridPyrUp(src, dst, stream);
    }
};

// Stores src in a PyrUpBody and returns it wrapped by makeExpr().
// gridPyrUp is not called here; it runs only from assignTo.
template <class SrcPtr>
__host__ Expr<PyrUpBody<SrcPtr> >
pyrUp_(const SrcPtr& src)
{
    typedef PyrUpBody<SrcPtr> body_type;

    body_type upBody;
    upBody.src = src;

    return makeExpr(upBody);
}
// transpose
//
// Expression body that keeps a copy of the source and, once assigned to a
// GpuMat_, hands it to gridTranspose(src, dst, stream).
template <class SrcPtr> struct TransposeBody
{
    // Source operand; filled in by transpose_().
    SrcPtr src;

    // Executes the operation: forwards the stored source, the destination and
    // the stream (Stream::Null() when omitted) to gridTranspose.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        gridTranspose(src, dst, stream);
    }
};

// Stores src in a TransposeBody and returns it wrapped by makeExpr().
// gridTranspose is not called here; it runs only from assignTo.
template <class SrcPtr>
__host__ Expr<TransposeBody<SrcPtr> >
transpose_(const SrcPtr& src)
{
    typedef TransposeBody<SrcPtr> body_type;

    body_type transposeBody;
    transposeBody.src = src;

    return makeExpr(transposeBody);
}
}}
#endif

View File

@ -0,0 +1,474 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_FUNCTIONAL_COLOR_CVT_HPP__
#define __OPENCV_CUDEV_FUNCTIONAL_COLOR_CVT_HPP__
#include "../common.hpp"
#include "detail/color_cvt.hpp"
namespace cv { namespace cudev {
// Various 3/4-channel to 3/4-channel RGB transformations
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2RGB<SrcDepth, src_cn, dst_cn, blue_idx>.
// src_cn / dst_cn match the 3- or 4-channel layouts in each name; the exact
// meaning of the last argument is defined by the detail template.
#define CV_CUDEV_RGB2RGB_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2RGB<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_RGB2RGB_INST(BGR_to_RGB,   3, 3, 2)
CV_CUDEV_RGB2RGB_INST(BGR_to_BGRA,  3, 4, 0)
CV_CUDEV_RGB2RGB_INST(BGR_to_RGBA,  3, 4, 2)
CV_CUDEV_RGB2RGB_INST(BGRA_to_BGR,  4, 3, 0)
CV_CUDEV_RGB2RGB_INST(BGRA_to_RGB,  4, 3, 2)
CV_CUDEV_RGB2RGB_INST(BGRA_to_RGBA, 4, 4, 2)

#undef CV_CUDEV_RGB2RGB_INST
// RGB to Grayscale
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2Gray<SrcDepth, src_cn, blue_idx>.
// RGB-ordered sources pass 2 as the last argument, BGR-ordered sources pass 0.
#define CV_CUDEV_RGB2GRAY_INST(cvt, src_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2Gray<SrcDepth, src_cn, blue_idx> {};

CV_CUDEV_RGB2GRAY_INST(RGB_to_GRAY,  3, 2)
CV_CUDEV_RGB2GRAY_INST(BGR_to_GRAY,  3, 0)
CV_CUDEV_RGB2GRAY_INST(RGBA_to_GRAY, 4, 2)
CV_CUDEV_RGB2GRAY_INST(BGRA_to_GRAY, 4, 0)

#undef CV_CUDEV_RGB2GRAY_INST
// Grayscale to RGB
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::Gray2RGB<SrcDepth, dst_cn>.
#define CV_CUDEV_GRAY2RGB_INST(cvt, dst_cn) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::Gray2RGB<SrcDepth, dst_cn> {};

CV_CUDEV_GRAY2RGB_INST(GRAY_to_BGR,  3)
CV_CUDEV_GRAY2RGB_INST(GRAY_to_BGRA, 4)

#undef CV_CUDEV_GRAY2RGB_INST
// RGB to YUV
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2YUV<SrcDepth, src_cn, dst_cn, blue_idx>.
//
// NOTE(review): the other colour-space macros in this file (GRAY, YCrCb, XYZ,
// HSV, HLS, Lab, Luv) pass 2 as the last argument for RGB-ordered names and 0
// for BGR-ordered names; the instantiations below do the opposite
// (RGB* -> 0, BGR* -> 2). Confirm against color_cvt_detail::RGB2YUV whether
// that template interprets the index differently or whether this is a swap.
#define CV_CUDEV_RGB2YUV_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2YUV<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_RGB2YUV_INST(RGB_to_YUV,   3, 3, 0)
CV_CUDEV_RGB2YUV_INST(RGBA_to_YUV,  4, 3, 0)
CV_CUDEV_RGB2YUV_INST(RGB_to_YUV4,  3, 4, 0)
CV_CUDEV_RGB2YUV_INST(RGBA_to_YUV4, 4, 4, 0)
CV_CUDEV_RGB2YUV_INST(BGR_to_YUV,   3, 3, 2)
CV_CUDEV_RGB2YUV_INST(BGRA_to_YUV,  4, 3, 2)
CV_CUDEV_RGB2YUV_INST(BGR_to_YUV4,  3, 4, 2)
CV_CUDEV_RGB2YUV_INST(BGRA_to_YUV4, 4, 4, 2)

#undef CV_CUDEV_RGB2YUV_INST
// YUV to RGB
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::YUV2RGB<SrcDepth, src_cn, dst_cn, blue_idx>.
//
// NOTE(review): the other inverse colour-space macros in this file (YCrCb, XYZ,
// HSV, HLS, Lab, Luv) pass 2 as the last argument for RGB-ordered destinations
// and 0 for BGR-ordered ones; the instantiations below do the opposite
// (*_to_RGB* -> 0, *_to_BGR* -> 2). Confirm against color_cvt_detail::YUV2RGB
// whether that template interprets the index differently or whether this is a
// swap.
#define CV_CUDEV_YUV2RGB_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::YUV2RGB<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_YUV2RGB_INST(YUV_to_RGB,   3, 3, 0)
CV_CUDEV_YUV2RGB_INST(YUV_to_RGBA,  3, 4, 0)
CV_CUDEV_YUV2RGB_INST(YUV4_to_RGB,  4, 3, 0)
CV_CUDEV_YUV2RGB_INST(YUV4_to_RGBA, 4, 4, 0)
CV_CUDEV_YUV2RGB_INST(YUV_to_BGR,   3, 3, 2)
CV_CUDEV_YUV2RGB_INST(YUV_to_BGRA,  3, 4, 2)
CV_CUDEV_YUV2RGB_INST(YUV4_to_BGR,  4, 3, 2)
CV_CUDEV_YUV2RGB_INST(YUV4_to_BGRA, 4, 4, 2)

#undef CV_CUDEV_YUV2RGB_INST
// RGB to YCrCb
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2YCrCb<SrcDepth, src_cn, dst_cn, blue_idx>.
// RGB-ordered sources pass 2 as the last argument, BGR-ordered sources pass 0.
#define CV_CUDEV_RGB2YCrCb_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2YCrCb<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_RGB2YCrCb_INST(RGB_to_YCrCb,   3, 3, 2)
CV_CUDEV_RGB2YCrCb_INST(RGBA_to_YCrCb,  4, 3, 2)
CV_CUDEV_RGB2YCrCb_INST(RGB_to_YCrCb4,  3, 4, 2)
CV_CUDEV_RGB2YCrCb_INST(RGBA_to_YCrCb4, 4, 4, 2)
CV_CUDEV_RGB2YCrCb_INST(BGR_to_YCrCb,   3, 3, 0)
CV_CUDEV_RGB2YCrCb_INST(BGRA_to_YCrCb,  4, 3, 0)
CV_CUDEV_RGB2YCrCb_INST(BGR_to_YCrCb4,  3, 4, 0)
CV_CUDEV_RGB2YCrCb_INST(BGRA_to_YCrCb4, 4, 4, 0)

#undef CV_CUDEV_RGB2YCrCb_INST
// YCrCb to RGB
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::YCrCb2RGB<SrcDepth, src_cn, dst_cn, blue_idx>.
// RGB-ordered destinations pass 2 as the last argument, BGR-ordered ones pass 0.
#define CV_CUDEV_YCrCb2RGB_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::YCrCb2RGB<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_RGB,   3, 3, 2)
CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_RGBA,  3, 4, 2)
CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_RGB,  4, 3, 2)
CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_RGBA, 4, 4, 2)
CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_BGR,   3, 3, 0)
CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_BGRA,  3, 4, 0)
CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_BGR,  4, 3, 0)
CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_BGRA, 4, 4, 0)

#undef CV_CUDEV_YCrCb2RGB_INST
// RGB to XYZ
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2XYZ<SrcDepth, src_cn, dst_cn, blue_idx>.
// RGB-ordered sources pass 2 as the last argument, BGR-ordered sources pass 0.
#define CV_CUDEV_RGB2XYZ_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2XYZ<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_RGB2XYZ_INST(RGB_to_XYZ,   3, 3, 2)
CV_CUDEV_RGB2XYZ_INST(RGBA_to_XYZ,  4, 3, 2)
CV_CUDEV_RGB2XYZ_INST(RGB_to_XYZ4,  3, 4, 2)
CV_CUDEV_RGB2XYZ_INST(RGBA_to_XYZ4, 4, 4, 2)
CV_CUDEV_RGB2XYZ_INST(BGR_to_XYZ,   3, 3, 0)
CV_CUDEV_RGB2XYZ_INST(BGRA_to_XYZ,  4, 3, 0)
CV_CUDEV_RGB2XYZ_INST(BGR_to_XYZ4,  3, 4, 0)
CV_CUDEV_RGB2XYZ_INST(BGRA_to_XYZ4, 4, 4, 0)

#undef CV_CUDEV_RGB2XYZ_INST
// XYZ to RGB
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::XYZ2RGB<SrcDepth, src_cn, dst_cn, blue_idx>.
// RGB-ordered destinations pass 2 as the last argument, BGR-ordered ones pass 0.
#define CV_CUDEV_XYZ2RGB_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::XYZ2RGB<SrcDepth, src_cn, dst_cn, blue_idx> {};

CV_CUDEV_XYZ2RGB_INST(XYZ_to_RGB,   3, 3, 2)
CV_CUDEV_XYZ2RGB_INST(XYZ4_to_RGB,  4, 3, 2)
CV_CUDEV_XYZ2RGB_INST(XYZ_to_RGBA,  3, 4, 2)
CV_CUDEV_XYZ2RGB_INST(XYZ4_to_RGBA, 4, 4, 2)
CV_CUDEV_XYZ2RGB_INST(XYZ_to_BGR,   3, 3, 0)
CV_CUDEV_XYZ2RGB_INST(XYZ4_to_BGR,  4, 3, 0)
CV_CUDEV_XYZ2RGB_INST(XYZ_to_BGRA,  3, 4, 0)
CV_CUDEV_XYZ2RGB_INST(XYZ4_to_BGRA, 4, 4, 0)

#undef CV_CUDEV_XYZ2RGB_INST
// RGB to HSV
//
// For every conversion `cvt` this emits two functor templates derived from
// color_cvt_detail::RGB2HSV, differing only in the last template argument
// (presumably the hue range -- confirm in detail/color_cvt.hpp):
//   <cvt>_func<SrcDepth>       -> 180
//   <cvt>_FULL_func<SrcDepth>  -> 256
// plus explicit float specializations of both, which use 360.
#define CV_CUDEV_RGB2HSV_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2HSV<SrcDepth, src_cn, dst_cn, blue_idx, 180> {}; \
    template <typename SrcDepth> \
    struct cvt ## _FULL ## _func \
        : cv::cudev::color_cvt_detail::RGB2HSV<SrcDepth, src_cn, dst_cn, blue_idx, 256> {}; \
    template <> \
    struct cvt ## _func<float> \
        : cv::cudev::color_cvt_detail::RGB2HSV<float, src_cn, dst_cn, blue_idx, 360> {}; \
    template <> \
    struct cvt ## _FULL ## _func<float> \
        : cv::cudev::color_cvt_detail::RGB2HSV<float, src_cn, dst_cn, blue_idx, 360> {};

CV_CUDEV_RGB2HSV_INST(RGB_to_HSV,   3, 3, 2)
CV_CUDEV_RGB2HSV_INST(RGBA_to_HSV,  4, 3, 2)
CV_CUDEV_RGB2HSV_INST(RGB_to_HSV4,  3, 4, 2)
CV_CUDEV_RGB2HSV_INST(RGBA_to_HSV4, 4, 4, 2)
CV_CUDEV_RGB2HSV_INST(BGR_to_HSV,   3, 3, 0)
CV_CUDEV_RGB2HSV_INST(BGRA_to_HSV,  4, 3, 0)
CV_CUDEV_RGB2HSV_INST(BGR_to_HSV4,  3, 4, 0)
CV_CUDEV_RGB2HSV_INST(BGRA_to_HSV4, 4, 4, 0)

#undef CV_CUDEV_RGB2HSV_INST
// HSV to RGB
//
// For every conversion `cvt` this emits two functor templates derived from
// color_cvt_detail::HSV2RGB, differing only in the last template argument
// (presumably the hue range -- confirm in detail/color_cvt.hpp):
//   <cvt>_func<SrcDepth>       -> 180
//   <cvt>_FULL_func<SrcDepth>  -> 255
// plus explicit float specializations of both, which use 360.
// NOTE(review): the FULL variant uses 255 here whereas CV_CUDEV_RGB2HSV_INST
// uses 256 -- confirm the asymmetry is intended.
#define CV_CUDEV_HSV2RGB_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::HSV2RGB<SrcDepth, src_cn, dst_cn, blue_idx, 180> {}; \
    template <typename SrcDepth> \
    struct cvt ## _FULL ## _func \
        : cv::cudev::color_cvt_detail::HSV2RGB<SrcDepth, src_cn, dst_cn, blue_idx, 255> {}; \
    template <> \
    struct cvt ## _func<float> \
        : cv::cudev::color_cvt_detail::HSV2RGB<float, src_cn, dst_cn, blue_idx, 360> {}; \
    template <> \
    struct cvt ## _FULL ## _func<float> \
        : cv::cudev::color_cvt_detail::HSV2RGB<float, src_cn, dst_cn, blue_idx, 360> {};

CV_CUDEV_HSV2RGB_INST(HSV_to_RGB,   3, 3, 2)
CV_CUDEV_HSV2RGB_INST(HSV_to_RGBA,  3, 4, 2)
CV_CUDEV_HSV2RGB_INST(HSV4_to_RGB,  4, 3, 2)
CV_CUDEV_HSV2RGB_INST(HSV4_to_RGBA, 4, 4, 2)
CV_CUDEV_HSV2RGB_INST(HSV_to_BGR,   3, 3, 0)
CV_CUDEV_HSV2RGB_INST(HSV_to_BGRA,  3, 4, 0)
CV_CUDEV_HSV2RGB_INST(HSV4_to_BGR,  4, 3, 0)
CV_CUDEV_HSV2RGB_INST(HSV4_to_BGRA, 4, 4, 0)

#undef CV_CUDEV_HSV2RGB_INST
// RGB to HLS
//
// For every conversion `cvt` this emits two functor templates derived from
// color_cvt_detail::RGB2HLS, differing only in the last template argument
// (presumably the hue range -- confirm in detail/color_cvt.hpp):
//   <cvt>_func<SrcDepth>       -> 180
//   <cvt>_FULL_func<SrcDepth>  -> 256
// plus explicit float specializations of both, which use 360.
#define CV_CUDEV_RGB2HLS_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2HLS<SrcDepth, src_cn, dst_cn, blue_idx, 180> {}; \
    template <typename SrcDepth> \
    struct cvt ## _FULL ## _func \
        : cv::cudev::color_cvt_detail::RGB2HLS<SrcDepth, src_cn, dst_cn, blue_idx, 256> {}; \
    template <> \
    struct cvt ## _func<float> \
        : cv::cudev::color_cvt_detail::RGB2HLS<float, src_cn, dst_cn, blue_idx, 360> {}; \
    template <> \
    struct cvt ## _FULL ## _func<float> \
        : cv::cudev::color_cvt_detail::RGB2HLS<float, src_cn, dst_cn, blue_idx, 360> {};

CV_CUDEV_RGB2HLS_INST(RGB_to_HLS,   3, 3, 2)
CV_CUDEV_RGB2HLS_INST(RGBA_to_HLS,  4, 3, 2)
CV_CUDEV_RGB2HLS_INST(RGB_to_HLS4,  3, 4, 2)
CV_CUDEV_RGB2HLS_INST(RGBA_to_HLS4, 4, 4, 2)
CV_CUDEV_RGB2HLS_INST(BGR_to_HLS,   3, 3, 0)
CV_CUDEV_RGB2HLS_INST(BGRA_to_HLS,  4, 3, 0)
CV_CUDEV_RGB2HLS_INST(BGR_to_HLS4,  3, 4, 0)
CV_CUDEV_RGB2HLS_INST(BGRA_to_HLS4, 4, 4, 0)

#undef CV_CUDEV_RGB2HLS_INST
// HLS to RGB
//
// For every conversion `cvt` this emits two functor templates derived from
// color_cvt_detail::HLS2RGB, differing only in the last template argument
// (presumably the hue range -- confirm in detail/color_cvt.hpp):
//   <cvt>_func<SrcDepth>       -> 180
//   <cvt>_FULL_func<SrcDepth>  -> 255
// plus explicit float specializations of both, which use 360.
// NOTE(review): the FULL variant uses 255 here whereas CV_CUDEV_RGB2HLS_INST
// uses 256 -- confirm the asymmetry is intended.
#define CV_CUDEV_HLS2RGB_INST(cvt, src_cn, dst_cn, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::HLS2RGB<SrcDepth, src_cn, dst_cn, blue_idx, 180> {}; \
    template <typename SrcDepth> \
    struct cvt ## _FULL ## _func \
        : cv::cudev::color_cvt_detail::HLS2RGB<SrcDepth, src_cn, dst_cn, blue_idx, 255> {}; \
    template <> \
    struct cvt ## _func<float> \
        : cv::cudev::color_cvt_detail::HLS2RGB<float, src_cn, dst_cn, blue_idx, 360> {}; \
    template <> \
    struct cvt ## _FULL ## _func<float> \
        : cv::cudev::color_cvt_detail::HLS2RGB<float, src_cn, dst_cn, blue_idx, 360> {};

CV_CUDEV_HLS2RGB_INST(HLS_to_RGB,   3, 3, 2)
CV_CUDEV_HLS2RGB_INST(HLS_to_RGBA,  3, 4, 2)
CV_CUDEV_HLS2RGB_INST(HLS4_to_RGB,  4, 3, 2)
CV_CUDEV_HLS2RGB_INST(HLS4_to_RGBA, 4, 4, 2)
CV_CUDEV_HLS2RGB_INST(HLS_to_BGR,   3, 3, 0)
CV_CUDEV_HLS2RGB_INST(HLS_to_BGRA,  3, 4, 0)
CV_CUDEV_HLS2RGB_INST(HLS4_to_BGR,  4, 3, 0)
CV_CUDEV_HLS2RGB_INST(HLS4_to_BGRA, 4, 4, 0)

#undef CV_CUDEV_HLS2RGB_INST
// RGB to Lab
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2Lab<SrcDepth, src_cn, dst_cn, srgb, blue_idx>.
// Names without a prefix pass srgb = true; names prefixed with `L`
// (LRGB, LBGR, ...) pass srgb = false.
#define CV_CUDEV_RGB2Lab_INST(cvt, src_cn, dst_cn, srgb, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2Lab<SrcDepth, src_cn, dst_cn, srgb, blue_idx> {};

CV_CUDEV_RGB2Lab_INST(RGB_to_Lab,    3, 3, true,  2)
CV_CUDEV_RGB2Lab_INST(RGBA_to_Lab,   4, 3, true,  2)
CV_CUDEV_RGB2Lab_INST(RGB_to_Lab4,   3, 4, true,  2)
CV_CUDEV_RGB2Lab_INST(RGBA_to_Lab4,  4, 4, true,  2)
CV_CUDEV_RGB2Lab_INST(BGR_to_Lab,    3, 3, true,  0)
CV_CUDEV_RGB2Lab_INST(BGRA_to_Lab,   4, 3, true,  0)
CV_CUDEV_RGB2Lab_INST(BGR_to_Lab4,   3, 4, true,  0)
CV_CUDEV_RGB2Lab_INST(BGRA_to_Lab4,  4, 4, true,  0)
CV_CUDEV_RGB2Lab_INST(LRGB_to_Lab,   3, 3, false, 2)
CV_CUDEV_RGB2Lab_INST(LRGBA_to_Lab,  4, 3, false, 2)
CV_CUDEV_RGB2Lab_INST(LRGB_to_Lab4,  3, 4, false, 2)
CV_CUDEV_RGB2Lab_INST(LRGBA_to_Lab4, 4, 4, false, 2)
CV_CUDEV_RGB2Lab_INST(LBGR_to_Lab,   3, 3, false, 0)
CV_CUDEV_RGB2Lab_INST(LBGRA_to_Lab,  4, 3, false, 0)
CV_CUDEV_RGB2Lab_INST(LBGR_to_Lab4,  3, 4, false, 0)
CV_CUDEV_RGB2Lab_INST(LBGRA_to_Lab4, 4, 4, false, 0)

#undef CV_CUDEV_RGB2Lab_INST
// Lab to RGB
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::Lab2RGB<SrcDepth, src_cn, dst_cn, srgb, blue_idx>.
// Destinations without a prefix pass srgb = true; destinations prefixed with
// `L` (LRGB, LBGR, ...) pass srgb = false.
#define CV_CUDEV_Lab2RGB_INST(cvt, src_cn, dst_cn, srgb, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::Lab2RGB<SrcDepth, src_cn, dst_cn, srgb, blue_idx> {};

CV_CUDEV_Lab2RGB_INST(Lab_to_RGB,    3, 3, true,  2)
CV_CUDEV_Lab2RGB_INST(Lab4_to_RGB,   4, 3, true,  2)
CV_CUDEV_Lab2RGB_INST(Lab_to_RGBA,   3, 4, true,  2)
CV_CUDEV_Lab2RGB_INST(Lab4_to_RGBA,  4, 4, true,  2)
CV_CUDEV_Lab2RGB_INST(Lab_to_BGR,    3, 3, true,  0)
CV_CUDEV_Lab2RGB_INST(Lab4_to_BGR,   4, 3, true,  0)
CV_CUDEV_Lab2RGB_INST(Lab_to_BGRA,   3, 4, true,  0)
CV_CUDEV_Lab2RGB_INST(Lab4_to_BGRA,  4, 4, true,  0)
CV_CUDEV_Lab2RGB_INST(Lab_to_LRGB,   3, 3, false, 2)
CV_CUDEV_Lab2RGB_INST(Lab4_to_LRGB,  4, 3, false, 2)
CV_CUDEV_Lab2RGB_INST(Lab_to_LRGBA,  3, 4, false, 2)
CV_CUDEV_Lab2RGB_INST(Lab4_to_LRGBA, 4, 4, false, 2)
CV_CUDEV_Lab2RGB_INST(Lab_to_LBGR,   3, 3, false, 0)
CV_CUDEV_Lab2RGB_INST(Lab4_to_LBGR,  4, 3, false, 0)
CV_CUDEV_Lab2RGB_INST(Lab_to_LBGRA,  3, 4, false, 0)
CV_CUDEV_Lab2RGB_INST(Lab4_to_LBGRA, 4, 4, false, 0)

#undef CV_CUDEV_Lab2RGB_INST
// RGB to Luv
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::RGB2Luv<SrcDepth, src_cn, dst_cn, srgb, blue_idx>.
// Names without a prefix pass srgb = true; names prefixed with `L`
// (LRGB, LBGR, ...) pass srgb = false.
#define CV_CUDEV_RGB2Luv_INST(cvt, src_cn, dst_cn, srgb, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::RGB2Luv<SrcDepth, src_cn, dst_cn, srgb, blue_idx> {};

CV_CUDEV_RGB2Luv_INST(RGB_to_Luv,    3, 3, true,  2)
CV_CUDEV_RGB2Luv_INST(RGBA_to_Luv,   4, 3, true,  2)
CV_CUDEV_RGB2Luv_INST(RGB_to_Luv4,   3, 4, true,  2)
CV_CUDEV_RGB2Luv_INST(RGBA_to_Luv4,  4, 4, true,  2)
CV_CUDEV_RGB2Luv_INST(BGR_to_Luv,    3, 3, true,  0)
CV_CUDEV_RGB2Luv_INST(BGRA_to_Luv,   4, 3, true,  0)
CV_CUDEV_RGB2Luv_INST(BGR_to_Luv4,   3, 4, true,  0)
CV_CUDEV_RGB2Luv_INST(BGRA_to_Luv4,  4, 4, true,  0)
CV_CUDEV_RGB2Luv_INST(LRGB_to_Luv,   3, 3, false, 2)
CV_CUDEV_RGB2Luv_INST(LRGBA_to_Luv,  4, 3, false, 2)
CV_CUDEV_RGB2Luv_INST(LRGB_to_Luv4,  3, 4, false, 2)
CV_CUDEV_RGB2Luv_INST(LRGBA_to_Luv4, 4, 4, false, 2)
CV_CUDEV_RGB2Luv_INST(LBGR_to_Luv,   3, 3, false, 0)
CV_CUDEV_RGB2Luv_INST(LBGRA_to_Luv,  4, 3, false, 0)
CV_CUDEV_RGB2Luv_INST(LBGR_to_Luv4,  3, 4, false, 0)
CV_CUDEV_RGB2Luv_INST(LBGRA_to_Luv4, 4, 4, false, 0)

#undef CV_CUDEV_RGB2Luv_INST
// Luv to RGB
//
// Each instantiation defines `<cvt>_func<SrcDepth>` as an empty struct derived
// from color_cvt_detail::Luv2RGB<SrcDepth, src_cn, dst_cn, srgb, blue_idx>.
// Destinations without a prefix pass srgb = true; destinations prefixed with
// `L` (LRGB, LBGR, ...) pass srgb = false.
#define CV_CUDEV_Luv2RGB_INST(cvt, src_cn, dst_cn, srgb, blue_idx) \
    template <typename SrcDepth> \
    struct cvt ## _func \
        : cv::cudev::color_cvt_detail::Luv2RGB<SrcDepth, src_cn, dst_cn, srgb, blue_idx> {};

CV_CUDEV_Luv2RGB_INST(Luv_to_RGB,    3, 3, true,  2)
CV_CUDEV_Luv2RGB_INST(Luv4_to_RGB,   4, 3, true,  2)
CV_CUDEV_Luv2RGB_INST(Luv_to_RGBA,   3, 4, true,  2)
CV_CUDEV_Luv2RGB_INST(Luv4_to_RGBA,  4, 4, true,  2)
CV_CUDEV_Luv2RGB_INST(Luv_to_BGR,    3, 3, true,  0)
CV_CUDEV_Luv2RGB_INST(Luv4_to_BGR,   4, 3, true,  0)
CV_CUDEV_Luv2RGB_INST(Luv_to_BGRA,   3, 4, true,  0)
CV_CUDEV_Luv2RGB_INST(Luv4_to_BGRA,  4, 4, true,  0)
CV_CUDEV_Luv2RGB_INST(Luv_to_LRGB,   3, 3, false, 2)
CV_CUDEV_Luv2RGB_INST(Luv4_to_LRGB,  4, 3, false, 2)
CV_CUDEV_Luv2RGB_INST(Luv_to_LRGBA,  3, 4, false, 2)
CV_CUDEV_Luv2RGB_INST(Luv4_to_LRGBA, 4, 4, false, 2)
CV_CUDEV_Luv2RGB_INST(Luv_to_LBGR,   3, 3, false, 0)
CV_CUDEV_Luv2RGB_INST(Luv4_to_LBGR,  4, 3, false, 0)
CV_CUDEV_Luv2RGB_INST(Luv_to_LBGRA,  3, 4, false, 0)
CV_CUDEV_Luv2RGB_INST(Luv4_to_LBGRA, 4, 4, false, 0)

#undef CV_CUDEV_Luv2RGB_INST
// 24/32-bit RGB to 16-bit (565 or 555) RGB
//
// Unlike the depth-templated conversions, each `<cvt>_func` here is a plain
// typedef of color_cvt_detail::RGB2RGB5x5<src_cn, blue_idx, green_bits>.
// green_bits is 5 for the *555 targets and 6 for the *565 targets.
#define CV_CUDEV_RGB2RGB5x5_INST(cvt, src_cn, blue_idx, green_bits) \
    typedef cv::cudev::color_cvt_detail::RGB2RGB5x5<src_cn, blue_idx, green_bits> \
        cvt ## _func;

CV_CUDEV_RGB2RGB5x5_INST(BGR_to_BGR555,  3, 0, 5)
CV_CUDEV_RGB2RGB5x5_INST(BGR_to_BGR565,  3, 0, 6)
CV_CUDEV_RGB2RGB5x5_INST(RGB_to_BGR555,  3, 2, 5)
CV_CUDEV_RGB2RGB5x5_INST(RGB_to_BGR565,  3, 2, 6)
CV_CUDEV_RGB2RGB5x5_INST(BGRA_to_BGR555, 4, 0, 5)
CV_CUDEV_RGB2RGB5x5_INST(BGRA_to_BGR565, 4, 0, 6)
CV_CUDEV_RGB2RGB5x5_INST(RGBA_to_BGR555, 4, 2, 5)
CV_CUDEV_RGB2RGB5x5_INST(RGBA_to_BGR565, 4, 2, 6)

#undef CV_CUDEV_RGB2RGB5x5_INST
// 16-bit (565 or 555) RGB to 24/32-bit RGB
//
// Each `<cvt>_func` is a plain typedef of
// color_cvt_detail::RGB5x52RGB<dst_cn, blue_idx, green_bits>.
// green_bits is 5 for the *555 sources and 6 for the *565 sources.
#define CV_CUDEV_RGB5x52RGB_INST(cvt, dst_cn, blue_idx, green_bits) \
    typedef cv::cudev::color_cvt_detail::RGB5x52RGB<dst_cn, blue_idx, green_bits> \
        cvt ## _func;

CV_CUDEV_RGB5x52RGB_INST(BGR555_to_RGB,  3, 2, 5)
CV_CUDEV_RGB5x52RGB_INST(BGR565_to_RGB,  3, 2, 6)
CV_CUDEV_RGB5x52RGB_INST(BGR555_to_BGR,  3, 0, 5)
CV_CUDEV_RGB5x52RGB_INST(BGR565_to_BGR,  3, 0, 6)
CV_CUDEV_RGB5x52RGB_INST(BGR555_to_RGBA, 4, 2, 5)
CV_CUDEV_RGB5x52RGB_INST(BGR565_to_RGBA, 4, 2, 6)
CV_CUDEV_RGB5x52RGB_INST(BGR555_to_BGRA, 4, 0, 5)
CV_CUDEV_RGB5x52RGB_INST(BGR565_to_BGRA, 4, 0, 6)

#undef CV_CUDEV_RGB5x52RGB_INST
// Grayscale to 16-bit (565 or 555) RGB
//
// Each `<cvt>_func` is a plain typedef of
// color_cvt_detail::Gray2RGB5x5<green_bits> (5 for BGR555, 6 for BGR565).
#define CV_CUDEV_GRAY2RGB5x5_INST(cvt, green_bits) \
    typedef cv::cudev::color_cvt_detail::Gray2RGB5x5<green_bits> \
        cvt ## _func;

CV_CUDEV_GRAY2RGB5x5_INST(GRAY_to_BGR555, 5)
CV_CUDEV_GRAY2RGB5x5_INST(GRAY_to_BGR565, 6)

#undef CV_CUDEV_GRAY2RGB5x5_INST
// 16-bit (565 or 555) RGB to Grayscale
//
// Each `<cvt>_func` is a plain typedef of
// color_cvt_detail::RGB5x52Gray<green_bits> (5 for BGR555, 6 for BGR565).
#define CV_CUDEV_RGB5x52GRAY_INST(cvt, green_bits) \
    typedef cv::cudev::color_cvt_detail::RGB5x52Gray<green_bits> \
        cvt ## _func;

CV_CUDEV_RGB5x52GRAY_INST(BGR555_to_GRAY, 5)
CV_CUDEV_RGB5x52GRAY_INST(BGR565_to_GRAY, 6)

#undef CV_CUDEV_RGB5x52GRAY_INST
}}
#endif

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,854 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_FUNCTIONAL_FUNCTIONAL_HPP__
#define __OPENCV_CUDEV_FUNCTIONAL_FUNCTIONAL_HPP__
#include "../common.hpp"
#include "../util/saturate_cast.hpp"
#include "../util/vec_traits.hpp"
#include "../util/vec_math.hpp"
#include "../util/type_traits.hpp"
namespace cv { namespace cudev {
// Function Objects
// Base tag for unary functors: publishes the argument type and the result type
// as nested typedefs. It carries no data members and no member functions.
template <typename _Arg, typename _Result> struct unary_function
{
typedef _Arg argument_type;
typedef _Result result_type;
};
// Base tag for binary functors: publishes both argument types and the result
// type as nested typedefs. It carries no data members and no member functions.
template <typename _Arg1, typename _Arg2, typename _Result> struct binary_function
{
typedef _Arg1 first_argument_type;
typedef _Arg2 second_argument_type;
typedef _Result result_type;
};
// Arithmetic Operations
// Every functor in this group applies one built-in operator to its operand(s)
// and passes the result through saturate_cast<T> before returning it.

// lhs + rhs
template <typename T> struct plus : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return saturate_cast<T>(lhs + rhs);
    }
};

// lhs - rhs
template <typename T> struct minus : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return saturate_cast<T>(lhs - rhs);
    }
};

// lhs * rhs
template <typename T> struct multiplies : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return saturate_cast<T>(lhs * rhs);
    }
};

// lhs / rhs (no check for a zero divisor is made here)
template <typename T> struct divides : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return saturate_cast<T>(lhs / rhs);
    }
};

// lhs % rhs (no check for a zero divisor is made here)
template <typename T> struct modulus : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return saturate_cast<T>(lhs % rhs);
    }
};

// -value
template <typename T> struct negate : unary_function<T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type value) const
    {
        return saturate_cast<T>(-value);
    }
};
// Comparison Operations
// Each functor returns the result of the corresponding comparison operator.
// The declared result type is MakeVec<uchar, VecTraits<T>::cn>::type
// (presumably a uchar-based type with the same channel count as T).

// lhs == rhs
template <typename T> struct equal_to : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs == rhs;
    }
};

// lhs != rhs
template <typename T> struct not_equal_to : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs != rhs;
    }
};

// lhs > rhs
template <typename T> struct greater : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs > rhs;
    }
};

// lhs < rhs
template <typename T> struct less : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs < rhs;
    }
};

// lhs >= rhs
template <typename T> struct greater_equal : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs >= rhs;
    }
};

// lhs <= rhs
template <typename T> struct less_equal : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs <= rhs;
    }
};
// Logical Operations
// Each functor returns the result of the corresponding logical operator; the
// declared result type is MakeVec<uchar, VecTraits<T>::cn>::type.

// lhs && rhs
template <typename T> struct logical_and : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs && rhs;
    }
};

// lhs || rhs
template <typename T> struct logical_or : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type lhs,
                typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs || rhs;
    }
};

// !value
template <typename T> struct logical_not : unary_function<T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
{
    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
    operator ()(typename TypeTraits<T>::parameter_type value) const
    {
        return !value;
    }
};
// Bitwise Operations
// Each functor returns the result of the corresponding bitwise operator,
// converted to T by the return statement (no saturate_cast is applied).

// lhs & rhs
template <typename T> struct bit_and : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs & rhs;
    }
};

// lhs | rhs
template <typename T> struct bit_or : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs | rhs;
    }
};

// lhs ^ rhs
template <typename T> struct bit_xor : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return lhs ^ rhs;
    }
};

// ~value
template <typename T> struct bit_not : unary_function<T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type value) const
    {
        return ~value;
    }
};

// value << shift
template <typename T> struct bit_lshift : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type value,
                                             typename TypeTraits<T>::parameter_type shift) const
    {
        return value << shift;
    }
};

// value >> shift
template <typename T> struct bit_rshift : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type value,
                                             typename TypeTraits<T>::parameter_type shift) const
    {
        return value >> shift;
    }
};
// Generalized Identity Operations

// Returns its argument unchanged.
template <typename T> struct identity : unary_function<T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type value) const
    {
        return value;
    }
};

// Returns the first of two arguments; the second is ignored.
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
    __device__ __forceinline__ T1
    operator ()(typename TypeTraits<T1>::parameter_type first,
                typename TypeTraits<T2>::parameter_type) const
    {
        return first;
    }
};

// Returns the second of two arguments; the first is ignored.
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
    __device__ __forceinline__ T2
    operator ()(typename TypeTraits<T1>::parameter_type,
                typename TypeTraits<T2>::parameter_type second) const
    {
        return second;
    }
};
// Min/Max Operations
// Primary templates: forward to the unqualified max()/min() overload that is
// found for T.

// max(lhs, rhs)
template <typename T> struct maximum : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return max(lhs, rhs);
    }
};

// min(lhs, rhs)
template <typename T> struct minimum : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type lhs,
                                             typename TypeTraits<T>::parameter_type rhs) const
    {
        return min(lhs, rhs);
    }
};
// CV_CUDEV_MINMAX_INST(type, maxop, minop)
// Emits explicit specializations maximum<type> and minimum<type> whose
// operator() takes its operands by value and calls maxop(a, b) / minop(a, b).
// Integer types are instantiated with ::max / ::min, float with
// ::fmaxf / ::fminf and double with ::fmax / ::fmin. The macro is undefined
// again right after the instantiation list.
#define CV_CUDEV_MINMAX_INST(type, maxop, minop) \
template <> struct maximum<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator ()(type a, type b) const {return maxop(a, b);} \
}; \
template <> struct minimum<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator ()(type a, type b) const {return minop(a, b);} \
};
CV_CUDEV_MINMAX_INST(uchar, ::max, ::min)
CV_CUDEV_MINMAX_INST(schar, ::max, ::min)
CV_CUDEV_MINMAX_INST(ushort, ::max, ::min)
CV_CUDEV_MINMAX_INST(short, ::max, ::min)
CV_CUDEV_MINMAX_INST(int, ::max, ::min)
CV_CUDEV_MINMAX_INST(uint, ::max, ::min)
CV_CUDEV_MINMAX_INST(float, ::fmaxf, ::fminf)
CV_CUDEV_MINMAX_INST(double, ::fmax, ::fmin)
#undef CV_CUDEV_MINMAX_INST
// abs_func
// Primary template: forwards to the unqualified abs() overload found for T.
template <typename T> struct abs_func : unary_function<T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type value) const
    {
        return abs(value);
    }
};

// uchar is unsigned, so the value is returned as is.
template <> struct abs_func<uchar> : unary_function<uchar, uchar>
{
    __device__ __forceinline__ uchar operator ()(uchar value) const
    {
        return value;
    }
};
// abs_func specialization for schar.
// The magnitude is computed in int and narrowed with saturate_cast<schar>
// instead of an implicit conversion: for x == -128 the magnitude (128) does not
// fit in schar and an implicit narrowing would hand back a negative value.
// This matches the other arithmetic functors in this file (plus, minus,
// negate, ...), which all narrow through saturate_cast.
template <> struct abs_func<schar> : unary_function<schar, schar>
{
    __device__ __forceinline__ schar operator ()(schar x) const
    {
        return saturate_cast<schar>(::abs((int) x));
    }
};
// ushort is unsigned, so the value is returned as is.
template <> struct abs_func<ushort> : unary_function<ushort, ushort>
{
    __device__ __forceinline__ ushort operator ()(ushort value) const
    {
        return value;
    }
};
// abs_func specialization for short.
// The magnitude is computed in int and narrowed with saturate_cast<short>
// instead of an implicit conversion: for x == -32768 the magnitude (32768) does
// not fit in short and an implicit narrowing would hand back a negative value.
// This matches the other arithmetic functors in this file (plus, minus,
// negate, ...), which all narrow through saturate_cast.
template <> struct abs_func<short> : unary_function<short, short>
{
    __device__ __forceinline__ short operator ()(short x) const
    {
        return saturate_cast<short>(::abs((int) x));
    }
};
// uint is unsigned, so the value is returned as is.
template <> struct abs_func<uint> : unary_function<uint, uint>
{
    __device__ __forceinline__ uint operator ()(uint value) const
    {
        return value;
    }
};

// int: delegates to ::abs.
template <> struct abs_func<int> : unary_function<int, int>
{
    __device__ __forceinline__ int operator ()(int value) const
    {
        return ::abs(value);
    }
};

// float: delegates to ::fabsf.
template <> struct abs_func<float> : unary_function<float, float>
{
    __device__ __forceinline__ float operator ()(float value) const
    {
        return ::fabsf(value);
    }
};

// double: delegates to ::fabs.
template <> struct abs_func<double> : unary_function<double, double>
{
    __device__ __forceinline__ double operator ()(double value) const
    {
        return ::fabs(value);
    }
};
// absdiff_func
// Absolute difference |a - b|, computed as max(a, b) - min(a, b) and narrowed
// with saturate_cast<T>.
//
// The previous form, abs_func<T>()(a - b), converted the difference to T
// *before* taking the absolute value. For types narrower than int the promoted
// difference was truncated on that conversion (uchar: 3 vs 5 gave 254;
// schar: 100 vs -100 gave 56), and for uint the subtraction itself wrapped.
// Subtracting the smaller operand from the larger one keeps the intermediate
// non-negative, and saturate_cast<T> handles a result that does not fit in T.
//
// NOTE(review): for float/double the maximum/minimum specializations call
// fmaxf/fminf (fmax/fmin), which ignore a NaN operand, so a single NaN input
// no longer propagates to the result. Confirm that this is acceptable.
template <typename T> struct absdiff_func : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const
    {
        maximum<T> maxOp;
        minimum<T> minOp;
        return saturate_cast<T>(maxOp(a, b) - minOp(a, b));
    }
};
// Math functions
// Square of the argument, x * x, narrowed with saturate_cast<T>.
// The product was previously converted to T implicitly, so for integer types
// narrower than int an out-of-range square was truncated (e.g. uchar 20 -> 144).
// Narrowing through saturate_cast makes this functor consistent with
// multiplies<T> above, which already returns saturate_cast<T>(a * b).
template <typename T> struct sqr_func : unary_function<T, T>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type x) const
    {
        return saturate_cast<T>(x * x);
    }
};
namespace functional_detail
{
// Maps T to the result type used by the math functors below: the element type
// is LargerType<float, elem_type-of-T>::type and the channel count is that of
// T (VecTraits<T>::cn), assembled with MakeVec.
// NOTE(review): LargerType presumably yields float for everything except
// double — confirm against its definition.
template <typename T> struct FloatType
{
typedef typename MakeVec<
typename LargerType<float, typename VecTraits<T>::elem_type>::type,
VecTraits<T>::cn
>::type type;
};
}
// CV_CUDEV_UNARY_FUNCTION_INST(name, func)
// Emits the functor family `name_func<T>`:
//  * primary template: calls the unqualified `name(a)` and returns
//    functional_detail::FloatType<T>::type;
//  * uchar, schar, ushort, short, uint, int, float specializations: call
//    `func ## f` (token pasting appends `f`, e.g. ::sqrt -> ::sqrtf) and
//    return float;
//  * double specialization: calls `func` and returns double.
// The macro is undefined again right after the instantiation list.
#define CV_CUDEV_UNARY_FUNCTION_INST(name, func) \
template <typename T> struct name ## _func : unary_function<T, typename functional_detail::FloatType<T>::type> \
{ \
__device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a) const \
{ \
return name(a); \
} \
}; \
template <> struct name ## _func<uchar> : unary_function<uchar, float> \
{ \
__device__ __forceinline__ float operator ()(uchar a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<schar> : unary_function<schar, float> \
{ \
__device__ __forceinline__ float operator ()(schar a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<ushort> : unary_function<ushort, float> \
{ \
__device__ __forceinline__ float operator ()(ushort a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<short> : unary_function<short, float> \
{ \
__device__ __forceinline__ float operator ()(short a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<uint> : unary_function<uint, float> \
{ \
__device__ __forceinline__ float operator ()(uint a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<int> : unary_function<int, float> \
{ \
__device__ __forceinline__ float operator ()(int a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<float> : unary_function<float, float> \
{ \
__device__ __forceinline__ float operator ()(float a) const \
{ \
return func ## f(a); \
} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
__device__ __forceinline__ double operator ()(double a) const \
{ \
return func(a); \
} \
};
CV_CUDEV_UNARY_FUNCTION_INST(sqrt, ::sqrt)
CV_CUDEV_UNARY_FUNCTION_INST(exp, ::exp)
CV_CUDEV_UNARY_FUNCTION_INST(exp2, ::exp2)
CV_CUDEV_UNARY_FUNCTION_INST(exp10, ::exp10)
CV_CUDEV_UNARY_FUNCTION_INST(log, ::log)
CV_CUDEV_UNARY_FUNCTION_INST(log2, ::log2)
CV_CUDEV_UNARY_FUNCTION_INST(log10, ::log10)
CV_CUDEV_UNARY_FUNCTION_INST(sin, ::sin)
CV_CUDEV_UNARY_FUNCTION_INST(cos, ::cos)
CV_CUDEV_UNARY_FUNCTION_INST(tan, ::tan)
CV_CUDEV_UNARY_FUNCTION_INST(asin, ::asin)
CV_CUDEV_UNARY_FUNCTION_INST(acos, ::acos)
CV_CUDEV_UNARY_FUNCTION_INST(atan, ::atan)
CV_CUDEV_UNARY_FUNCTION_INST(sinh, ::sinh)
CV_CUDEV_UNARY_FUNCTION_INST(cosh, ::cosh)
CV_CUDEV_UNARY_FUNCTION_INST(tanh, ::tanh)
CV_CUDEV_UNARY_FUNCTION_INST(asinh, ::asinh)
CV_CUDEV_UNARY_FUNCTION_INST(acosh, ::acosh)
CV_CUDEV_UNARY_FUNCTION_INST(atanh, ::atanh)
#undef CV_CUDEV_UNARY_FUNCTION_INST
// CV_CUDEV_BINARY_FUNCTION_INST(name, func)
// Two-argument counterpart of the unary instantiation macro. Emits
// `name_func<T>`:
//  * primary template: calls the unqualified `name(a, b)` and returns
//    functional_detail::FloatType<T>::type;
//  * uchar, schar, ushort, short, uint, int, float specializations: call
//    `func ## f` (e.g. ::hypot -> ::hypotf) and return float;
//  * double specialization: calls `func` and returns double.
// Instantiated for hypot and atan2, then undefined.
#define CV_CUDEV_BINARY_FUNCTION_INST(name, func) \
template <typename T> struct name ## _func : binary_function<T, T, typename functional_detail::FloatType<T>::type> \
{ \
__device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const \
{ \
return name(a, b); \
} \
}; \
template <> struct name ## _func<uchar> : binary_function<uchar, uchar, float> \
{ \
__device__ __forceinline__ float operator ()(uchar a, uchar b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<schar> : binary_function<schar, schar, float> \
{ \
__device__ __forceinline__ float operator ()(schar a, schar b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<ushort> : binary_function<ushort, ushort, float> \
{ \
__device__ __forceinline__ float operator ()(ushort a, ushort b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<short> : binary_function<short, short, float> \
{ \
__device__ __forceinline__ float operator ()(short a, short b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<uint> : binary_function<uint, uint, float> \
{ \
__device__ __forceinline__ float operator ()(uint a, uint b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<int> : binary_function<int, int, float> \
{ \
__device__ __forceinline__ float operator ()(int a, int b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<float> : binary_function<float, float, float> \
{ \
__device__ __forceinline__ float operator ()(float a, float b) const \
{ \
return func ## f(a, b); \
} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
__device__ __forceinline__ double operator ()(double a, double b) const \
{ \
return func(a, b); \
} \
};
CV_CUDEV_BINARY_FUNCTION_INST(hypot, ::hypot)
CV_CUDEV_BINARY_FUNCTION_INST(atan2, ::atan2)
#undef CV_CUDEV_BINARY_FUNCTION_INST
// Magnitude of the pair (x, y): the sum of squares x*x + y*y is passed to
// sqrt_func instantiated for FloatType<T>::type.
template <typename T> struct magnitude_func : binary_function<T, T, typename functional_detail::FloatType<T>::type>
{
    __device__ __forceinline__ typename functional_detail::FloatType<T>::type
    operator ()(typename TypeTraits<T>::parameter_type x,
                typename TypeTraits<T>::parameter_type y) const
    {
        return sqrt_func<typename functional_detail::FloatType<T>::type>()(x * x + y * y);
    }
};
// Power functor. The primary template evaluates ::powf with a float exponent
// and returns float.
template <typename T> struct pow_func : binary_function<T, float, float>
{
    __device__ __forceinline__ float operator ()(T base, float exponent) const
    {
        return ::powf(base, exponent);
    }
};

// double specialization: evaluates ::pow in double precision.
template <> struct pow_func<double> : binary_function<double, double, double>
{
    __device__ __forceinline__ double operator ()(double base, double exponent) const
    {
        return ::pow(base, exponent);
    }
};
// Saturate Cast Functor
// Wraps saturate_cast<D> as a unary functor from T to D.
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{
    __device__ __forceinline__ D operator ()(typename TypeTraits<T>::parameter_type value) const
    {
        return saturate_cast<D>(value);
    }
};
// Threshold Functors

// Binary threshold: the comparison (src > thresh) is converted to T and
// multiplied by maxVal.
template <typename T> struct ThreshBinaryFunc : unary_function<T, T>
{
    T thresh;
    T maxVal;

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
    {
        const T above = saturate_cast<T>(src > thresh);
        return above * maxVal;
    }
};

// Builds a ThreshBinaryFunc<T> holding the given threshold and maximum value.
template <typename T>
__host__ __device__ ThreshBinaryFunc<T> thresh_binary_func(T thresh, T maxVal)
{
    ThreshBinaryFunc<T> op;
    op.thresh = thresh;
    op.maxVal = maxVal;
    return op;
}
// Inverted binary threshold: the comparison (src <= thresh) is converted to T
// and multiplied by maxVal.
template <typename T> struct ThreshBinaryInvFunc : unary_function<T, T>
{
    T thresh;
    T maxVal;

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
    {
        const T notAbove = saturate_cast<T>(src <= thresh);
        return notAbove * maxVal;
    }
};

// Builds a ThreshBinaryInvFunc<T> holding the given threshold and maximum value.
template <typename T>
__host__ __device__ ThreshBinaryInvFunc<T> thresh_binary_inv_func(T thresh, T maxVal)
{
    ThreshBinaryInvFunc<T> op;
    op.thresh = thresh;
    op.maxVal = maxVal;
    return op;
}
// Truncating threshold: returns minimum<T>()(src, thresh).
template <typename T> struct ThreshTruncFunc : unary_function<T, T>
{
    T thresh;

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
    {
        return minimum<T>()(src, thresh);
    }
};

// Builds a ThreshTruncFunc<T> holding the given threshold.
template <typename T>
__host__ __device__ ThreshTruncFunc<T> thresh_trunc_func(T thresh)
{
    ThreshTruncFunc<T> op;
    op.thresh = thresh;
    return op;
}
// To-zero threshold: the comparison (src > thresh) is converted to T and
// multiplied by src itself.
template <typename T> struct ThreshToZeroFunc : unary_function<T, T>
{
    T thresh;

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
    {
        const T above = saturate_cast<T>(src > thresh);
        return above * src;
    }
};

// Builds a ThreshToZeroFunc<T> holding the given threshold.
template <typename T>
__host__ __device__ ThreshToZeroFunc<T> thresh_to_zero_func(T thresh)
{
    ThreshToZeroFunc<T> op;
    op.thresh = thresh;
    return op;
}
// Inverted to-zero threshold: the comparison (src <= thresh) is converted to T
// and multiplied by src itself.
template <typename T> struct ThreshToZeroInvFunc : unary_function<T, T>
{
    T thresh;

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
    {
        const T notAbove = saturate_cast<T>(src <= thresh);
        return notAbove * src;
    }
};

// Builds a ThreshToZeroInvFunc<T> holding the given threshold.
template <typename T>
__host__ __device__ ThreshToZeroInvFunc<T> thresh_to_zero_inv_func(T thresh)
{
    ThreshToZeroInvFunc<T> op;
    op.thresh = thresh;
    return op;
}
// Function Object Adaptors

// Wraps a unary predicate and returns the logical negation of its result.
template <class Predicate> struct UnaryNegate : unary_function<typename Predicate::argument_type, typename Predicate::result_type>
{
    Predicate pred;

    __device__ __forceinline__ typename Predicate::result_type operator ()(
            typename TypeTraits<typename Predicate::argument_type>::parameter_type arg) const
    {
        return !pred(arg);
    }
};

// Builds a UnaryNegate around a copy of pred.
template <class Predicate>
__host__ __device__ UnaryNegate<Predicate> not1(const Predicate& pred)
{
    UnaryNegate<Predicate> negated;
    negated.pred = pred;
    return negated;
}
// Wraps a binary predicate and returns the logical negation of its result.
template <class Predicate> struct BinaryNegate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, typename Predicate::result_type>
{
    Predicate pred;

    __device__ __forceinline__ typename Predicate::result_type operator ()(
            typename TypeTraits<typename Predicate::first_argument_type>::parameter_type lhs,
            typename TypeTraits<typename Predicate::second_argument_type>::parameter_type rhs) const
    {
        return !pred(lhs, rhs);
    }
};

// Builds a BinaryNegate around a copy of pred.
template <class Predicate>
__host__ __device__ BinaryNegate<Predicate> not2(const Predicate& pred)
{
    BinaryNegate<Predicate> negated;
    negated.pred = pred;
    return negated;
}
// Turns a binary functor into a unary one by fixing its first argument:
// operator()(x) evaluates op(arg1, x).
template <class Op> struct Binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
{
    Op op;
    typename Op::first_argument_type arg1;

    __device__ __forceinline__ typename Op::result_type operator ()(
            typename TypeTraits<typename Op::second_argument_type>::parameter_type x) const
    {
        return op(arg1, x);
    }
};

// Builds a Binder1st that stores copies of op and arg1.
template <class Op>
__host__ __device__ Binder1st<Op> bind1st(const Op& op, const typename Op::first_argument_type& arg1)
{
    Binder1st<Op> bound;
    bound.op = op;
    bound.arg1 = arg1;
    return bound;
}
// Turns a binary functor into a unary one by fixing its second argument:
// operator()(x) evaluates op(x, arg2).
template <class Op> struct Binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
{
    Op op;
    typename Op::second_argument_type arg2;

    __device__ __forceinline__ typename Op::result_type operator ()(
            typename TypeTraits<typename Op::first_argument_type>::parameter_type x) const
    {
        return op(x, arg2);
    }
};

// Builds a Binder2nd that stores copies of op and arg2.
template <class Op>
__host__ __device__ Binder2nd<Op> bind2nd(const Op& op, const typename Op::second_argument_type& arg2)
{
    Binder2nd<Op> bound;
    bound.op = op;
    bound.arg2 = arg2;
    return bound;
}
// Functor Traits
// Compile-time test: `value` is non-zero when an object of type F can be
// passed to check(unary_function<T, D>) for some T and D, i.e. when F derives
// from (or is) a unary_function instantiation.
// Works through overload resolution inside sizeof: the templated check()
// returns Yes (one char), the variadic fallback returns No (two chars).
// makeF() is only declared, never defined; it is used in an unevaluated
// context to obtain an expression of type F.
template <typename F> struct IsUnaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T, typename D> static Yes check(unary_function<T, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
// Compile-time test: `value` is non-zero when an object of type F can be
// passed to check(binary_function<T1, T2, D>) for some T1, T2 and D, i.e. when
// F derives from (or is) a binary_function instantiation.
// Works through overload resolution inside sizeof: the templated check()
// returns Yes (one char), the variadic fallback returns No (two chars).
// makeF() is only declared, never defined; it is used in an unevaluated
// context to obtain an expression of type F.
template <typename F> struct IsBinaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
}}
#endif

View File

@ -0,0 +1,98 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_FUNCTIONAL_TUPLE_ADAPTER_HPP__
#define __OPENCV_CUDEV_FUNCTIONAL_TUPLE_ADAPTER_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
namespace cv { namespace cudev {
// Adapts a unary functor so that it can be called with a tuple: the n-th
// element of the tuple is extracted with get<n>() and passed to op.
template <class Op, int n> struct UnaryTupleAdapter
{
    typedef typename Op::result_type result_type;

    Op op;

    template <class Tuple>
    __device__ __forceinline__ typename Op::result_type operator ()(const Tuple& args) const
    {
        return op(get<n>(args));
    }
};

// Builds a UnaryTupleAdapter<Op, n> around a copy of op.
template <int n, class Op>
__host__ __device__ UnaryTupleAdapter<Op, n> unaryTupleAdapter(const Op& op)
{
    UnaryTupleAdapter<Op, n> adapter;
    adapter.op = op;
    return adapter;
}
// Adapts a binary functor so that it can be called with a tuple: elements n0
// and n1 of the tuple are extracted with get<>() and passed to op in that order.
template <class Op, int n0, int n1> struct BinaryTupleAdapter
{
    typedef typename Op::result_type result_type;

    Op op;

    template <class Tuple>
    __device__ __forceinline__ typename Op::result_type operator ()(const Tuple& args) const
    {
        return op(get<n0>(args), get<n1>(args));
    }
};

// Builds a BinaryTupleAdapter<Op, n0, n1> around a copy of op.
template <int n0, int n1, class Op>
__host__ __device__ BinaryTupleAdapter<Op, n0, n1> binaryTupleAdapter(const Op& op)
{
    BinaryTupleAdapter<Op, n0, n1> adapter;
    adapter.op = op;
    return adapter;
}
}}
#endif

View File

@ -0,0 +1,263 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_COPY_HPP__
#define __OPENCV_CUDEV_GRID_COPY_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "../ptr2d/zip.hpp"
#include "detail/copy.hpp"
namespace cv { namespace cudev {
// Masked copy of src into a single destination matrix.
// Policy is forwarded as the template argument of grid_copy_detail::copy.
// Steps: read the size from src, assert that mask has the same size, call
// dst.create(rows, cols), then hand the shrunk pointers to
// grid_copy_detail::copy together with the stream obtained from `stream`.
// Note: CV_Assert receives the expression text, so the local names `rows`
// and `cols` are part of the diagnostic.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridCopy_(const SrcPtr& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
dst.create(rows, cols);
grid_copy_detail::copy<Policy>(shrinkPtr(src), shrinkPtr(dst), shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked copy of src into a single destination matrix.
// Policy is forwarded as the template argument of grid_copy_detail::copy.
// dst.create() is called with the source dimensions, then the shrunk pointers
// are handed to grid_copy_detail::copy with WithOutMask() in the mask slot.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridCopy_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    const int height = getRows(src);
    const int width = getCols(src);

    dst.create(height, width);

    grid_copy_detail::copy<Policy>(shrinkPtr(src),
                                   shrinkPtr(dst),
                                   WithOutMask(),
                                   height, width,
                                   StreamAccessor::getStream(stream));
}
// Masked copy of a 2-element source tuple into two destination matrices.
// The static assertion requires SrcPtrTuple to have exactly two elements.
// Steps: read the size from src, assert that mask has the same size, call
// create(rows, cols) on both destinations, then hand the shrunk source,
// the zipped destinations and the shrunk mask to grid_copy_detail::copy_tuple.
// Note: CV_Assert receives the expression text, so the local names `rows`
// and `cols` are part of the diagnostic.
template <class Policy, class SrcPtrTuple, typename D0, typename D1, class MaskPtr>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 2, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
get<0>(dst).create(rows, cols);
get<1>(dst).create(rows, cols);
grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
// Unmasked copy of a 2-element source tuple into two destination matrices.
// The static assertion requires SrcPtrTuple to have exactly two elements.
// Both destinations get create() called with the source dimensions before the
// work is handed to grid_copy_detail::copy_tuple with WithOutMask().
template <class Policy, class SrcPtrTuple, typename D0, typename D1>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 2, "" );

    const int height = getRows(src);
    const int width = getCols(src);

    get<0>(dst).create(height, width);
    get<1>(dst).create(height, width);

    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
                                         WithOutMask(),
                                         height, width,
                                         StreamAccessor::getStream(stream));
}
// Masked copy of a 3-element source tuple into three destination matrices.
// The static assertion requires SrcPtrTuple to have exactly three elements.
// Steps: read the size from src, assert that mask has the same size, call
// create(rows, cols) on every destination, then hand the shrunk source,
// the zipped destinations and the shrunk mask to grid_copy_detail::copy_tuple.
// Note: CV_Assert receives the expression text, so the local names `rows`
// and `cols` are part of the diagnostic.
template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, class MaskPtr>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 3, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
get<0>(dst).create(rows, cols);
get<1>(dst).create(rows, cols);
get<2>(dst).create(rows, cols);
grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
// Unmasked copy of a 3-element source tuple into three destination matrices.
// The static assertion requires SrcPtrTuple to have exactly three elements.
// Every destination gets create() called with the source dimensions before
// the work is handed to grid_copy_detail::copy_tuple with WithOutMask().
template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 3, "" );

    const int height = getRows(src);
    const int width = getCols(src);

    get<0>(dst).create(height, width);
    get<1>(dst).create(height, width);
    get<2>(dst).create(height, width);

    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
                                         WithOutMask(),
                                         height, width,
                                         StreamAccessor::getStream(stream));
}
// Masked copy of a 4-element source tuple into four destination matrices.
// The static assertion requires SrcPtrTuple to have exactly four elements.
// Steps: read the size from src, assert that mask has the same size, call
// create(rows, cols) on every destination, then hand the shrunk source,
// the zipped destinations and the shrunk mask to grid_copy_detail::copy_tuple.
// Note: CV_Assert receives the expression text, so the local names `rows`
// and `cols` are part of the diagnostic.
template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3, class MaskPtr>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 4, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
get<0>(dst).create(rows, cols);
get<1>(dst).create(rows, cols);
get<2>(dst).create(rows, cols);
get<3>(dst).create(rows, cols);
grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
// Unmasked copy of a 4-element source tuple into four destination matrices.
//
// The source must be a tuple of exactly four pointers (checked at compile
// time). Every destination has create(rows, cols) called on it with the
// dimensions reported by the source, then the zipped destinations are handed
// to grid_copy_detail::copy_tuple together with a WithOutMask() mask.
// Policy supplies the launch configuration used by the detail launcher.
template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 4, "" );

    const int height = getRows(src);
    const int width  = getCols(src);

    // Size all outputs to match the source before launching.
    get<0>(dst).create(height, width);
    get<1>(dst).create(height, width);
    get<2>(dst).create(height, width);
    get<3>(dst).create(height, width);

    grid_copy_detail::copy_tuple<Policy>(
        shrinkPtr(src),
        shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
        WithOutMask(),
        height, width,
        StreamAccessor::getStream(stream));
}
// Default Policy
// Launch policy used by the gridCopy() convenience overloads:
// a 32 x 8 thread block (block_size_x by block_size_y).
struct DefaultCopyPolicy
{
    enum
    {
        block_size_x = 32,
        block_size_y = 8
    };
};
// Masked copy into a single destination matrix using DefaultCopyPolicy.
// Forwards all arguments unchanged to gridCopy_.
template <class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridCopy(const SrcPtr& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, mask, stream);
}
// Unmasked copy into a single destination matrix using DefaultCopyPolicy.
// Forwards all arguments unchanged to gridCopy_.
template <class SrcPtr, typename DstType>
__host__ void gridCopy(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, stream);
}
// Masked copy of a source tuple into two destination matrices using
// DefaultCopyPolicy. Forwards all arguments unchanged to gridCopy_.
template <class SrcPtrTuple, typename D0, typename D1, class MaskPtr>
__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, mask, stream);
}
// Unmasked copy of a source tuple into two destination matrices using
// DefaultCopyPolicy. Forwards all arguments unchanged to gridCopy_.
template <class SrcPtrTuple, typename D0, typename D1>
__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, stream);
}
// Masked copy of a source tuple into three destination matrices using
// DefaultCopyPolicy. Forwards all arguments unchanged to gridCopy_.
template <class SrcPtrTuple, typename D0, typename D1, typename D2, class MaskPtr>
__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, mask, stream);
}
// Unmasked copy of a source tuple into three destination matrices using
// DefaultCopyPolicy. Forwards all arguments unchanged to gridCopy_.
template <class SrcPtrTuple, typename D0, typename D1, typename D2>
__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, stream);
}
// Masked copy of a source tuple into four destination matrices using
// DefaultCopyPolicy. Forwards all arguments unchanged to gridCopy_.
template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3, class MaskPtr>
__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultCopyPolicy policy_type;
    gridCopy_<policy_type>(src, dst, mask, stream);
}
// Unmasked copy of a source tuple into four destination matrices using
// DefaultCopyPolicy. Forwards all arguments unchanged to
// gridCopy_<DefaultCopyPolicy>.
//
// This overload completes the gridCopy() family: every other arity has both a
// masked and an unmasked default-policy wrapper named gridCopy, but this one
// was only reachable under the name gridCopy_.
template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, Stream& stream = Stream::Null())
{
    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
}

// Legacy spelling of the wrapper above (no explicit Policy argument), retained
// so that code written against the original, misnamed overload keeps
// compiling. New code should call gridCopy() instead.
template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, Stream& stream = Stream::Null())
{
    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
}
}}
#endif

View File

@ -0,0 +1,132 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_COPY_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_COPY_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace grid_copy_detail
{
// Element-wise copy kernel: one thread per (row, col) position.
//
// A thread writes saturate_cast<DstType>(src(row, col)) into dst(row, col)
// only when its position lies inside rows x cols and mask(row, col) is
// non-zero. The mask is never evaluated for out-of-range positions.
template <class SrcPtr, typename DstType, class MaskPtr>
__global__ void copy(const SrcPtr src, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside = (col < cols) && (row < rows);

    if (inside && mask(row, col))
        dst(row, col) = saturate_cast<DstType>(src(row, col));
}
// Host-side launcher for the copy kernel.
//
// The block shape comes from Policy::block_size_x / block_size_y and the grid
// is sized with divUp so that it covers cols x rows. The result of
// cudaGetLastError() is passed through CV_CUDEV_SAFE_CALL after the launch;
// when the stream is the default stream (0), cudaDeviceSynchronize() is
// called as well.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void copy(const SrcPtr& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    copy<<<blocks, threads, 0, stream>>>(src, dst, mask, rows, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
// Compile-time recursion over tuple elements: Unroll<count>::copy handles
// element (count - 1) and then delegates the remaining elements to
// Unroll<count - 1>.
template <int count> struct Unroll
{
    // Copies src[count-1](y, x) into dst[count-1](y, x), converting with
    // saturate_cast to the destination pointer's value_type.
    template <class SrcPtrTuple, class DstPtrTuple>
    __device__ static void copy(const SrcPtrTuple& src, DstPtrTuple& dst, const int y, const int x)
    {
        typedef typename tuple_element<count - 1, DstPtrTuple>::type  out_ptr_type;
        typedef typename PtrTraits<out_ptr_type>::value_type          out_value_type;

        get<count - 1>(dst)(y, x) = saturate_cast<out_value_type>(get<count - 1>(src)(y, x));

        // Continue with the lower-indexed elements.
        Unroll<count - 1>::copy(src, dst, y, x);
    }
};
// Recursion terminator: no tuple elements are left, so copy() does nothing.
template <> struct Unroll<0>
{
    template <class SrcPtrTuple, class DstPtrTuple>
    __device__ __forceinline__ static void copy(const SrcPtrTuple&, DstPtrTuple&, const int, const int)
    {
        // intentionally empty
    }
};
// Tuple copy kernel: one thread per (row, col) position.
//
// When the position lies inside rows x cols and mask(row, col) is non-zero,
// every element of the source tuple is copied to the matching element of the
// destination tuple via Unroll<N>::copy, where N is tuple_size of the source.
// The mask is never evaluated for out-of-range positions.
template <class SrcPtrTuple, class DstPtrTuple, class MaskPtr>
__global__ void copy_tuple(const SrcPtrTuple src, DstPtrTuple dst, const MaskPtr mask, const int rows, const int cols)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside = (col < cols) && (row < rows);

    if (inside && mask(row, col))
        Unroll<tuple_size<SrcPtrTuple>::value>::copy(src, dst, row, col);
}
// Host-side launcher for the copy_tuple kernel.
//
// The block shape comes from Policy::block_size_x / block_size_y and the grid
// is sized with divUp so that it covers cols x rows. The result of
// cudaGetLastError() is passed through CV_CUDEV_SAFE_CALL after the launch;
// when the stream is the default stream (0), cudaDeviceSynchronize() is
// called as well.
template <class Policy, class SrcPtrTuple, class DstPtrTuple, class MaskPtr>
__host__ void copy_tuple(const SrcPtrTuple& src, const DstPtrTuple& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    copy_tuple<<<blocks, threads, 0, stream>>>(src, dst, mask, rows, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
}
}}
#endif

View File

@ -0,0 +1,475 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_GLOB_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_GLOB_REDUCE_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../util/atomic.hpp"
#include "../../util/vec_traits.hpp"
#include "../../util/type_traits.hpp"
#include "../../util/limits.hpp"
#include "../../block/reduce.hpp"
#include "../../functional/functional.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace grid_glob_reduce_detail
{
// Unroll
template <int cn> struct Unroll;
// Single-channel case: every helper is a pass-through, so the scalar
// arguments are handed on without being wrapped in a tuple.
template <> struct Unroll<1>
{
    // Returns the buffer pointer itself (as a volatile pointer).
    template <int BLOCK_SIZE, typename R>
    __device__ __forceinline__ static volatile R* smem(R* base)
    {
        return base;
    }

    // Returns a reference to the value itself.
    template <typename R>
    __device__ __forceinline__ static R& res(R& value)
    {
        return value;
    }

    // Returns the operation itself.
    template <class Op>
    __device__ __forceinline__ static const Op& op(const Op& f)
    {
        return f;
    }
};
// Two-channel case: packs per-channel buffers, components and operations
// into 2-element tuples.
template <> struct Unroll<2>
{
    // Splits one buffer into two consecutive slices of BLOCK_SIZE elements
    // and returns them through smem_tuple.
    template <int BLOCK_SIZE, typename R>
    __device__ __forceinline__ static tuple<volatile R*, volatile R*> smem(R* base)
    {
        R* const slice0 = base;
        R* const slice1 = base + BLOCK_SIZE;
        return smem_tuple(slice0, slice1);
    }

    // Ties the .x and .y components of a vector value into a tuple of references.
    template <typename R>
    __device__ __forceinline__ static tuple<typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&> res(R& value)
    {
        return tie(value.x, value.y);
    }

    // Replicates the operation once per channel.
    template <class Op>
    __device__ __forceinline__ static tuple<Op, Op> op(const Op& f)
    {
        return make_tuple(f, f);
    }
};
// Three-channel case: packs per-channel buffers, components and operations
// into 3-element tuples.
template <> struct Unroll<3>
{
    // Splits one buffer into three consecutive slices of BLOCK_SIZE elements
    // and returns them through smem_tuple.
    template <int BLOCK_SIZE, typename R>
    __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*> smem(R* base)
    {
        R* const slice0 = base;
        R* const slice1 = base + BLOCK_SIZE;
        R* const slice2 = base + 2 * BLOCK_SIZE;
        return smem_tuple(slice0, slice1, slice2);
    }

    // Ties the .x, .y and .z components of a vector value into a tuple of references.
    template <typename R>
    __device__ __forceinline__ static tuple<typename VecTraits<R>::elem_type&,
                                            typename VecTraits<R>::elem_type&,
                                            typename VecTraits<R>::elem_type&> res(R& value)
    {
        return tie(value.x, value.y, value.z);
    }

    // Replicates the operation once per channel.
    template <class Op>
    __device__ __forceinline__ static tuple<Op, Op, Op> op(const Op& f)
    {
        return make_tuple(f, f, f);
    }
};
// Four-channel case: packs per-channel buffers, components and operations
// into 4-element tuples.
template <> struct Unroll<4>
{
    // Splits one buffer into four consecutive slices of BLOCK_SIZE elements
    // and returns them through smem_tuple.
    template <int BLOCK_SIZE, typename R>
    __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem(R* base)
    {
        R* const slice0 = base;
        R* const slice1 = base + BLOCK_SIZE;
        R* const slice2 = base + 2 * BLOCK_SIZE;
        R* const slice3 = base + 3 * BLOCK_SIZE;
        return smem_tuple(slice0, slice1, slice2, slice3);
    }

    // Ties the .x, .y, .z and .w components of a vector value into a tuple of references.
    template <typename R>
    __device__ __forceinline__ static tuple<typename VecTraits<R>::elem_type&,
                                            typename VecTraits<R>::elem_type&,
                                            typename VecTraits<R>::elem_type&,
                                            typename VecTraits<R>::elem_type&> res(R& value)
    {
        return tie(value.x, value.y, value.z, value.w);
    }

    // Replicates the operation once per channel.
    template <class Op>
    __device__ __forceinline__ static tuple<Op, Op, Op, Op> op(const Op& f)
    {
        return make_tuple(f, f, f, f);
    }
};
// AtomicUnroll
template <typename R, int cn> struct AtomicUnroll;
// Single-channel atomics: each helper forwards directly to the matching
// atomicAdd / atomicMin / atomicMax call on the destination element.
template <typename R> struct AtomicUnroll<R, 1>
{
    __device__ __forceinline__ static void add(R* dst, R v)
    {
        atomicAdd(dst, v);
    }

    __device__ __forceinline__ static void min(R* dst, R v)
    {
        atomicMin(dst, v);
    }

    __device__ __forceinline__ static void max(R* dst, R v)
    {
        atomicMax(dst, v);
    }
};
// Two-channel atomics: component .x is applied to dst[0] and .y to dst[1],
// one atomic call per component.
template <typename R> struct AtomicUnroll<R, 2>
{
    typedef typename MakeVec<R, 2>::type val_type;

    __device__ __forceinline__ static void add(R* dst, val_type v)
    {
        atomicAdd(&dst[0], v.x);
        atomicAdd(&dst[1], v.y);
    }

    __device__ __forceinline__ static void min(R* dst, val_type v)
    {
        atomicMin(&dst[0], v.x);
        atomicMin(&dst[1], v.y);
    }

    __device__ __forceinline__ static void max(R* dst, val_type v)
    {
        atomicMax(&dst[0], v.x);
        atomicMax(&dst[1], v.y);
    }
};
// Three-channel atomics: components .x, .y, .z are applied to dst[0..2],
// one atomic call per component.
template <typename R> struct AtomicUnroll<R, 3>
{
    typedef typename MakeVec<R, 3>::type val_type;

    __device__ __forceinline__ static void add(R* dst, val_type v)
    {
        atomicAdd(&dst[0], v.x);
        atomicAdd(&dst[1], v.y);
        atomicAdd(&dst[2], v.z);
    }

    __device__ __forceinline__ static void min(R* dst, val_type v)
    {
        atomicMin(&dst[0], v.x);
        atomicMin(&dst[1], v.y);
        atomicMin(&dst[2], v.z);
    }

    __device__ __forceinline__ static void max(R* dst, val_type v)
    {
        atomicMax(&dst[0], v.x);
        atomicMax(&dst[1], v.y);
        atomicMax(&dst[2], v.z);
    }
};
// Four-channel atomics: components .x, .y, .z, .w are applied to dst[0..3],
// one atomic call per component.
template <typename R> struct AtomicUnroll<R, 4>
{
    typedef typename MakeVec<R, 4>::type val_type;

    __device__ __forceinline__ static void add(R* dst, val_type v)
    {
        atomicAdd(&dst[0], v.x);
        atomicAdd(&dst[1], v.y);
        atomicAdd(&dst[2], v.z);
        atomicAdd(&dst[3], v.w);
    }

    __device__ __forceinline__ static void min(R* dst, val_type v)
    {
        atomicMin(&dst[0], v.x);
        atomicMin(&dst[1], v.y);
        atomicMin(&dst[2], v.z);
        atomicMin(&dst[3], v.w);
    }

    __device__ __forceinline__ static void max(R* dst, val_type v)
    {
        atomicMax(&dst[0], v.x);
        atomicMax(&dst[1], v.y);
        atomicMax(&dst[2], v.z);
        atomicMax(&dst[3], v.w);
    }
};
// SumReductor
// Per-thread accumulator used by glob_reduce to compute a sum.
//
// Each thread accumulates converted source values in `sum`; reduceGrid()
// combines the per-thread sums of a block with blockReduce and then thread 0
// adds the block result to the output with atomic adds, one per channel.
template <typename src_type, typename work_type> struct SumReductor
{
    typedef typename VecTraits<work_type>::elem_type work_elem_type;

    enum { cn = VecTraits<src_type>::cn };

    work_type sum;

    // Starts with every component of the accumulator set to 0.
    __device__ __forceinline__ SumReductor()
    {
        sum = VecTraits<work_type>::all(0);
    }

    // Adds one source value, converted to work_type with saturate_cast.
    __device__ __forceinline__ void reduceVal(typename TypeTraits<src_type>::parameter_type srcVal)
    {
        sum = sum + saturate_cast<work_type>(srcVal);
    }

    // Block-level reduction followed by an atomic accumulation into `result`.
    // `tid` is the linear thread index within the block; the shared scratch
    // buffer holds BLOCK_SIZE elements for each of the cn channels.
    template <int BLOCK_SIZE>
    __device__ void reduceGrid(work_elem_type* result, int tid)
    {
        typedef Unroll<cn> unroll_type;

        __shared__ work_elem_type scratch[BLOCK_SIZE * cn];

        plus<work_elem_type> addOp;

        blockReduce<BLOCK_SIZE>(unroll_type::template smem<BLOCK_SIZE>(scratch),
                                unroll_type::res(sum),
                                tid,
                                unroll_type::op(addOp));

        if (tid == 0)
            AtomicUnroll<work_elem_type, cn>::add(result, sum);
    }
};
// MinMaxReductor
// Operation bundle for a minimum reduction: inherits the binary functor from
// minimum<T> and adds the start value and the matching atomic update.
template <typename T> struct minop : minimum<T>
{
    // Start value for the running minimum: the largest value numeric_limits reports for T.
    __device__ __forceinline__ static T initial()
    {
        return numeric_limits<T>::max();
    }

    // Folds a block's result into the global result with atomicMin.
    __device__ __forceinline__ static void atomic(T* result, T blockVal)
    {
        atomicMin(result, blockVal);
    }
};
// Operation bundle for a maximum reduction: inherits the binary functor from
// maximum<T> and adds the start value and the matching atomic update.
template <typename T> struct maxop : maximum<T>
{
    // Start value for the running maximum.
    //
    // For signed and floating-point T this is -numeric_limits<T>::max(), as
    // before. For unsigned T that negation wraps around to 1, which is not a
    // valid identity for max (an input consisting only of zeros would reduce
    // to 1), so 0 is returned instead.
    __device__ __forceinline__ static T initial()
    {
        // T(-1) > T(0) holds only when T cannot represent negative values.
        return (T(-1) > T(0)) ? T(0) : T(-numeric_limits<T>::max());
    }

    // Folds a block's result into the global result with atomicMax.
    __device__ __forceinline__ static void atomic(T* result, T myval)
    {
        atomicMax(result, myval);
    }
};
// Empty tag type: selects the MinMaxReductor specialization that tracks the
// minimum and the maximum at the same time.
struct both {};
// Per-thread accumulator used by glob_reduce for a single extremum.
//
// Op provides the binary functor (operator()), the start value
// (Op::initial()) and the atomic update (Op::atomic()).
template <class Op, typename src_type, typename work_type> struct MinMaxReductor
{
    work_type myval;

    // Starts from the operation's initial value.
    __device__ __forceinline__ MinMaxReductor()
    {
        myval = Op::initial();
    }

    // Combines one source value with the running result.
    __device__ __forceinline__ void reduceVal(typename TypeTraits<src_type>::parameter_type srcVal)
    {
        Op combine;
        const work_type merged = combine(myval, srcVal);
        myval = merged;
    }

    // Block-level reduction through a shared buffer of BLOCK_SIZE elements,
    // after which thread 0 folds the block result into `result` atomically.
    // `tid` is the linear thread index within the block.
    template <int BLOCK_SIZE>
    __device__ void reduceGrid(work_type* result, int tid)
    {
        __shared__ work_type scratch[BLOCK_SIZE];

        Op combine;
        blockReduce<BLOCK_SIZE>(scratch, myval, tid, combine);

        if (tid == 0)
            Op::atomic(result, myval);
    }
};
// Per-thread accumulator used by glob_reduce to track the minimum and the
// maximum in a single pass. The block results are written to result[0]
// (minimum) and result[1] (maximum).
template <typename src_type, typename work_type> struct MinMaxReductor<both, src_type, work_type>
{
    work_type mymin;
    work_type mymax;

    // The running minimum starts at the largest value of work_type.
    //
    // The running maximum starts at -max() for signed and floating-point
    // types. For unsigned types that negation wraps around to 1, which is not
    // a valid identity for max (an all-zero input would report a maximum of
    // 1), so 0 is used instead.
    __device__ __forceinline__ MinMaxReductor()
    {
        mymin = numeric_limits<work_type>::max();

        // work_type(-1) > work_type(0) holds only when work_type cannot
        // represent negative values.
        mymax = (work_type(-1) > work_type(0)) ? work_type(0)
                                               : work_type(-numeric_limits<work_type>::max());
    }

    // Updates both running extrema with one source value.
    __device__ __forceinline__ void reduceVal(typename TypeTraits<src_type>::parameter_type srcVal)
    {
        minimum<work_type> minOp;
        maximum<work_type> maxOp;
        mymin = minOp(mymin, srcVal);
        mymax = maxOp(mymax, srcVal);
    }

    // Reduces both extrema across the block in one blockReduce call (two
    // shared buffers of BLOCK_SIZE elements each), after which thread 0
    // folds the block results into result[0] / result[1] atomically.
    // `tid` is the linear thread index within the block.
    template <int BLOCK_SIZE>
    __device__ void reduceGrid(work_type* result, int tid)
    {
        __shared__ work_type sminval[BLOCK_SIZE];
        __shared__ work_type smaxval[BLOCK_SIZE];

        minimum<work_type> minOp;
        maximum<work_type> maxOp;

        blockReduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), tie(mymin, mymax), tid, make_tuple(minOp, maxOp));

        if (tid == 0)
        {
            atomicMin(result, mymin);
            atomicMax(result + 1, mymax);
        }
    }
};
// glob_reduce
// Whole-image reduction kernel.
//
// Each thread visits up to PATCH_Y x PATCH_X positions, spaced blockDim.y
// rows and blockDim.x columns apart, and feeds every position accepted by the
// mask into its private Reductor. Afterwards every thread of the block calls
// Reductor::reduceGrid with its linear thread index.
template <class Reductor, int BLOCK_SIZE, int PATCH_X, int PATCH_Y, class SrcPtr, typename ResType, class MaskPtr>
__global__ void glob_reduce(const SrcPtr src, ResType* result, const MaskPtr mask, const int rows, const int cols)
{
    // First position handled by this thread inside the block's patch area.
    const int firstCol = blockIdx.x * blockDim.x * PATCH_X + threadIdx.x;
    const int firstRow = blockIdx.y * blockDim.y * PATCH_Y + threadIdx.y;

    Reductor acc;

    int row = firstRow;
    for (int py = 0; py < PATCH_Y && row < rows; ++py, row += blockDim.y)
    {
        int col = firstCol;
        for (int px = 0; px < PATCH_X && col < cols; ++px, col += blockDim.x)
        {
            if (!mask(row, col))
                continue;

            acc.reduceVal(src(row, col));
        }
    }

    // Reached by every thread, including those that processed no element.
    const int linearTid = threadIdx.y * blockDim.x + threadIdx.x;
    acc.template reduceGrid<BLOCK_SIZE>(result, linearTid);
}
// Host-side launcher for the glob_reduce kernel.
//
// The block shape comes from Policy::block_size_x / block_size_y. Each block
// covers (block size * patch size) positions per dimension, and the grid is
// sized with divUp accordingly. The result of cudaGetLastError() is passed
// through CV_CUDEV_SAFE_CALL after the launch; when the stream is the default
// stream (0), cudaDeviceSynchronize() is called as well.
template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void glob_reduce(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y;

    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x * Policy::patch_size_x),
                      divUp(rows, threads.y * Policy::patch_size_y));

    glob_reduce<Reductor, BLOCK_SIZE, Policy::patch_size_x, Policy::patch_size_y><<<blocks, threads, 0, stream>>>(src, result, mask, rows, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
// callers
// Launches glob_reduce with a SumReductor. The working type is ResType
// widened to as many channels as the source value type has.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void sum(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef typename PtrTraits<SrcPtr>::value_type                       src_type;
    typedef typename MakeVec<ResType, VecTraits<src_type>::cn>::type     work_type;
    typedef SumReductor<src_type, work_type>                             reductor_type;

    glob_reduce<reductor_type, Policy>(src, result, mask, rows, cols, stream);
}
// Launches glob_reduce with a MinMaxReductor driven by minop. The working
// type is ResType widened to as many channels as the source value type has.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void minVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef typename PtrTraits<SrcPtr>::value_type                       src_type;
    typedef typename MakeVec<ResType, VecTraits<src_type>::cn>::type     work_type;
    typedef MinMaxReductor<minop<work_type>, src_type, work_type>        reductor_type;

    glob_reduce<reductor_type, Policy>(src, result, mask, rows, cols, stream);
}
// Launches glob_reduce with a MinMaxReductor driven by maxop. The working
// type is ResType widened to as many channels as the source value type has.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void maxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef typename PtrTraits<SrcPtr>::value_type                       src_type;
    typedef typename MakeVec<ResType, VecTraits<src_type>::cn>::type     work_type;
    typedef MinMaxReductor<maxop<work_type>, src_type, work_type>        reductor_type;

    glob_reduce<reductor_type, Policy>(src, result, mask, rows, cols, stream);
}
// Launches glob_reduce with the MinMaxReductor<both, ...> specialization,
// which tracks minimum and maximum together. The working type is ResType
// widened to as many channels as the source value type has.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void minMaxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef typename PtrTraits<SrcPtr>::value_type                       src_type;
    typedef typename MakeVec<ResType, VecTraits<src_type>::cn>::type     work_type;
    typedef MinMaxReductor<both, src_type, work_type>                    reductor_type;

    glob_reduce<reductor_type, Policy>(src, result, mask, rows, cols, stream);
}
}
}}
#endif

View File

@ -0,0 +1,109 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_HISTOGRAM_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_HISTOGRAM_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/atomic.hpp"
namespace cv { namespace cudev {
namespace grid_histogram_detail
{
// Histogram kernel with a per-block histogram in shared memory.
//
// Steps:
//   1. the block zeroes its BIN_COUNT shared bins;
//   2. each thread row (threadIdx.y) handles one image row, its threads
//      striding across the columns by blockDim.x and incrementing the shared
//      bin (value % BIN_COUNT) for every position accepted by the mask;
//   3. non-zero shared bins are added to the global histogram atomically.
// A barrier separates the steps. The grid is indexed through blockIdx.x only.
template <int BIN_COUNT, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
__global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols)
{
    __shared__ ResType localHist[BIN_COUNT];

    const int row       = blockIdx.x * blockDim.y + threadIdx.y;
    const int linearTid = threadIdx.y * blockDim.x + threadIdx.x;

    // Step 1: clear the block-local bins.
    for (int bin = linearTid; bin < BIN_COUNT; bin += BLOCK_SIZE)
        localHist[bin] = 0;

    __syncthreads();

    // Step 2: accumulate this thread's share of the row.
    if (row < rows)
    {
        for (int col = threadIdx.x; col < cols; col += blockDim.x)
        {
            if (!mask(row, col))
                continue;

            const uint data = src(row, col);
            atomicAdd(&localHist[data % BIN_COUNT], 1);
        }
    }

    __syncthreads();

    // Step 3: merge into the global histogram, skipping empty bins.
    for (int bin = linearTid; bin < BIN_COUNT; bin += BLOCK_SIZE)
    {
        const ResType count = localHist[bin];

        if (count > 0)
            atomicAdd(hist + bin, count);
    }
}
// Host-side launcher for the histogram kernel.
//
// The block shape comes from Policy::block_size_x / block_size_y; the grid is
// one-dimensional with divUp(rows, block.y) blocks. The result of
// cudaGetLastError() is passed through CV_CUDEV_SAFE_CALL after the launch;
// when the stream is the default stream (0), cudaDeviceSynchronize() is
// called as well.
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void histogram(const SrcPtr& src, ResType* hist, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y;

    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(rows, threads.y));

    histogram<BIN_COUNT, BLOCK_SIZE><<<blocks, threads, 0, stream>>>(src, hist, mask, rows, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
}
}}
#endif

View File

@ -0,0 +1,626 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_INTEGRAL_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_INTEGRAL_DETAIL_HPP__
#include "../../common.hpp"
#include "../../warp/shuffle.hpp"
#include "../../block/scan.hpp"
#include "../../ptr2d/glob.hpp"
namespace cv { namespace cudev {
namespace integral_detail
{
// horizontal_pass
// Row-wise running sum for an arbitrary source pointer: block blockIdx.x
// processes row blockIdx.x.
//
// The row is consumed in buckets of NUM_SCAN_THREADS columns. For every
// bucket each thread loads one element (0 past the end of the row), the block
// runs blockScanInclusive over those elements, and the scan value plus the
// carry from earlier buckets is stored. The last thread then adds its scan
// value to the shared carry (presumably the bucket total, given an inclusive
// scan -- confirm against block/scan.hpp).
template <int NUM_SCAN_THREADS, class SrcPtr, typename D>
__global__ void horizontal_pass(const SrcPtr src, GlobPtr<D> dst, const int cols)
{
    __shared__ D scanBuf[NUM_SCAN_THREADS * 2];
    __shared__ D carry;

    carry = 0;

    __syncthreads();

    D* const outRow = dst.row(blockIdx.x);

    int bucketsLeft = divUp(cols, NUM_SCAN_THREADS);

    for (int bucketStart = 0; bucketsLeft--; bucketStart += NUM_SCAN_THREADS)
    {
        const int col = bucketStart + threadIdx.x;
        const bool inRow = col < cols;

        D elem;
        if (inRow)
            elem = src(blockIdx.x, col);
        else
            elem = 0.0f;

        const D scanned = blockScanInclusive<NUM_SCAN_THREADS>(elem, scanBuf, threadIdx.x);

        if (inRow)
            outRow[col] = carry + scanned;

        // Every thread must have read the carry before it is updated.
        __syncthreads();

        if (threadIdx.x == NUM_SCAN_THREADS - 1)
        {
            carry += scanned;
        }

        // Make the updated carry visible before the next bucket.
        __syncthreads();
    }
}
// Row-wise running sum for a GlobPtr source: block blockIdx.x processes row
// blockIdx.x, reading the source through a raw row pointer.
//
// The row is consumed in buckets of NUM_SCAN_THREADS columns. For every
// bucket each thread loads one element (0 past the end of the row), the block
// runs blockScanInclusive over those elements, and the scan value plus the
// carry from earlier buckets is stored. The last thread then adds its scan
// value to the shared carry (presumably the bucket total, given an inclusive
// scan -- confirm against block/scan.hpp).
template <int NUM_SCAN_THREADS, typename T, typename D>
__global__ void horizontal_pass(const GlobPtr<T> src, GlobPtr<D> dst, const int cols)
{
    __shared__ D scanBuf[NUM_SCAN_THREADS * 2];
    __shared__ D carry;

    carry = 0;

    __syncthreads();

    const T* const inRow = src.row(blockIdx.x);
    D* const outRow = dst.row(blockIdx.x);

    int bucketsLeft = divUp(cols, NUM_SCAN_THREADS);

    for (int bucketStart = 0; bucketsLeft--; bucketStart += NUM_SCAN_THREADS)
    {
        const int col = bucketStart + threadIdx.x;
        const bool inRange = col < cols;

        D elem;
        if (inRange)
            elem = inRow[col];
        else
            elem = 0.0f;

        const D scanned = blockScanInclusive<NUM_SCAN_THREADS>(elem, scanBuf, threadIdx.x);

        if (inRange)
            outRow[col] = carry + scanned;

        // Every thread must have read the carry before it is updated.
        __syncthreads();

        if (threadIdx.x == NUM_SCAN_THREADS - 1)
        {
            carry += scanned;
        }

        // Make the updated carry visible before the next bucket.
        __syncthreads();
    }
}
// Host-side launcher for the horizontal_pass kernels: one block of 256 threads
// per image row. The result of cudaGetLastError() is passed through
// CV_CUDEV_SAFE_CALL after the launch; no device synchronization is performed
// here.
template <class SrcPtr, typename D>
__host__ void horizontal_pass(const SrcPtr& src, const GlobPtr<D>& dst, int rows, int cols, cudaStream_t stream)
{
    const int NUM_SCAN_THREADS = 256;

    const dim3 threads(NUM_SCAN_THREADS);
    const dim3 blocks(rows);

    horizontal_pass<NUM_SCAN_THREADS><<<blocks, threads, 0, stream>>>(src, dst, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );
}
// horisontal_pass_8u_shfl
// Splits a 32-bit word into its four bytes: .x receives bits 0-7, .y bits
// 8-15, .z bits 16-23 and .w bits 24-31.
__device__ static uchar4 int_to_uchar4(unsigned int in)
{
    uchar4 unpacked;

    unpacked.x = (in >>  0) & 0xffu;
    unpacked.y = (in >>  8) & 0xffu;
    unpacked.z = (in >> 16) & 0xffu;
    unpacked.w = (in >> 24) & 0xffu;

    return unpacked;
}
__global__ static void horisontal_pass_8u_shfl_kernel(const GlobPtr<uint4> img, GlobPtr<uint4> integral)
{
#if CV_CUDEV_ARCH >= 300
__shared__ int sums[128];
const int id = threadIdx.x;
const int lane_id = id % warpSize;
const int warp_id = id / warpSize;
const uint4 data = img(blockIdx.x, id);
const uchar4 a = int_to_uchar4(data.x);
const uchar4 b = int_to_uchar4(data.y);
const uchar4 c = int_to_uchar4(data.z);
const uchar4 d = int_to_uchar4(data.w);
int result[16];
result[0] = a.x;
result[1] = result[0] + a.y;
result[2] = result[1] + a.z;
result[3] = result[2] + a.w;
result[4] = result[3] + b.x;
result[5] = result[4] + b.y;
result[6] = result[5] + b.z;
result[7] = result[6] + b.w;
result[8] = result[7] + c.x;
result[9] = result[8] + c.y;
result[10] = result[9] + c.z;
result[11] = result[10] + c.w;
result[12] = result[11] + d.x;
result[13] = result[12] + d.y;
result[14] = result[13] + d.z;
result[15] = result[14] + d.w;
int sum = result[15];
// the prefix sum for each thread's 16 value is computed,
// now the final sums (result[15]) need to be shared
// with the other threads and add. To do this,
// the shfl_up() instruction is used and a shuffle scan
// operation is performed to distribute the sums to the correct
// threads
#pragma unroll
for (int i = 1; i < 32; i *= 2)
{
const int n = shfl_up(sum, i, 32);
if (lane_id >= i)
{
#pragma unroll
for (int k = 0; k < 16; ++k)
result[k] += n;
sum += n;
}
}
// Now the final sum for the warp must be shared
// between warps. This is done by each warp
// having a thread store to shared memory, then
// having some other warp load the values and
// compute a prefix sum, again by using shfl_up.
// The results are uniformly added back to the warps.
// last thread in the warp holding sum of the warp
// places that in shared
if (threadIdx.x % warpSize == warpSize - 1)
sums[warp_id] = result[15];
__syncthreads();
if (warp_id == 0)
{
int warp_sum = sums[lane_id];
#pragma unroll
for (int i = 1; i <= 32; i *= 2)
{
const int n = shfl_up(warp_sum, i, 32);
if (lane_id >= i)
warp_sum += n;
}
sums[lane_id] = warp_sum;
}
__syncthreads();
int blockSum = 0;
// fold in unused warp
if (warp_id > 0)
{
blockSum = sums[warp_id - 1];
#pragma unroll
for (int k = 0; k < 16; ++k)
result[k] += blockSum;
}
// assemble result
// Each thread has 16 values to write, which are
// now integer data (to avoid overflow). Instead of
// each thread writing consecutive uint4s, the
// approach shown here experiments using
// the shuffle command to reformat the data
// inside the registers so that each thread holds
// consecutive data to be written so larger contiguous
// segments can be assembled for writing.
/*
For example data that needs to be written as
GMEM[16] <- x0 x1 x2 x3 y0 y1 y2 y3 z0 z1 z2 z3 w0 w1 w2 w3
but is stored in registers (r0..r3), in four threads (0..3) as:
threadId 0 1 2 3
r0 x0 y0 z0 w0
r1 x1 y1 z1 w1
r2 x2 y2 z2 w2
r3 x3 y3 z3 w3
after apply shfl_xor operations to move data between registers r1..r3:
threadId 00 01 10 11
x0 y0 z0 w0
xor(01)->y1 x1 w1 z1
xor(10)->z2 w2 x2 y2
xor(11)->w3 z3 y3 x3
and now x0..x3, and z0..z3 can be written out in order by all threads.
In the current code, each register above is actually representing
four integers to be written as uint4's to GMEM.
*/
result[4] = shfl_xor(result[4] , 1, 32);
result[5] = shfl_xor(result[5] , 1, 32);
result[6] = shfl_xor(result[6] , 1, 32);
result[7] = shfl_xor(result[7] , 1, 32);
result[8] = shfl_xor(result[8] , 2, 32);
result[9] = shfl_xor(result[9] , 2, 32);
result[10] = shfl_xor(result[10], 2, 32);
result[11] = shfl_xor(result[11], 2, 32);
result[12] = shfl_xor(result[12], 3, 32);
result[13] = shfl_xor(result[13], 3, 32);
result[14] = shfl_xor(result[14], 3, 32);
result[15] = shfl_xor(result[15], 3, 32);
uint4* integral_row = integral.row(blockIdx.x);
uint4 output;
///////
if (threadIdx.x % 4 == 0)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 2)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16] = output;
///////
if (threadIdx.x % 4 == 2)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 8] = output;
// continuning from the above example,
// this use of shfl_xor() places the y0..y3 and w0..w3 data
// in order.
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] = shfl_xor(result[i], 1, 32);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 2)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16 + 4] = output;
///////
if (threadIdx.x % 4 == 2)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 12] = output;
#endif
}
__host__ static void horisontal_pass_8u_shfl(const GlobPtr<uchar> src, GlobPtr<uint> integral, int rows, int cols, cudaStream_t stream)
{
    // Host-side launcher for horisontal_pass_8u_shfl_kernel.
    //
    // Launch geometry: one block per image row, and one thread per group of
    // 16 source values, hence cols / 16 threads in every block.
    // Original author's note: this is considered safe because the row step
    // cannot be smaller than 512 bytes.
    const int threadsPerBlock = cols / 16;
    const int blocksPerGrid = rows;

    CV_CUDEV_SAFE_CALL( cudaFuncSetCacheConfig(horisontal_pass_8u_shfl_kernel, cudaFuncCachePreferL1) );

    // The kernel works on uint4 elements, so both buffers are re-viewed as
    // uint4 rows while keeping their original byte step.
    GlobPtr<uint4> srcAsUint4 = globPtr((uint4*) src.data, src.step);
    GlobPtr<uint4> dstAsUint4 = globPtr((uint4*) integral.data, integral.step);

    horisontal_pass_8u_shfl_kernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(srcAsUint4, dstAsUint4);

    // Surface launch-configuration errors immediately.
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );
}
// vertical
// Vertical pass of the integral image: replaces every column of `integral`
// (in place) with its running sum from the top row downwards.
//
// integral - image to scan in place
// rows     - number of valid rows
// cols     - number of valid columns
//
// Threads are indexed as (threadIdx.x, threadIdx.y); the host launcher in this
// file starts the kernel with a 32x8 block and one block per 32 columns.
// The image is processed in horizontal bands ("buckets"); a per-column carry
// is kept between bands.
template <typename T>
__global__ void vertical_pass(GlobPtr<T> integral, const int rows, const int cols)
{
#if CV_CUDEV_ARCH >= 300
    // Shuffle-based implementation.
    // NOTE(review): with the 32x8 launch used in this file the 9th column of
    // `sums` is never indexed; presumably it is padding - confirm.
    __shared__ T sums[32][9];

    const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
    const int lane_id = tidx % 8;

    sums[threadIdx.x][threadIdx.y] = 0;
    __syncthreads();

    // Running total of all bands already processed for this thread's column.
    T stepSum = 0;

    int numBuckets = divUp(rows, blockDim.y);
    int y = threadIdx.y;

    while (numBuckets--)
    {
        // Remember whether this thread maps to a real pixel *before* y is
        // advanced. The same flag must guard both the load and the store:
        // in the last band y may be >= rows, and storing through p there
        // would write past the last image row.
        const bool inImage = (tidx < cols) && (y < rows);

        T* p = integral.row(y) + tidx;
        T sum = inImage ? *p : 0;

        y += blockDim.y;

        sums[threadIdx.x][threadIdx.y] = sum;
        __syncthreads();

        // place into SMEM
        // shfl scan reduce the SMEM, reformatting so the column
        // sums are computed in a warp
        // then read out properly
        const int j = threadIdx.x % 8;
        const int k = threadIdx.x / 8 + threadIdx.y * 4;

        T partial_sum = sums[k][j];

        // Inclusive scan over groups of 8 lanes. All threads execute the
        // shuffle; only lanes with lane_id >= i accumulate the result.
        for (int i = 1; i <= 8; i *= 2)
        {
            T n = shfl_up(partial_sum, i, 32);

            if (lane_id >= i)
                partial_sum += n;
        }

        sums[k][j] = partial_sum;
        __syncthreads();

        // Add the scanned total of the rows above this one inside the band.
        if (threadIdx.y > 0)
            sum += sums[threadIdx.x][threadIdx.y - 1];

        if (tidx < cols)
        {
            // Add the carry from previous bands, then extend the carry with
            // this band's column total.
            sum += stepSum;
            stepSum += sums[threadIdx.x][blockDim.y - 1];

            // Only store for rows that actually exist in the image.
            if (inImage)
                *p = sum;
        }

        // Protect `sums` before the next band overwrites it.
        __syncthreads();
    }
#else
    // Shared-memory implementation for devices without warp shuffle.
    __shared__ T smem[32][32];
    __shared__ T prevVals[32];

    // Per-threadIdx.y scratch area handed to warpScanInclusive.
    volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y;

    // prevVals carries the per-column total of all bands processed so far.
    if (threadIdx.y == 0)
        prevVals[threadIdx.x] = 0;

    __syncthreads();

    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    // Each band covers 8 * 4 = 32 rows: every thread handles 4 rows, 8 apart.
    int numBuckets = divUp(rows, 8 * 4);
    int offsetY = 0;

    while (numBuckets--)
    {
        const int curRowOffs = offsetY + threadIdx.y;

        T curElems[4];
        T temp[4];

        // load patch (zero-filled where the image has no data)

        smem[threadIdx.y +  0][threadIdx.x] = 0.0f;
        smem[threadIdx.y +  8][threadIdx.x] = 0.0f;
        smem[threadIdx.y + 16][threadIdx.x] = 0.0f;
        smem[threadIdx.y + 24][threadIdx.x] = 0.0f;

        if (x < cols)
        {
            for (int i = 0; i < 4; ++i)
            {
                if (curRowOffs + i * 8 < rows)
                    smem[threadIdx.y + i * 8][threadIdx.x] = integral(curRowOffs + i * 8, x);
            }
        }

        __syncthreads();

        // reduce: read the patch transposed so that one image column is
        // spread across threadIdx.x and can be scanned by warpScanInclusive

        curElems[0] = smem[threadIdx.x][threadIdx.y     ];
        curElems[1] = smem[threadIdx.x][threadIdx.y +  8];
        curElems[2] = smem[threadIdx.x][threadIdx.y + 16];
        curElems[3] = smem[threadIdx.x][threadIdx.y + 24];

        // smem is reused as scan scratch below, so all reads must finish first.
        __syncthreads();

        temp[0] = curElems[0] = warpScanInclusive(curElems[0], smem_row, threadIdx.x);
        temp[1] = curElems[1] = warpScanInclusive(curElems[1], smem_row, threadIdx.x);
        temp[2] = curElems[2] = warpScanInclusive(curElems[2], smem_row, threadIdx.x);
        temp[3] = curElems[3] = warpScanInclusive(curElems[3], smem_row, threadIdx.x);

        // Add the carry accumulated from previous bands.
        curElems[0] += prevVals[threadIdx.y     ];
        curElems[1] += prevVals[threadIdx.y +  8];
        curElems[2] += prevVals[threadIdx.y + 16];
        curElems[3] += prevVals[threadIdx.y + 24];

        __syncthreads();

        // The last lane holds the band total of its column: extend the carry.
        if (threadIdx.x == 31)
        {
            prevVals[threadIdx.y     ] += temp[0];
            prevVals[threadIdx.y +  8] += temp[1];
            prevVals[threadIdx.y + 16] += temp[2];
            prevVals[threadIdx.y + 24] += temp[3];
        }

        smem[threadIdx.y     ][threadIdx.x] = curElems[0];
        smem[threadIdx.y +  8][threadIdx.x] = curElems[1];
        smem[threadIdx.y + 16][threadIdx.x] = curElems[2];
        smem[threadIdx.y + 24][threadIdx.x] = curElems[3];

        __syncthreads();

        // store patch

        if (x < cols)
        {
            // write the 4 values of this thread back to the image
            for (int i = 0; i < 4; ++i)
            {
                if (curRowOffs + i * 8 < rows)
                    integral(curRowOffs + i * 8, x) = smem[threadIdx.x][threadIdx.y + i * 8];
            }
        }

        __syncthreads();

        offsetY += 8 * 4;
    }
#endif
}
// Host-side launcher for the vertical_pass kernel.
// Uses 32x8 threads per block and one block for every 32 image columns.
// No synchronization is performed here; only launch errors are checked.
template <typename T>
__host__ void vertical_pass(const GlobPtr<T>& integral, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(cols, threads.x));

    vertical_pass<<<blocks, threads, 0, stream>>>(integral, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );
}
// integral
// Generic integral image: a horizontal pass from src into dst followed by an
// in-place vertical pass over dst. When the default stream (0) is used, the
// call blocks until the device has finished.
template <class SrcPtr, typename D>
__host__ void integral(const SrcPtr& src, const GlobPtr<D>& dst, int rows, int cols, cudaStream_t stream)
{
    horizontal_pass(src, dst, rows, cols, stream);
    vertical_pass(dst, rows, cols, stream);

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}
// Integral image specialization for 8-bit input and 32-bit unsigned output.
//
// Uses the shuffle-based horizontal kernel when it is applicable and falls
// back to the generic horizontal_pass otherwise; the vertical pass is shared.
// When the default stream (0) is used, the call blocks until the device has
// finished.
__host__ static void integral(const GlobPtr<uchar> src, GlobPtr<uint> dst, int rows, int cols, cudaStream_t stream)
{
    // Requirements of horisontal_pass_8u_shfl:
    //  * compute capability 3.0 (warp shuffle);
    //  * it launches cols / 16 threads per row, and the kernel exchanges and
    //    stores data in groups of four neighbouring threads (shfl_xor with
    //    1, 2, 3 and threadIdx.x % 4 addressing), so the thread count must be
    //    a multiple of 4, i.e. cols must be a multiple of 64;
    //  * the thread count must not exceed the 1024 threads-per-block limit,
    //    otherwise the launch itself fails;
    //  * both buffers are accessed as uint4 and are required to be 32-byte
    //    aligned.
    const int maxThreadsPerBlock = 1024;

    const bool useShflKernel =
           deviceSupports(FEATURE_SET_COMPUTE_30)
        && (cols % 64 == 0)
        && (cols / 16 <= maxThreadsPerBlock)
        && reinterpret_cast<intptr_t>(src.data) % 32 == 0
        && reinterpret_cast<intptr_t>(dst.data) % 32 == 0;

    if (useShflKernel)
    {
        horisontal_pass_8u_shfl(src, dst, rows, cols, stream);
    }
    else
    {
        horizontal_pass(src, dst, rows, cols, stream);
    }

    vertical_pass(dst, rows, cols, stream);

    if (stream == 0)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
// Integral image for 8-bit input and signed 32-bit output.
// The destination buffer is re-viewed as unsigned 32-bit (same data pointer,
// same step) and the work is forwarded to the integral overload for that view.
__host__ static void integral(const GlobPtr<uchar> src, GlobPtr<int> dst, int rows, int cols, cudaStream_t stream)
{
    GlobPtr<uint> dstAsUint = globPtr((uint*) dst.data, dst.step);

    integral(src, dstAsUint, rows, cols, stream);
}
}
}}
#endif

View File

@ -0,0 +1,201 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/vec_traits.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../util/type_traits.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace pyramids_detail
{
// pyrDown kernel: produces one destination row per blockIdx.y.
//
// Stage 1: every thread applies a 5-tap vertical filter with weights
//          (0.0625, 0.25, 0.375, 0.25, 0.0625) centred on source row 2 * y
//          and stores the result in shared memory.
// Stage 2: the first 128 threads apply the same 5 taps horizontally at every
//          second column and write the result to dst.
//
// Brd supplies idx_low / idx_high, used to remap row/column indices that fall
// outside the source image.
// The constants 256, 253 and 128 below assume blockDim.x == 256, which is what
// the host-side pyrDown launcher in this file uses.
template <class Brd, class SrcPtr, typename DstType>
__global__ void pyrDown(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_cols)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
typedef typename VecTraits<src_type>::elem_type src_elem_type;
typedef typename LargerType<float, src_elem_type>::type work_elem_type;
typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
// 256 vertically filtered columns plus a 2-element halo on each side.
__shared__ work_type smem[256 + 4];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y;
const int src_y = 2 * y;
// Fast path: the thread's own 5 rows and its column are at least 2 pixels
// away from every image edge, so no index remapping is needed.
if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
{
{
// Vertical filter for the thread's own column -> smem[2 + threadIdx.x].
work_type sum;
sum = 0.0625f * src(src_y - 2, x);
sum = sum + 0.25f * src(src_y - 1, x);
sum = sum + 0.375f * src(src_y , x);
sum = sum + 0.25f * src(src_y + 1, x);
sum = sum + 0.0625f * src(src_y + 2, x);
smem[2 + threadIdx.x] = sum;
}
// The first two threads also fill the left halo (smem[0], smem[1]).
if (threadIdx.x < 2)
{
const int left_x = x - 2;
work_type sum;
sum = 0.0625f * src(src_y - 2, left_x);
sum = sum + 0.25f * src(src_y - 1, left_x);
sum = sum + 0.375f * src(src_y , left_x);
sum = sum + 0.25f * src(src_y + 1, left_x);
sum = sum + 0.0625f * src(src_y + 2, left_x);
smem[threadIdx.x] = sum;
}
// The last two threads (254, 255) also fill the right halo (smem[258], smem[259]).
if (threadIdx.x > 253)
{
const int right_x = x + 2;
work_type sum;
sum = 0.0625f * src(src_y - 2, right_x);
sum = sum + 0.25f * src(src_y - 1, right_x);
sum = sum + 0.375f * src(src_y , right_x);
sum = sum + 0.25f * src(src_y + 1, right_x);
sum = sum + 0.0625f * src(src_y + 2, right_x);
smem[4 + threadIdx.x] = sum;
}
}
else
{
// Border path: same computation, but row indices above/below src_y and all
// column indices are passed through Brd::idx_low / Brd::idx_high first.
{
work_type sum;
sum = 0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_high(x, src_cols));
sum = sum + 0.25f * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_high(x, src_cols));
sum = sum + 0.375f * src(src_y , Brd::idx_high(x, src_cols));
sum = sum + 0.25f * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_high(x, src_cols));
sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_high(x, src_cols));
smem[2 + threadIdx.x] = sum;
}
// Left halo; left_x can be out of range on either side, so both idx_high and
// idx_low are applied to it.
if (threadIdx.x < 2)
{
const int left_x = x - 2;
work_type sum;
sum = 0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
sum = sum + 0.25f * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
sum = sum + 0.375f * src(src_y , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
sum = sum + 0.25f * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
smem[threadIdx.x] = sum;
}
// Right halo.
if (threadIdx.x > 253)
{
const int right_x = x + 2;
work_type sum;
sum = 0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_high(right_x, src_cols));
sum = sum + 0.25f * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_high(right_x, src_cols));
sum = sum + 0.375f * src(src_y , Brd::idx_high(right_x, src_cols));
sum = sum + 0.25f * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_high(right_x, src_cols));
sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_high(right_x, src_cols));
smem[4 + threadIdx.x] = sum;
}
}
// All of smem must be written before the horizontal stage reads it.
__syncthreads();
// Horizontal stage: thread t (t < 128) filters around smem column 2 * t,
// i.e. every second source column, producing one destination pixel.
if (threadIdx.x < 128)
{
const int tid2 = threadIdx.x * 2;
work_type sum;
sum = 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
// Skip destination columns that lie outside the output image.
if (dst_x < dst_cols)
dst(y, dst_x) = saturate_cast<DstType>(sum);
}
}
// Host-side launcher for the pyrDown kernel.
// Launch geometry: 256 threads per block along x covering the source columns,
// and one block row per destination row. When the default stream (0) is used,
// the call blocks until the device has finished.
template <class Brd, class SrcPtr, typename DstType>
__host__ void pyrDown(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream)
{
    const dim3 threads(256);
    const dim3 blocks(divUp(src_cols, threads.x), dst_rows);

    pyrDown<Brd><<<blocks, threads, 0, stream>>>(src, dst, src_rows, src_cols, dst_cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}
}
}}
#endif

View File

@ -0,0 +1,172 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_PYR_UP_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_PYR_UP_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/vec_traits.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../util/type_traits.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace pyramids_detail
{
// pyrUp kernel: each thread produces one destination pixel (x, y).
//
// Stage 1: a 10x10 patch of the source is loaded into shared memory.
// Stage 2: a horizontal 5-tap pass (weights 0.0625, 0.25, 0.375, 0.25, 0.0625)
//          fills s_dstPatch; rows with odd threadIdx.y get zero.
// Stage 3: a vertical 5-tap pass over s_dstPatch; the result is multiplied
//          by 4 and written to dst.
//
// The shared array sizes and the constants 10, 13 and 9 below assume a 16x16
// thread block, which is what the host-side pyrUp launcher in this file uses.
template <class SrcPtr, typename DstType>
__global__ void pyrUp(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_rows, const int dst_cols)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
typedef typename VecTraits<src_type>::elem_type src_elem_type;
typedef typename LargerType<float, src_elem_type>::type work_elem_type;
typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
// Source patch: 8x8 source pixels for this block plus a 1-pixel border.
__shared__ work_type s_srcPatch[10][10];
// Horizontally filtered rows: 16 block rows plus 2 extra rows above and below.
__shared__ work_type s_dstPatch[20][16];
// Stage 1: the first 10x10 threads load the source patch.
if (threadIdx.x < 10 && threadIdx.y < 10)
{
// Patch origin is the block origin divided by 2, shifted by the 1-pixel border.
int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
// Border handling: negative indices are mirrored with abs(), indices past
// the end are clamped to the last row/column.
srcx = ::abs(srcx);
srcx = ::min(src_cols - 1, srcx);
srcy = ::abs(srcy);
srcy = ::min(src_rows - 1, srcy);
s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<work_type>(src(srcy, srcx));
}
__syncthreads();
work_type sum = VecTraits<work_type>::all(0);
// evenFlag / oddFlag are 0 or 1 and switch individual taps on or off
// depending on the parity of threadIdx.x.
const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0);
const bool eveny = ((threadIdx.y & 1) == 0);
// Signed copy of threadIdx.x so that (tidx - 2) >> 1 can become negative.
const int tidx = threadIdx.x;
// Stage 2: horizontal pass for the thread's own row. Only even rows take
// values from the source patch; odd rows keep sum == 0.
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;
// Threads of the first two rows also fill the two extra rows above
// (s_dstPatch rows 0 and 1) from source patch row 0.
if (threadIdx.y < 2)
{
sum = VecTraits<work_type>::all(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[threadIdx.y][threadIdx.x] = sum;
}
// Threads of the last two rows (14, 15) also fill the two extra rows below
// (s_dstPatch rows 18 and 19) from source patch row 9.
if (threadIdx.y > 13)
{
sum = VecTraits<work_type>::all(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
}
// s_dstPatch must be complete before the vertical pass reads neighbouring rows.
__syncthreads();
// Stage 3: vertical 5-tap pass centred on the thread's own row (offset by 2
// because of the two extra rows at the top of s_dstPatch).
sum = VecTraits<work_type>::all(0);
const int tidy = threadIdx.y;
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];
// Only threads that map to a real destination pixel write; the filtered
// value is scaled by 4 before the saturating conversion.
if (x < dst_cols && y < dst_rows)
dst(y, x) = saturate_cast<DstType>(4.0f * sum);
}
// Host-side launcher for the pyrUp kernel.
// Launch geometry: 16x16 threads per block, with enough blocks to cover the
// whole destination image. When the default stream (0) is used, the call
// blocks until the device has finished.
template <class SrcPtr, typename DstType>
__host__ void pyrUp(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream)
{
    const dim3 threads(16, 16);
    const dim3 blocks(divUp(dst_cols, threads.x), divUp(dst_rows, threads.y));

    pyrUp<<<blocks, threads, 0, stream>>>(src, dst, src_rows, src_cols, dst_rows, dst_cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}
}
}}
#endif

View File

@ -0,0 +1,106 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_COLUMN_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_REDUCE_TO_COLUMN_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../block/reduce.hpp"
namespace cv { namespace cudev {
namespace grid_reduce_to_vec_detail
{
// Reduces every image row to a single value: block `blockIdx.x` handles row
// `blockIdx.x` and writes dst[blockIdx.x].
//
// Each thread first folds the unmasked elements of its columns
// (threadIdx.x, threadIdx.x + BLOCK_SIZE, ...) with the Reductor; the
// per-thread partial results are then combined with blockReduce, and thread 0
// stores the final value obtained from Reductor::result.
template <class Reductor, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
__global__ void reduceToColumn(const SrcPtr src, ResType* dst, const MaskPtr mask, const int cols)
{
    typedef typename Reductor::work_type work_type;

    __shared__ work_type smem[BLOCK_SIZE];

    const int row = blockIdx.x;

    work_type acc = Reductor::initialValue();

    Reductor op;

    for (int col = threadIdx.x; col < cols; col += BLOCK_SIZE)
    {
        // Masked-out elements do not take part in the reduction.
        if (!mask(row, col))
            continue;

        acc = op(acc, saturate_cast<work_type>(src(row, col)));
    }

    // Combine the per-thread partial results of the whole block.
    blockReduce<BLOCK_SIZE>(smem, acc, threadIdx.x, op);

    const bool isWriter = (threadIdx.x == 0);
    if (isWriter)
    {
        dst[row] = saturate_cast<ResType>(Reductor::result(acc, cols));
    }
}
// Host-side launcher for the reduceToColumn kernel.
// Launch geometry: one block per image row; the block is one-dimensional and
// holds Policy::block_size_x * Policy::block_size_y threads. When the default
// stream (0) is used, the call blocks until the device has finished.
template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void reduceToColumn(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const int THREADS_PER_BLOCK = Policy::block_size_x * Policy::block_size_y;

    const dim3 threads(THREADS_PER_BLOCK);
    const dim3 blocks(rows);

    reduceToColumn<Reductor, THREADS_PER_BLOCK><<<blocks, threads, 0, stream>>>(src, dst, mask, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}
}
}}
#endif

View File

@ -0,0 +1,118 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_ROW_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_REDUCE_TO_ROW_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../block/reduce.hpp"
namespace cv { namespace cudev {
namespace grid_reduce_to_vec_detail
{
// Reduces every image column to a single value: each block handles
// BLOCK_SIZE_X neighbouring columns and writes one dst[x] per column.
//
// Step 1: thread (threadIdx.x, threadIdx.y) folds the unmasked elements of
//         column x at rows threadIdx.y, threadIdx.y + BLOCK_SIZE_Y, ...
// Step 2: the partial results are exchanged through shared memory so that
//         all partials of one column end up in threads sharing threadIdx.y.
// Step 3: blockReduce combines them along threadIdx.x, and the threads with
//         threadIdx.y == 0 store the final values.
//
// NOTE(review): the store in step 2 uses stride BLOCK_SIZE_Y while the loads
// use stride BLOCK_SIZE_X; these only line up as a transpose when
// BLOCK_SIZE_X == BLOCK_SIZE_Y (the launcher in this file uses 16x16) -
// confirm before instantiating with other sizes.
template <class Reductor, int BLOCK_SIZE_X, int BLOCK_SIZE_Y, class SrcPtr, typename ResType, class MaskPtr>
__global__ void reduceToRow(const SrcPtr src, ResType* dst, const MaskPtr mask, const int rows, const int cols)
{
typedef typename Reductor::work_type work_type;
__shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y];
const int x = blockIdx.x * BLOCK_SIZE_X + threadIdx.x;
work_type myVal = Reductor::initialValue();
Reductor op;
// Threads past the last column keep the initial value but still take part
// in the shared-memory exchange and the barriers below.
if (x < cols)
{
for (int y = threadIdx.y; y < rows; y += BLOCK_SIZE_Y)
{
// Masked-out elements do not take part in the reduction.
if (mask(y, x))
{
myVal = op(myVal, saturate_cast<work_type>(src(y, x)));
}
}
}
// Store the partial result at the transposed position ...
smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal;
__syncthreads();
// ... and read it back row-wise: srow now holds the partials of a single column.
volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X;
myVal = srow[threadIdx.x];
blockReduce<BLOCK_SIZE_X>(srow, myVal, threadIdx.x, op);
// The thread with threadIdx.x == 0 publishes the reduced value of its row.
if (threadIdx.x == 0)
srow[0] = myVal;
__syncthreads();
// Thread (threadIdx.x, 0) picks up the value published in smem row
// threadIdx.x and writes the final result for its column.
if (threadIdx.y == 0 && x < cols)
dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));
}
// Host-side launcher for the reduceToRow kernel.
// Launch geometry: 16x16 threads per block and one block for every 16 image
// columns. When the default stream (0) is used, the call blocks until the
// device has finished.
template <class Reductor, class SrcPtr, typename ResType, class MaskPtr>
__host__ void reduceToRow(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const int TILE_COLS = 16;
    const int TILE_ROWS = 16;

    const dim3 threads(TILE_COLS, TILE_ROWS);
    const dim3 blocks(divUp(cols, threads.x));

    reduceToRow<Reductor, TILE_COLS, TILE_ROWS><<<blocks, threads, 0, stream>>>(src, dst, mask, rows, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}
}
}}
#endif

View File

@ -0,0 +1,282 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_SPLIT_MERGE_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_SPLIT_MERGE_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../util/tuple.hpp"
#include "../../util/vec_traits.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace grid_split_merge_detail
{
// merge
// Two-channel merge kernel: one thread per pixel (x, y).
// For every pixel inside the image whose mask value is non-zero, the values
// of src1 and src2 are converted with saturate_cast to the element type of
// DstType and packed into one DstType element of dst. Other pixels of dst
// are left untouched.
template <class Src1Ptr, class Src2Ptr, typename DstType, class MaskPtr>
__global__ void mergeC2(const Src1Ptr src1, const Src2Ptr src2, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename VecTraits<DstType>::elem_type dst_elem_type;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds checks come first so that the mask is never read outside
    // of the image.
    if (x < cols && y < rows && mask(y, x))
    {
        dst(y, x) = VecTraits<DstType>::make(
            saturate_cast<dst_elem_type>(src1(y, x)),
            saturate_cast<dst_elem_type>(src2(y, x))
        );
    }
}
// Host launcher for the two-source merge kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy, class Src1Ptr, class Src2Ptr, typename DstType, class MaskPtr>
__host__ void mergeC2(const Src1Ptr& src1, const Src2Ptr& src2, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    mergeC2<<<blocks, threads, 0, stream>>>(src1, src2, dst, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    }
}
// Kernel: builds one destination element per pixel from three sources.
// Each thread handles the pixel at its global (x, y) index; pixels outside
// the rows x cols area, or for which mask(y, x) is false, are left untouched.
// Every source value is converted to the destination's element type with
// saturate_cast before being packed by VecTraits<DstType>::make.
template <class Src1Ptr, class Src2Ptr, class Src3Ptr, typename DstType, class MaskPtr>
__global__ void mergeC3(const Src1Ptr src1, const Src2Ptr src2, const Src3Ptr src3, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename VecTraits<DstType>::elem_type channel_type;

    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        dst(row, col) = VecTraits<DstType>::make(
            saturate_cast<channel_type>(src1(row, col)),
            saturate_cast<channel_type>(src2(row, col)),
            saturate_cast<channel_type>(src3(row, col)));
    }
}
// Host launcher for the three-source merge kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy, class Src1Ptr, class Src2Ptr, class Src3Ptr, typename DstType, class MaskPtr>
__host__ void mergeC3(const Src1Ptr& src1, const Src2Ptr& src2, const Src3Ptr& src3, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    mergeC3<<<blocks, threads, 0, stream>>>(src1, src2, src3, dst, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    }
}
// Kernel: builds one destination element per pixel from four sources.
// Each thread handles the pixel at its global (x, y) index; pixels outside
// the rows x cols area, or for which mask(y, x) is false, are left untouched.
// Every source value is converted to the destination's element type with
// saturate_cast before being packed by VecTraits<DstType>::make.
template <class Src1Ptr, class Src2Ptr, class Src3Ptr, class Src4Ptr, typename DstType, class MaskPtr>
__global__ void mergeC4(const Src1Ptr src1, const Src2Ptr src2, const Src3Ptr src3, const Src4Ptr src4, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename VecTraits<DstType>::elem_type channel_type;

    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        dst(row, col) = VecTraits<DstType>::make(
            saturate_cast<channel_type>(src1(row, col)),
            saturate_cast<channel_type>(src2(row, col)),
            saturate_cast<channel_type>(src3(row, col)),
            saturate_cast<channel_type>(src4(row, col)));
    }
}
// Host launcher for the four-source merge kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy, class Src1Ptr, class Src2Ptr, class Src3Ptr, class Src4Ptr, typename DstType, class MaskPtr>
__host__ void mergeC4(const Src1Ptr& src1, const Src2Ptr& src2, const Src3Ptr& src3, const Src4Ptr& src4, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    mergeC4<<<blocks, threads, 0, stream>>>(src1, src2, src3, src4, dst, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    }
}
// Dispatches a merge on the number of sources (cn); only the
// specializations below are defined.
template <int cn, class Policy> struct MergeImpl;

// Two sources: unpacks elements 0 and 1 of the source tuple and forwards
// them to the mergeC2 host launcher.
template <class Policy> struct MergeImpl<2, Policy>
{
    template <class SrcPtrTuple, typename DstType, class MaskPtr>
    __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        mergeC2<Policy>(
            get<0>(src),
            get<1>(src),
            dst, mask,
            rows, cols,
            stream);
    }
};
// Three sources: unpacks elements 0..2 of the source tuple and forwards
// them to the mergeC3 host launcher.
template <class Policy> struct MergeImpl<3, Policy>
{
    template <class SrcPtrTuple, typename DstType, class MaskPtr>
    __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        mergeC3<Policy>(
            get<0>(src),
            get<1>(src),
            get<2>(src),
            dst, mask,
            rows, cols,
            stream);
    }
};
// Four sources: unpacks elements 0..3 of the source tuple and forwards
// them to the mergeC4 host launcher.
template <class Policy> struct MergeImpl<4, Policy>
{
    template <class SrcPtrTuple, typename DstType, class MaskPtr>
    __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        mergeC4<Policy>(
            get<0>(src),
            get<1>(src),
            get<2>(src),
            get<3>(src),
            dst, mask,
            rows, cols,
            stream);
    }
};
// split
// Kernel: writes the .x and .y components of each source pixel to two
// separate destinations. One thread handles one pixel; pixels outside the
// rows x cols area, or for which mask(y, x) is false, are left untouched.
// The components are assigned directly (no saturate_cast is applied here).
template <class SrcPtr, typename DstType, class MaskPtr>
__global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename PtrTraits<SrcPtr>::value_type pixel_type;

    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        // Read the source once, then scatter its components.
        const pixel_type pixel = src(row, col);

        dst1(row, col) = pixel.x;
        dst2(row, col) = pixel.y;
    }
}
// Host launcher for the two-output split kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    split<<<blocks, threads, 0, stream>>>(src, dst1, dst2, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    }
}
// Kernel: writes the .x, .y and .z components of each source pixel to three
// separate destinations. One thread handles one pixel; pixels outside the
// rows x cols area, or for which mask(y, x) is false, are left untouched.
// The components are assigned directly (no saturate_cast is applied here).
template <class SrcPtr, typename DstType, class MaskPtr>
__global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, GlobPtr<DstType> dst3, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename PtrTraits<SrcPtr>::value_type pixel_type;

    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        // Read the source once, then scatter its components.
        const pixel_type pixel = src(row, col);

        dst1(row, col) = pixel.x;
        dst2(row, col) = pixel.y;
        dst3(row, col) = pixel.z;
    }
}
// Host launcher for the three-output split kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const GlobPtr<DstType>& dst3, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    split<<<blocks, threads, 0, stream>>>(src, dst1, dst2, dst3, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    }
}
// Kernel: writes the .x, .y, .z and .w components of each source pixel to
// four separate destinations. One thread handles one pixel; pixels outside
// the rows x cols area, or for which mask(y, x) is false, are left untouched.
// The components are assigned directly (no saturate_cast is applied here).
template <class SrcPtr, typename DstType, class MaskPtr>
__global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, GlobPtr<DstType> dst3, GlobPtr<DstType> dst4, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename PtrTraits<SrcPtr>::value_type pixel_type;

    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        // Read the source once, then scatter its components.
        const pixel_type pixel = src(row, col);

        dst1(row, col) = pixel.x;
        dst2(row, col) = pixel.y;
        dst3(row, col) = pixel.z;
        dst4(row, col) = pixel.w;
    }
}
// Host launcher for the four-output split kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const GlobPtr<DstType>& dst3, const GlobPtr<DstType>& dst4, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    split<<<blocks, threads, 0, stream>>>(src, dst1, dst2, dst3, dst4, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    }
}
}
}}
#endif

View File

@ -0,0 +1,423 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_TRANSFORM_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_TRANSFORM_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../util/vec_traits.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace grid_transform_detail
{
// OpUnroller
// Applies an operation component-by-component to a small vector of cn
// packed values; only the specializations for 1..4 components are defined.
template <int cn> struct OpUnroller;

// One component: .x corresponds to column x_shifted.
// A component is written only when the mask accepts its column; otherwise
// the value already held in dst is kept.
template <> struct OpUnroller<1>
{
    // Unary form: dst.x = op(src.x).
    template <typename T, typename D, class UnOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src.x);
        }
    }

    // Binary form: dst.x = op(src1.x, src2.x).
    template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src1.x, src2.x);
        }
    }
};
// Two components: .x and .y correspond to columns x_shifted and
// x_shifted + 1. Each component is written only when the mask accepts its
// column; otherwise the value already held in dst is kept.
template <> struct OpUnroller<2>
{
    // Unary form: dst.c = op(src.c) for every accepted component c.
    template <typename T, typename D, class UnOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src.y);
        }
    }

    // Binary form: dst.c = op(src1.c, src2.c) for every accepted component c.
    template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src1.x, src2.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src1.y, src2.y);
        }
    }
};
// Three components: .x, .y and .z correspond to columns x_shifted,
// x_shifted + 1 and x_shifted + 2. Each component is written only when the
// mask accepts its column; otherwise the value already held in dst is kept.
template <> struct OpUnroller<3>
{
    // Unary form: dst.c = op(src.c) for every accepted component c.
    template <typename T, typename D, class UnOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src.z);
        }
    }

    // Binary form: dst.c = op(src1.c, src2.c) for every accepted component c.
    template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src1.x, src2.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src1.y, src2.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src1.z, src2.z);
        }
    }
};
// Four components: .x, .y, .z and .w correspond to columns x_shifted ..
// x_shifted + 3. Each component is written only when the mask accepts its
// column; otherwise the value already held in dst is kept.
template <> struct OpUnroller<4>
{
    // Unary form: dst.c = op(src.c) for every accepted component c.
    template <typename T, typename D, class UnOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src.z);
        }
        if (mask(y, x_shifted + 3))
        {
            dst.w = op(src.w);
        }
    }

    // Binary form: dst.c = op(src1.c, src2.c) for every accepted component c.
    template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
    __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src1.x, src2.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src1.y, src2.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src1.z, src2.z);
        }
        if (mask(y, x_shifted + 3))
        {
            dst.w = op(src1.w, src2.w);
        }
    }
};
// transformSimple
// Kernel: per-pixel unary transform, one thread per pixel.
// For every in-range pixel accepted by the mask it stores
// saturate_cast<DstType>(op(src(y, x))) into dst(y, x); other pixels are
// left untouched.
template <class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__global__ void transformSimple(const SrcPtr src, GlobPtr<DstType> dst, const UnOp op, const MaskPtr mask, const int rows, const int cols)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        dst(row, col) = saturate_cast<DstType>(op(src(row, col)));
    }
}
// Kernel: per-pixel binary transform, one thread per pixel.
// For every in-range pixel accepted by the mask it stores
// saturate_cast<DstType>(op(src1(y, x), src2(y, x))) into dst(y, x); other
// pixels are left untouched.
template <class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
__global__ void transformSimple(const SrcPtr1 src1, const SrcPtr2 src2, GlobPtr<DstType> dst, const BinOp op, const MaskPtr mask, const int rows, const int cols)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        dst(row, col) = saturate_cast<DstType>(op(src1(row, col), src2(row, col)));
    }
}
// transformSmart
// Kernel: unary transform where each thread processes SHIFT consecutive
// elements of one row.
// When the whole group of SHIFT elements lies inside the row, the group is
// read and written as a single MakeVec<..., SHIFT> vector through pointer
// casts, and OpUnroller applies the operation per component. The current
// destination vector is loaded first so components rejected by the mask
// keep their previous value. A group that crosses the end of the row is
// handled element by element instead.
// The result of op is assigned directly to the destination (no
// saturate_cast is applied on this path).
template <int SHIFT, typename SrcType, typename DstType, class UnOp, class MaskPtr>
__global__ void transformSmart(const GlobPtr<SrcType> src_, GlobPtr<DstType> dst_, const UnOp op, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename MakeVec<SrcType, SHIFT>::type read_type;
    typedef typename MakeVec<DstType, SHIFT>::type write_type;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Column of the first scalar element handled by this thread.
    const int x_shifted = x * SHIFT;

    if (y >= rows)
        return;

    const SrcType* src = src_.row(y);
    DstType* dst = dst_.row(y);

    const bool wholeGroupInside = x_shifted + SHIFT - 1 < cols;

    if (!wholeGroupInside)
    {
        // Partial group at the end of the row (possibly empty): scalar path.
        for (int real_x = x_shifted; real_x < cols; ++real_x)
        {
            if (mask(y, real_x))
                dst[real_x] = op(src[real_x]);
        }
        return;
    }

    // Full group: one vector load per operand, one vector store.
    const read_type src_n_el = ((const read_type*)src)[x];
    write_type dst_n_el = ((const write_type*)dst)[x];

    OpUnroller<SHIFT>::unroll(src_n_el, dst_n_el, op, mask, x_shifted, y);

    ((write_type*)dst)[x] = dst_n_el;
}
// Kernel: binary transform where each thread processes SHIFT consecutive
// elements of one row.
// When the whole group of SHIFT elements lies inside the row, each operand
// group is read as a single MakeVec<..., SHIFT> vector through pointer
// casts, and OpUnroller applies the operation per component. The current
// destination vector is loaded first so components rejected by the mask
// keep their previous value. A group that crosses the end of the row is
// handled element by element instead.
// The result of op is assigned directly to the destination (no
// saturate_cast is applied on this path).
//
// The destination is taken as GlobPtr<DstType>, matching the unary overload
// and the host dispatcher, which passes a GlobPtr<DstType>. DstType cannot
// be deduced for a different wrapper type, and row() is used here exactly
// as the unary overload uses it on GlobPtr.
template <int SHIFT, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
__global__ void transformSmart(const GlobPtr<SrcType1> src1_, const GlobPtr<SrcType2> src2_, GlobPtr<DstType> dst_, const BinOp op, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename MakeVec<SrcType1, SHIFT>::type read_type1;
    typedef typename MakeVec<SrcType2, SHIFT>::type read_type2;
    typedef typename MakeVec<DstType, SHIFT>::type write_type;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Column of the first scalar element handled by this thread.
    const int x_shifted = x * SHIFT;

    if (y < rows)
    {
        const SrcType1* src1 = src1_.row(y);
        const SrcType2* src2 = src2_.row(y);
        DstType* dst = dst_.row(y);

        if (x_shifted + SHIFT - 1 < cols)
        {
            // Full group: one vector load per operand, one vector store.
            const read_type1 src1_n_el = ((const read_type1*)src1)[x];
            const read_type2 src2_n_el = ((const read_type2*)src2)[x];
            write_type dst_n_el = ((const write_type*)dst)[x];

            OpUnroller<SHIFT>::unroll(src1_n_el, src2_n_el, dst_n_el, op, mask, x_shifted, y);

            ((write_type*)dst)[x] = dst_n_el;
        }
        else
        {
            // Partial group at the end of the row (possibly empty): scalar path.
            for (int real_x = x_shifted; real_x < cols; ++real_x)
            {
                if (mask(y, real_x))
                    dst[real_x] = op(src1[real_x], src2[real_x]);
            }
        }
    }
}
// TransformDispatcher
// Selects between the per-pixel kernels (UseSmart == false) and the
// SHIFT-elements-per-thread kernels (UseSmart == true).
template <bool UseSmart, class Policy> struct TransformDispatcher;

// Per-pixel path: launches transformSimple with one thread per pixel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
template <class Policy> struct TransformDispatcher<false, Policy>
{
    // Unary transform.
    template <class SrcPtr, typename DstType, class UnOp, class MaskPtr>
    __host__ static void call(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        const dim3 threads(Policy::block_size_x, Policy::block_size_y);
        const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

        transformSimple<<<blocks, threads, 0, stream>>>(src, dst, op, mask, rows, cols);

        CV_CUDEV_SAFE_CALL( cudaGetLastError() );

        if (stream == 0)
        {
            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
        }
    }

    // Binary transform.
    template <class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
    __host__ static void call(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        const dim3 threads(Policy::block_size_x, Policy::block_size_y);
        const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

        transformSimple<<<blocks, threads, 0, stream>>>(src1, src2, dst, op, mask, rows, cols);

        CV_CUDEV_SAFE_CALL( cudaGetLastError() );

        if (stream == 0)
        {
            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
        }
    }
};
// Vectorized path: launches transformSmart, where every thread handles
// Policy::shift consecutive elements. That kernel reinterprets row pointers
// as vectors of Policy::shift elements, so it is used only when every base
// pointer and every row step is a multiple of the vector size in bytes;
// otherwise the call is forwarded to the per-pixel dispatcher.
template <class Policy> struct TransformDispatcher<true, Policy>
{
    // True when the address held by ptr is a multiple of size bytes.
    template <typename T>
    __host__ static bool isAligned(const T* ptr, size_t size)
    {
        const size_t address = reinterpret_cast<size_t>(ptr);
        return address % size == 0;
    }

    // True when the row step (in bytes) is a multiple of size bytes.
    __host__ static bool isAligned(size_t step, size_t size)
    {
        return (step % size) == 0;
    }

    // Unary transform.
    template <typename SrcType, typename DstType, class UnOp, class MaskPtr>
    __host__ static void call(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        // Byte sizes of one vector of Policy::shift elements.
        const size_t srcVecSize = Policy::shift * sizeof(SrcType);
        const size_t dstVecSize = Policy::shift * sizeof(DstType);

        const bool canVectorize =
            Policy::shift != 1 &&
            isAligned(src.data, srcVecSize) && isAligned(src.step, srcVecSize) &&
            isAligned(dst.data, dstVecSize) && isAligned(dst.step, dstVecSize);

        if (!canVectorize)
        {
            TransformDispatcher<false, Policy>::call(src, dst, op, mask, rows, cols, stream);
            return;
        }

        // Each thread covers Policy::shift columns, so fewer blocks are needed along x.
        const dim3 threads(Policy::block_size_x, Policy::block_size_y);
        const dim3 blocks(divUp(cols, threads.x * Policy::shift), divUp(rows, threads.y));

        transformSmart<Policy::shift><<<blocks, threads, 0, stream>>>(src, dst, op, mask, rows, cols);

        CV_CUDEV_SAFE_CALL( cudaGetLastError() );

        if (stream == 0)
        {
            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
        }
    }

    // Binary transform.
    template <typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
    __host__ static void call(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    {
        // Byte sizes of one vector of Policy::shift elements.
        const size_t src1VecSize = Policy::shift * sizeof(SrcType1);
        const size_t src2VecSize = Policy::shift * sizeof(SrcType2);
        const size_t dstVecSize = Policy::shift * sizeof(DstType);

        const bool canVectorize =
            Policy::shift != 1 &&
            isAligned(src1.data, src1VecSize) && isAligned(src1.step, src1VecSize) &&
            isAligned(src2.data, src2VecSize) && isAligned(src2.step, src2VecSize) &&
            isAligned(dst.data, dstVecSize) && isAligned(dst.step, dstVecSize);

        if (!canVectorize)
        {
            TransformDispatcher<false, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
            return;
        }

        // Each thread covers Policy::shift columns, so fewer blocks are needed along x.
        const dim3 threads(Policy::block_size_x, Policy::block_size_y);
        const dim3 blocks(divUp(cols, threads.x * Policy::shift), divUp(rows, threads.y));

        transformSmart<Policy::shift><<<blocks, threads, 0, stream>>>(src1, src2, dst, op, mask, rows, cols);

        CV_CUDEV_SAFE_CALL( cudaGetLastError() );

        if (stream == 0)
        {
            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
        }
    }
};
// Unary transform entry point for an arbitrary source pointer type:
// always uses the per-pixel dispatcher.
template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__host__ void transform(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef TransformDispatcher<false, Policy> dispatcher_type;

    dispatcher_type::call(src, dst, op, mask, rows, cols, stream);
}
// Binary transform entry point for arbitrary source pointer types:
// always uses the per-pixel dispatcher.
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
__host__ void transform(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef TransformDispatcher<false, Policy> dispatcher_type;

    dispatcher_type::call(src1, src2, dst, op, mask, rows, cols, stream);
}
// Unary transform entry point for a GlobPtr source.
// The vectorized dispatcher is chosen at compile time only when both the
// source and destination element types have one channel and the policy
// requests more than one element per thread; otherwise the per-pixel
// dispatcher is used.
template <class Policy, typename SrcType, typename DstType, class UnOp, class MaskPtr>
__host__ void transform(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef TransformDispatcher<
        VecTraits<SrcType>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1,
        Policy> dispatcher_type;

    dispatcher_type::call(src, dst, op, mask, rows, cols, stream);
}
// Binary transform entry point for GlobPtr sources.
// The vectorized dispatcher is chosen at compile time only when both source
// element types and the destination element type have one channel and the
// policy requests more than one element per thread; otherwise the per-pixel
// dispatcher is used.
template <class Policy, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
__host__ void transform(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    typedef TransformDispatcher<
        VecTraits<SrcType1>::cn == 1 && VecTraits<SrcType2>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1,
        Policy> dispatcher_type;

    dispatcher_type::call(src1, src2, dst, op, mask, rows, cols, stream);
}
// transform_tuple
// Compile-time recursion over a tuple of destinations and a tuple of
// operations. Step `count` handles tuple element count - 1: it applies that
// operation to the source value, saturate_casts the result to the matching
// destination's value type, stores it at (y, x), and then recurses on
// count - 1. Unroll<0> ends the recursion.
template <int count> struct Unroll
{
    template <class SrcVal, class DstPtrTuple, class OpTuple>
    __device__ static void transform(const SrcVal& srcVal, DstPtrTuple& dst, const OpTuple& op, int y, int x)
    {
        typedef typename tuple_element<count - 1, DstPtrTuple>::type out_ptr_type;
        typedef typename PtrTraits<out_ptr_type>::value_type out_value_type;

        // Highest remaining index first, then the lower ones.
        get<count - 1>(dst)(y, x) =
            saturate_cast<out_value_type>(get<count - 1>(op)(srcVal));

        Unroll<count - 1>::transform(srcVal, dst, op, y, x);
    }
};
// Recursion terminator for Unroll<count>: with no tuple elements left there
// is nothing to compute or store, so transform() is intentionally empty.
template <> struct Unroll<0>
{
template <class SrcVal, class DstPtrTuple, class OpTuple>
__device__ __forceinline__ static void transform(const SrcVal&, DstPtrTuple&, const OpTuple&, int, int)
{
// Intentionally empty: end of the compile-time recursion.
}
};
// Kernel: applies a tuple of operations to each source pixel and writes
// each result to the destination with the same tuple index.
// One thread handles one pixel; pixels outside the rows x cols area, or for
// which mask(y, x) is false, are left untouched. The source is read once
// per pixel and the per-destination work is done by Unroll.
template <class SrcPtr, class DstPtrTuple, class OpTuple, class MaskPtr>
__global__ void transform_tuple(const SrcPtr src, DstPtrTuple dst, const OpTuple op, const MaskPtr mask, const int rows, const int cols)
{
    typedef typename PtrTraits<SrcPtr>::value_type src_value_type;

    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The bounds test comes first so the mask is never sampled out of range.
    if (col < cols && row < rows && mask(row, col))
    {
        src_value_type srcVal = src(row, col);

        Unroll<tuple_size<DstPtrTuple>::value>::transform(srcVal, dst, op, row, col);
    }
}
// Host launcher for the tuple transform kernel.
// The block shape is taken from Policy and the grid is sized so that the
// blocks cover the whole cols x rows area. The launch status is checked
// right away; when stream == 0 the call also waits for the device.
// Note: despite its name, the SrcPtrTuple argument is forwarded to the
// kernel as its single source pointer.
template <class Policy, class SrcPtrTuple, class DstPtrTuple, class OpTuple, class MaskPtr>
__host__ void transform_tuple(const SrcPtrTuple& src, const DstPtrTuple& dst, const OpTuple& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    const dim3 threads(Policy::block_size_x, Policy::block_size_y);
    const dim3 blocks(divUp(cols, threads.x), divUp(rows, threads.y));

    transform_tuple<<<blocks, threads, 0, stream>>>(src, dst, op, mask, rows, cols);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}
}
}}
#endif

View File

@ -0,0 +1,130 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_TRANSPOSE_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_TRANSPOSE_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"
namespace cv { namespace cudev {
namespace transpose_detail
{
// Side length, in elements, of the square tile each thread block transposes.
const int TRANSPOSE_TILE_DIM = 16;
// Row stride used by the per-thread loops in the kernel below; with the
// current values (equal to the tile size) each loop runs exactly once.
const int TRANSPOSE_BLOCK_ROWS = 16;
// Kernel: transposes src (rows x cols) into dst, so that
// dst(x, y) = saturate_cast<DstType>(src(y, x)).
// Each block stages one tile of src in shared memory, waits at a barrier,
// then writes the tile to dst with row and column indices swapped.
template <class SrcPtr, typename DstType>
__global__ void transpose(const SrcPtr src, GlobPtr<DstType> dst, const int rows, const int cols)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
// Tile buffer with one extra column per row.
// NOTE(review): the +1 presumably pads rows to avoid shared-memory bank
// conflicts on the transposed read below - confirm.
__shared__ src_type tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1];
// Logical tile coordinates, remapped from the hardware block index.
int blockIdx_x, blockIdx_y;
// do diagonal reordering
if (gridDim.x == gridDim.y)
{
// Square grid: simple remapping.
blockIdx_y = blockIdx.x;
blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;
}
else
{
// Non-square grid: remap through the linear block id.
int bid = blockIdx.x + gridDim.x * blockIdx.y;
blockIdx_y = bid % gridDim.y;
blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
}
// Source coordinates read by this thread.
int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
if (xIndex < cols)
{
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < rows)
{
tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex);
}
}
}
// Every thread of the block reaches this barrier (the loads above are
// guarded, not skipped via return), so the tile is complete before it is read.
__syncthreads();
// Destination coordinates: the tile's x/y origins are swapped.
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
// dst has `cols` rows and `rows` columns, hence the swapped bounds checks.
if (xIndex < rows)
{
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < cols)
{
// The tile is read with its indices swapped relative to the store above.
dst(yIndex + i, xIndex) = saturate_cast<DstType>(tile[threadIdx.x][threadIdx.y + i]);
}
}
}
}
// Host launcher for the tiled transpose kernel.
//
// The kernel covers a TRANSPOSE_TILE_DIM x TRANSPOSE_TILE_DIM tile per block
// and strides its per-thread loops by TRANSPOSE_BLOCK_ROWS, so the block
// must be TILE_DIM threads wide and BLOCK_ROWS threads tall, and the grid
// must be counted in tiles along both axes. Deriving the launch shape from
// both constants keeps it in step with the kernel if TRANSPOSE_BLOCK_ROWS is
// ever made smaller than the tile size; with the current values (16 and 16)
// the launch configuration is unchanged.
//
// The launch status is checked right away; when stream == 0 the call also
// waits for the device.
template <class SrcPtr, typename DstType>
__host__ void transpose(const SrcPtr& src, const GlobPtr<DstType>& dst, int rows, int cols, cudaStream_t stream)
{
    const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_BLOCK_ROWS);

    // block.x equals the tile size; it is used for both axes on purpose,
    // because each block covers a square tile regardless of block.y.
    const dim3 grid(divUp(cols, block.x), divUp(rows, block.x));

    transpose<<<grid, block, 0, stream>>>(src, dst, rows, cols);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
}
}}
#endif

View File

@ -0,0 +1,314 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_GLOB_REDUCE_HPP__
#define __OPENCV_CUDEV_GRID_GLOB_REDUCE_HPP__
#include <limits>
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "../ptr2d/transform.hpp"
#include "detail/glob_reduce.hpp"
namespace cv { namespace cudev {
// Masked sum reduction.
// dst is (re)created as a 1x1 matrix and zeroed on the given stream before
// the reduction is started. The mask must have the same number of rows and
// columns as the source; this is enforced with CV_Assert.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(0, stream);

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    cudaStream_t cudaStream = StreamAccessor::getStream(stream);

    grid_glob_reduce_detail::sum<Policy>(shrinkPtr(src), dst[0], shrinkPtr(mask), rows, cols, cudaStream);
}
// Unmasked sum reduction.
// dst is (re)created as a 1x1 matrix and zeroed on the given stream before
// the reduction is started; WithOutMask() is passed in place of a mask.
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(0, stream);

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    cudaStream_t cudaStream = StreamAccessor::getStream(stream);

    grid_glob_reduce_detail::sum<Policy>(shrinkPtr(src), dst[0], WithOutMask(), srcRows, srcCols, cudaStream);
}
// Masked minimum reduction.
// dst is (re)created as a 1x1 matrix and seeded on the given stream with the
// largest value of ResType before the reduction is started. The mask must
// have the same number of rows and columns as the source; this is enforced
// with CV_Assert.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridFindMinVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    cudaStream_t cudaStream = StreamAccessor::getStream(stream);

    grid_glob_reduce_detail::minVal<Policy>(shrinkPtr(src), dst[0], shrinkPtr(mask), rows, cols, cudaStream);
}
// Unmasked minimum reduction.
// dst is (re)created as a 1x1 matrix and seeded on the given stream with the
// largest value of ResType before the reduction is started; WithOutMask()
// is passed in place of a mask.
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridFindMinVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    cudaStream_t cudaStream = StreamAccessor::getStream(stream);

    grid_glob_reduce_detail::minVal<Policy>(shrinkPtr(src), dst[0], WithOutMask(), srcRows, srcCols, cudaStream);
}
// Masked maximum search with an explicit launch Policy.
//
// dst is re-created as 1x1 and seeded with -std::numeric_limits<ResType>::max()
// on `stream`. The mask must have the same size as the source (checked with
// CV_Assert after dst has been seeded).
// NOTE(review): the seed is the negated maximum, not numeric_limits::min();
// for signed integers that is one above the true minimum - confirm this is
// acceptable for the ResType values used by callers.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridFindMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    grid_glob_reduce_detail::maxVal<Policy>(
        shrinkPtr(src), dst[0], shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked maximum search with an explicit launch Policy.
//
// dst is re-created as 1x1 and seeded with -std::numeric_limits<ResType>::max()
// on `stream`; grid_glob_reduce_detail::maxVal<Policy> is then invoked with
// WithOutMask() in place of a user mask.
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridFindMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    grid_glob_reduce_detail::maxVal<Policy>(
        shrinkPtr(src), dst[0], WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Masked combined min/max search with an explicit launch Policy.
//
// dst is re-created as 1x2. Column 0 is seeded with numeric_limits<ResType>::max()
// and column 1 with its negation (both on `stream`), i.e. column 0 holds the
// running minimum seed and column 1 the running maximum seed.
// The mask must have the same size as the source (checked with CV_Assert
// after dst has been seeded).
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridFindMinMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    dst.create(1, 2);
    dst.col(0).setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);
    dst.col(1).setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    grid_glob_reduce_detail::minMaxVal<Policy>(
        shrinkPtr(src), dst[0], shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked combined min/max search with an explicit launch Policy.
//
// dst is re-created as 1x2. Column 0 is seeded with numeric_limits<ResType>::max()
// and column 1 with its negation (both on `stream`);
// grid_glob_reduce_detail::minMaxVal<Policy> is then invoked with WithOutMask().
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridFindMinMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    dst.create(1, 2);
    dst.col(0).setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);
    dst.col(1).setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    grid_glob_reduce_detail::minMaxVal<Policy>(
        shrinkPtr(src), dst[0], WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Masked non-zero count with an explicit launch Policy.
//
// Implemented as a sum reduction over a transformed view of src: every source
// value is first passed through "value != zero" (not_equal_to bound to an
// all-zero value of the source type), and the results are summed into dst.
// dst is re-created as 1x1 and set to 0 on `stream` before the size check.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCountNonZero_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(0, stream);

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    typedef typename PtrTraits<SrcPtr>::value_type SrcValue;

    // Predicate "x != 0" built from the binary functor and a fixed right operand.
    not_equal_to<SrcValue> isNotEqual;
    const SrcValue zeroValue = VecTraits<SrcValue>::all(0);

    grid_glob_reduce_detail::sum<Policy>(
        shrinkPtr(transformPtr(src, bind2nd(isNotEqual, zeroValue))),
        dst[0], shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked non-zero count with an explicit launch Policy.
//
// Implemented as a sum reduction over a transformed view of src: every source
// value is first passed through "value != zero" (not_equal_to bound to an
// all-zero value of the source type), and the results are summed into dst.
// dst is re-created as 1x1 and set to 0 on `stream` first.
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridCountNonZero_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    dst.create(1, 1);
    dst.setTo(0, stream);

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    typedef typename PtrTraits<SrcPtr>::value_type SrcValue;

    // Predicate "x != 0" built from the binary functor and a fixed right operand.
    not_equal_to<SrcValue> isNotEqual;
    const SrcValue zeroValue = VecTraits<SrcValue>::all(0);

    grid_glob_reduce_detail::sum<Policy>(
        shrinkPtr(transformPtr(src, bind2nd(isNotEqual, zeroValue))),
        dst[0], WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Default policy for the global reductions in this header. It is the Policy
// argument that gridCalcSum, gridFindMinVal, gridFindMaxVal, gridFindMinMaxVal
// and gridCountNonZero pass to their underscore-suffixed counterparts.
// NOTE(review): presumably block_size_* are the CUDA thread-block dimensions and
// patch_size_* the per-thread tile; the code that reads these constants
// (detail/glob_reduce.hpp) is not visible here - confirm before changing them.
struct DefaultGlobReducePolicy
{
enum {
block_size_x = 32,
block_size_y = 8,
patch_size_x = 4,
patch_size_y = 4
};
};
// Masked sum reduction using DefaultGlobReducePolicy; forwards all arguments
// unchanged to gridCalcSum_.
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCalcSum(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridCalcSum_<Policy>(src, dst, mask, stream);
}
// Unmasked sum reduction using DefaultGlobReducePolicy; forwards all arguments
// unchanged to gridCalcSum_.
template <class SrcPtr, typename ResType>
__host__ void gridCalcSum(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridCalcSum_<Policy>(src, dst, stream);
}
// Masked minimum search using DefaultGlobReducePolicy; forwards all arguments
// unchanged to gridFindMinVal_.
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridFindMinVal(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridFindMinVal_<Policy>(src, dst, mask, stream);
}
// Unmasked minimum search using DefaultGlobReducePolicy; forwards all
// arguments unchanged to gridFindMinVal_.
template <class SrcPtr, typename ResType>
__host__ void gridFindMinVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridFindMinVal_<Policy>(src, dst, stream);
}
// Masked maximum search using DefaultGlobReducePolicy; forwards all arguments
// unchanged to gridFindMaxVal_.
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridFindMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridFindMaxVal_<Policy>(src, dst, mask, stream);
}
// Unmasked maximum search using DefaultGlobReducePolicy; forwards all
// arguments unchanged to gridFindMaxVal_.
template <class SrcPtr, typename ResType>
__host__ void gridFindMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridFindMaxVal_<Policy>(src, dst, stream);
}
// Masked combined min/max search using DefaultGlobReducePolicy; forwards all
// arguments unchanged to gridFindMinMaxVal_.
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridFindMinMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridFindMinMaxVal_<Policy>(src, dst, mask, stream);
}
// Unmasked combined min/max search using DefaultGlobReducePolicy; forwards all
// arguments unchanged to gridFindMinMaxVal_.
template <class SrcPtr, typename ResType>
__host__ void gridFindMinMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridFindMinMaxVal_<Policy>(src, dst, stream);
}
// Masked non-zero count using DefaultGlobReducePolicy; forwards all arguments
// unchanged to gridCountNonZero_.
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCountNonZero(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridCountNonZero_<Policy>(src, dst, mask, stream);
}
// Unmasked non-zero count using DefaultGlobReducePolicy; forwards all
// arguments unchanged to gridCountNonZero_.
template <class SrcPtr, typename ResType>
__host__ void gridCountNonZero(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultGlobReducePolicy Policy;
    gridCountNonZero_<Policy>(src, dst, stream);
}
}}
#endif

View File

@ -0,0 +1,115 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_HISTOGRAM_HPP__
#define __OPENCV_CUDEV_GRID_HISTOGRAM_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "detail/histogram.hpp"
namespace cv { namespace cudev {
// Masked histogram with BIN_COUNT bins and an explicit launch Policy.
//
// The mask size is validated against the source size first; only then is dst
// re-created as a 1 x BIN_COUNT row and set to 0 on `stream`, after which
// grid_histogram_detail::histogram<BIN_COUNT, Policy> is invoked with dst[0]
// as the output location.
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Start from an all-zero set of bins.
    dst.create(1, BIN_COUNT);
    dst.setTo(0, stream);

    grid_histogram_detail::histogram<BIN_COUNT, Policy>(
        shrinkPtr(src), dst[0], shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked histogram with BIN_COUNT bins and an explicit launch Policy.
//
// The source size is read first; dst is then re-created as a 1 x BIN_COUNT row
// and set to 0 on `stream`, and grid_histogram_detail::histogram<BIN_COUNT, Policy>
// is invoked with WithOutMask() in place of a user mask.
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType>
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // Start from an all-zero set of bins.
    dst.create(1, BIN_COUNT);
    dst.setTo(0, stream);

    grid_histogram_detail::histogram<BIN_COUNT, Policy>(
        shrinkPtr(src), dst[0], WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Default policy for histogram computation; it is the Policy argument that the
// gridHistogram overloads pass to gridHistogram_.
// NOTE(review): presumably block_size_x/y are the CUDA thread-block dimensions;
// the code that reads them (detail/histogram.hpp) is not visible here - confirm.
struct DefaultHistogramPolicy
{
enum {
block_size_x = 32,
block_size_y = 8
};
};
// Masked BIN_COUNT-bin histogram using DefaultHistogramPolicy; forwards all
// arguments unchanged to gridHistogram_.
template <int BIN_COUNT, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridHistogram(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultHistogramPolicy Policy;
    gridHistogram_<BIN_COUNT, Policy>(src, dst, mask, stream);
}
// Unmasked BIN_COUNT-bin histogram using DefaultHistogramPolicy; forwards all
// arguments unchanged to gridHistogram_.
template <int BIN_COUNT, class SrcPtr, typename ResType>
__host__ void gridHistogram(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultHistogramPolicy Policy;
    gridHistogram_<BIN_COUNT, Policy>(src, dst, stream);
}
}}
#endif

View File

@ -0,0 +1,69 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_INTEGRAL_HPP__
#define __OPENCV_CUDEV_GRID_INTEGRAL_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "detail/integral.hpp"
namespace cv { namespace cudev {
// Integral-image entry point.
//
// dst is re-created with the same number of rows and columns as src, and the
// work is delegated to integral_detail::integral on the given stream.
template <class SrcPtr, typename DstType>
__host__ void gridIntegral(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    dst.create(srcRows, srcCols);

    integral_detail::integral(
        shrinkPtr(src), shrinkPtr(dst),
        srcRows, srcCols,
        StreamAccessor::getStream(stream));
}
}}
#endif

View File

@ -0,0 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_PYRAMIDS_HPP__
#define __OPENCV_CUDEV_GRID_PYRAMIDS_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/extrapolation.hpp"
#include "detail/pyr_down.hpp"
#include "detail/pyr_up.hpp"
namespace cv { namespace cudev {
// Pyramid down-sampling step with an explicit border type Brd.
//
// dst is re-created with divUp(rows, 2) x divUp(cols, 2) elements, i.e. half the
// source size rounded up, and pyramids_detail::pyrDown<Brd> is given both the
// source and the destination dimensions.
template <class Brd, class SrcPtr, typename DstType>
__host__ void gridPyrDown_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    dst.create(divUp(srcRows, 2), divUp(srcCols, 2));

    pyramids_detail::pyrDown<Brd>(
        shrinkPtr(src), shrinkPtr(dst),
        srcRows, srcCols,
        dst.rows, dst.cols,
        StreamAccessor::getStream(stream));
}
// Pyramid down-sampling step with the default border type BrdReflect101;
// forwards all arguments unchanged to gridPyrDown_.
template <class SrcPtr, typename DstType>
__host__ void gridPyrDown(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    typedef BrdReflect101 DefaultBorder;
    gridPyrDown_<DefaultBorder>(src, dst, stream);
}
// Pyramid up-sampling step.
//
// dst is re-created with twice the rows and twice the columns of src, and
// pyramids_detail::pyrUp is given both the source and the destination dimensions.
template <class SrcPtr, typename DstType>
__host__ void gridPyrUp(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    dst.create(srcRows * 2, srcCols * 2);

    pyramids_detail::pyrUp(
        shrinkPtr(src), shrinkPtr(dst),
        srcRows, srcCols,
        dst.rows, dst.cols,
        StreamAccessor::getStream(stream));
}
}}
#endif

View File

@ -0,0 +1,209 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_VEC_HPP__
#define __OPENCV_CUDEV_GRID_REDUCE_TO_VEC_HPP__
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "../util/limits.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "../functional/functional.hpp"
#include "detail/reduce_to_column.hpp"
#include "detail/reduce_to_row.hpp"
namespace cv { namespace cudev {
// Reductor describing a summation: inherits plus<T> as the combining
// operation, starts from VecTraits<T>::all(0), and returns the accumulated
// value unchanged.
template <typename T> struct Sum : plus<T>
{
    typedef T work_type;

    // Starting value of the reduction: T built from 0.
    __device__ __forceinline__ static T initialValue()
    {
        return VecTraits<T>::all(0);
    }

    // Final transform; the second argument is ignored for a plain sum.
    __device__ __forceinline__ static T result(T acc, int /*unused*/)
    {
        return acc;
    }
};
// Reductor describing an average: inherits plus<T> as the combining
// operation, starts from VecTraits<T>::all(0), and divides the accumulated
// value by the count passed to result().
template <typename T> struct Avg : plus<T>
{
    typedef T work_type;

    // Starting value of the reduction: T built from 0.
    __device__ __forceinline__ static T initialValue()
    {
        return VecTraits<T>::all(0);
    }

    // Final transform: accumulated value divided by `count`.
    __device__ __forceinline__ static T result(T acc, int count)
    {
        return acc / count;
    }
};
// Reductor describing a minimum: inherits minimum<T> as the combining
// operation and starts from the largest value of T's element type, so that
// any input can replace the seed.
template <typename T> struct Min : minimum<T>
{
    typedef T work_type;

    // Starting value: T built from numeric_limits<elem_type>::max().
    __device__ __forceinline__ static T initialValue()
    {
        typedef typename VecTraits<T>::elem_type elem_type;
        return VecTraits<T>::all(numeric_limits<elem_type>::max());
    }

    // Final transform; the second argument is ignored.
    __device__ __forceinline__ static T result(T acc, int /*unused*/)
    {
        return acc;
    }
};
// Reductor describing a maximum: inherits maximum<T> as the combining
// operation and starts from the negated largest value of T's element type.
// NOTE(review): the seed is -max(), not the type's true minimum; for unsigned
// element types the negated value does not convert back to 0 - confirm that
// callers only instantiate this with signed or floating-point work types.
template <typename T> struct Max : maximum<T>
{
    typedef T work_type;

    // Starting value: T built from -numeric_limits<elem_type>::max().
    __device__ __forceinline__ static T initialValue()
    {
        typedef typename VecTraits<T>::elem_type elem_type;
        return VecTraits<T>::all(-numeric_limits<elem_type>::max());
    }

    // Final transform; the second argument is ignored.
    __device__ __forceinline__ static T result(T acc, int /*unused*/)
    {
        return acc;
    }
};
// Masked reduction of a 2D source into a single row using `Reductor`.
//
// The mask size is validated against the source size, dst is re-created as a
// 1 x cols row, and grid_reduce_to_vec_detail::reduceToRow<Reductor> is invoked
// with dst[0] as the output location.
template <class Reductor, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridReduceToRow(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // One output element per source column.
    dst.create(1, cols);

    grid_reduce_to_vec_detail::reduceToRow<Reductor>(
        shrinkPtr(src), dst[0], shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked reduction of a 2D source into a single row using `Reductor`.
//
// dst is re-created as a 1 x cols row and
// grid_reduce_to_vec_detail::reduceToRow<Reductor> is invoked with
// WithOutMask() in place of a user mask.
template <class Reductor, class SrcPtr, typename ResType>
__host__ void gridReduceToRow(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // One output element per source column.
    dst.create(1, srcCols);

    grid_reduce_to_vec_detail::reduceToRow<Reductor>(
        shrinkPtr(src), dst[0], WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Masked reduction of a 2D source into a single column using `Reductor` and an
// explicit launch Policy.
//
// The mask size is validated against the source size, dst is allocated through
// createContinuous as rows x 1, and the detail implementation receives dst[0]
// as the output location.
template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // One output element per source row, allocated via createContinuous.
    createContinuous(rows, 1, DataType<ResType>::type, dst);

    grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(
        shrinkPtr(src), dst[0], shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked reduction of a 2D source into a single column using `Reductor` and
// an explicit launch Policy.
//
// dst is allocated through createContinuous as rows x 1, and the detail
// implementation is invoked with WithOutMask() in place of a user mask.
template <class Reductor, class Policy, class SrcPtr, typename ResType>
__host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // One output element per source row, allocated via createContinuous.
    createContinuous(srcRows, 1, DataType<ResType>::type, dst);

    grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(
        shrinkPtr(src), dst[0], WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Default policy for column reductions; it is the Policy argument that the
// gridReduceToColumn overloads pass to gridReduceToColumn_.
// NOTE(review): presumably block_size_x/y are the CUDA thread-block dimensions;
// the code that reads them (detail/reduce_to_column.hpp) is not visible here - confirm.
struct DefaultReduceToVecPolicy
{
enum {
block_size_x = 32,
block_size_y = 8
};
};
// Masked column reduction using DefaultReduceToVecPolicy; forwards all
// arguments unchanged to gridReduceToColumn_.
template <class Reductor, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridReduceToColumn(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    typedef DefaultReduceToVecPolicy Policy;
    gridReduceToColumn_<Reductor, Policy>(src, dst, mask, stream);
}
// Unmasked column reduction using DefaultReduceToVecPolicy; forwards all
// arguments unchanged to gridReduceToColumn_.
template <class Reductor, class SrcPtr, typename ResType>
__host__ void gridReduceToColumn(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
    typedef DefaultReduceToVecPolicy Policy;
    gridReduceToColumn_<Reductor, Policy>(src, dst, stream);
}
}}
#endif

View File

@ -0,0 +1,407 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_SPLIT_MERGE_HPP__
#define __OPENCV_CUDEV_GRID_SPLIT_MERGE_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
#include "../util/vec_traits.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "detail/split_merge.hpp"
namespace cv { namespace cudev {
// Masked merge of a tuple of source pointers into one multi-channel matrix,
// with an explicit launch Policy.
//
// Compile-time requirement: VecTraits<DstType>::cn must equal the number of
// tuple elements. At run time the mask size is validated against the source
// size and dst is re-created with the source dimensions.
template <class Policy, class SrcPtrTuple, typename DstType, class MaskPtr>
__host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<DstType>::cn == tuple_size<SrcPtrTuple>::value, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    dst.create(rows, cols);

    grid_split_merge_detail::MergeImpl<VecTraits<DstType>::cn, Policy>::merge(
        shrinkPtr(src), shrinkPtr(dst), shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked merge of a tuple of source pointers into one multi-channel matrix,
// with an explicit launch Policy.
//
// Compile-time requirement: VecTraits<DstType>::cn must equal the number of
// tuple elements. dst is re-created with the source dimensions and the merge
// implementation is invoked with WithOutMask().
template <class Policy, class SrcPtrTuple, typename DstType>
__host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<DstType>::cn == tuple_size<SrcPtrTuple>::value, "" );

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    dst.create(srcRows, srcCols);

    grid_split_merge_detail::MergeImpl<VecTraits<DstType>::cn, Policy>::merge(
        shrinkPtr(src), shrinkPtr(dst), WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Masked split of a 2-channel source into two matrices passed as a tuple of
// references, with an explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 2. The mask size is
// validated against the source size, then both outputs are re-created with the
// source dimensions.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Allocate one output plane per channel.
    get<0>(dst).create(rows, cols);
    get<1>(dst).create(rows, cols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)),
        shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Masked split of a 2-channel source into an array of two matrices, with an
// explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 2. The mask size is
// validated against the source size, then dst[0] and dst[1] are re-created with
// the source dimensions.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[2], const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Allocate one output plane per channel.
    dst[0].create(rows, cols);
    dst[1].create(rows, cols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(dst[0]), shrinkPtr(dst[1]),
        shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked split of a 2-channel source into two matrices passed as a tuple of
// references, with an explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 2. Both outputs are
// re-created with the source dimensions; WithOutMask() replaces a user mask.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // Allocate one output plane per channel.
    get<0>(dst).create(srcRows, srcCols);
    get<1>(dst).create(srcRows, srcCols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)),
        WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Unmasked split of a 2-channel source into an array of two matrices, with an
// explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 2. dst[0] and
// dst[1] are re-created with the source dimensions; WithOutMask() replaces a
// user mask.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[2], Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // Allocate one output plane per channel.
    dst[0].create(srcRows, srcCols);
    dst[1].create(srcRows, srcCols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(dst[0]), shrinkPtr(dst[1]),
        WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Masked split of a 3-channel source into three matrices passed as a tuple of
// references, with an explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 3. The mask size is
// validated against the source size, then all three outputs are re-created
// with the source dimensions.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Allocate one output plane per channel.
    get<0>(dst).create(rows, cols);
    get<1>(dst).create(rows, cols);
    get<2>(dst).create(rows, cols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)),
        shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Masked split of a 3-channel source into an array of three matrices, with an
// explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 3. The mask size is
// validated against the source size, then dst[0..2] are re-created with the
// source dimensions.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[3], const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Allocate one output plane per channel.
    dst[0].create(rows, cols);
    dst[1].create(rows, cols);
    dst[2].create(rows, cols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]),
        shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Unmasked split of a 3-channel source into three matrices passed as a tuple
// of references, with an explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 3. All three
// outputs are re-created with the source dimensions; WithOutMask() replaces a
// user mask.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // Allocate one output plane per channel.
    get<0>(dst).create(srcRows, srcCols);
    get<1>(dst).create(srcRows, srcCols);
    get<2>(dst).create(srcRows, srcCols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)),
        WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Unmasked split of a 3-channel source into an array of three matrices, with
// an explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 3. dst[0..2] are
// re-created with the source dimensions; WithOutMask() replaces a user mask.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[3], Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );

    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // Allocate one output plane per channel.
    dst[0].create(srcRows, srcCols);
    dst[1].create(srcRows, srcCols);
    dst[2].create(srcRows, srcCols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]),
        WithOutMask(),
        srcRows, srcCols, StreamAccessor::getStream(stream));
}
// Masked split of a 4-channel source into four matrices passed as a tuple of
// references, with an explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 4. The mask size is
// validated against the source size, then all four outputs are re-created with
// the source dimensions.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Allocate one output plane per channel.
    get<0>(dst).create(rows, cols);
    get<1>(dst).create(rows, cols);
    get<2>(dst).create(rows, cols);
    get<3>(dst).create(rows, cols);

    grid_split_merge_detail::split<Policy>(
        shrinkPtr(src),
        shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)), shrinkPtr(get<3>(dst)),
        shrinkPtr(mask),
        rows, cols, StreamAccessor::getStream(stream));
}
// Masked split of a 4-channel source into an array of four matrices, with an
// explicit launch Policy.
//
// Compile-time requirement: the source value type has cn == 4. The mask size is
// validated against the source size, then dst[0..3] are re-created with the
// source dimensions and handed to grid_split_merge_detail::split<Policy>.
//
// Fix: the fourth plane is dst[3]. The previous code used dst[4], which is one
// past the end of the 4-element array (undefined behaviour) and left dst[3]
// unallocated and unwritten.
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[4], const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    // Allocate one output plane per channel (valid indices are 0..3).
    dst[0].create(rows, cols);
    dst[1].create(rows, cols);
    dst[2].create(rows, cols);
    dst[3].create(rows, cols);

    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[3]),
                                           shrinkPtr(mask),
                                           rows, cols,
                                           StreamAccessor::getStream(stream));
}
// Splits a 4-channel source into the four matrices referenced by the `dst` tuple (no mask).
// The channel count of the source value type is verified at compile time, and each
// destination is (re)created with the source's dimensions before the split call.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );

    const int height = getRows(src);
    const int width = getCols(src);

    // Name the tuple elements once instead of repeating get<N>(dst).
    GpuMat_<DstType>& plane0 = get<0>(dst);
    GpuMat_<DstType>& plane1 = get<1>(dst);
    GpuMat_<DstType>& plane2 = get<2>(dst);
    GpuMat_<DstType>& plane3 = get<3>(dst);

    plane0.create(height, width);
    plane1.create(height, width);
    plane2.create(height, width);
    plane3.create(height, width);

    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
                                           shrinkPtr(plane0), shrinkPtr(plane1), shrinkPtr(plane2), shrinkPtr(plane3),
                                           WithOutMask(),
                                           height, width,
                                           StreamAccessor::getStream(stream));
}
// Splits a 4-channel source into the four matrices of the `dst` array (no mask).
// The channel count of the source value type is verified at compile time, and each
// destination is (re)created with the source's dimensions before the split call.
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[4], Stream& stream = Stream::Null())
{
    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    // Valid indices of a 4-element array are 0..3; the fourth plane is dst[3].
    for (int i = 0; i < 4; ++i)
        dst[i].create(rows, cols);

    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[3]),
                                           WithOutMask(),
                                           rows, cols,
                                           StreamAccessor::getStream(stream));
}
// Default Policy
// Policy type passed by the gridMerge / gridSplit convenience overloads to the
// corresponding gridMerge_ / gridSplit_ functions.
// NOTE(review): block_size_x / block_size_y are presumably the CUDA thread-block
// dimensions read by the detail implementation — confirm there.
struct DefaultSplitMergePolicy
{
enum {
block_size_x = 32,
block_size_y = 8
};
};
// Masked merge convenience overload: forwards all arguments unchanged to
// gridMerge_ instantiated with DefaultSplitMergePolicy.
template <class SrcPtrTuple, typename DstType, class MaskPtr>
__host__ void gridMerge(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridMerge_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
// Unmasked merge convenience overload: forwards all arguments unchanged to
// gridMerge_ instantiated with DefaultSplitMergePolicy.
template <class SrcPtrTuple, typename DstType>
__host__ void gridMerge(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
gridMerge_<DefaultSplitMergePolicy>(src, dst, stream);
}
// Masked split into a tuple of two destinations: forwards to gridSplit_
// instantiated with DefaultSplitMergePolicy.
template <class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
// Unmasked split into a tuple of two destinations: forwards to gridSplit_
// instantiated with DefaultSplitMergePolicy.
template <class SrcPtr, typename DstType>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
// Masked split into a tuple of three destinations: forwards to gridSplit_
// instantiated with DefaultSplitMergePolicy.
template <class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
// Unmasked split into a tuple of three destinations: forwards to gridSplit_
// instantiated with DefaultSplitMergePolicy.
template <class SrcPtr, typename DstType>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
// Masked split into a tuple of four destinations: forwards to gridSplit_
// instantiated with DefaultSplitMergePolicy.
template <class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
// Unmasked split into a tuple of four destinations: forwards to gridSplit_
// instantiated with DefaultSplitMergePolicy.
template <class SrcPtr, typename DstType>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
// Masked split into a fixed-size array of COUNT destinations: forwards to the
// gridSplit_ array overload with DefaultSplitMergePolicy. COUNT is deduced from
// the array argument; the call only compiles for sizes that have a matching
// gridSplit_ overload.
template <class SrcPtr, typename DstType, int COUNT, class MaskPtr>
__host__ void gridSplit(const SrcPtr& src, GpuMat_<DstType> (&dst)[COUNT], const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
// Unmasked split into a fixed-size array of COUNT destinations: forwards to the
// gridSplit_ array overload with DefaultSplitMergePolicy. COUNT is deduced from
// the array argument; the call only compiles for sizes that have a matching
// gridSplit_ overload.
template <class SrcPtr, typename DstType, int COUNT>
__host__ void gridSplit(const SrcPtr& src, GpuMat_<DstType> (&dst)[COUNT], Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
}}
#endif

View File

@ -0,0 +1,309 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_TRANSFORM_HPP__
#define __OPENCV_CUDEV_GRID_TRANSFORM_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "../ptr2d/zip.hpp"
#include "detail/transform.hpp"
namespace cv { namespace cudev {
// Unary masked transform: applies `op` to `src` and stores the result in `dst`.
// The mask must match the source's dimensions (CV_Assert); `dst` is (re)created
// with the source's size before grid_transform_detail::transform is invoked.
template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    dst.create(rows, cols);

    grid_transform_detail::transform<Policy>(shrinkPtr(src),
                                             shrinkPtr(dst),
                                             op,
                                             shrinkPtr(mask),
                                             rows, cols,
                                             StreamAccessor::getStream(stream));
}
// Unary unmasked transform: applies `op` to `src` and stores the result in `dst`.
// `dst` is (re)created with the source's size, then transform is invoked with WithOutMask().
template <class Policy, class SrcPtr, typename DstType, class UnOp>
__host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
{
    const int height = getRows(src);
    const int width = getCols(src);

    dst.create(height, width);

    grid_transform_detail::transform<Policy>(shrinkPtr(src),
                                             shrinkPtr(dst),
                                             op,
                                             WithOutMask(),
                                             height, width,
                                             StreamAccessor::getStream(stream));
}
// Binary masked transform: combines `src1` and `src2` with `op` into `dst`.
// `src2` and `mask` must both match the dimensions of `src1` (CV_Assert);
// `dst` is (re)created with that size before transform is invoked.
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    const int rows = getRows(src1);
    const int cols = getCols(src1);

    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    dst.create(rows, cols);

    grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2),
                                             shrinkPtr(dst),
                                             op,
                                             shrinkPtr(mask),
                                             rows, cols,
                                             StreamAccessor::getStream(stream));
}
// Binary unmasked transform: combines `src1` and `src2` with `op` into `dst`.
// `src2` must match the dimensions of `src1` (CV_Assert); `dst` is (re)created
// with that size, then transform is invoked with WithOutMask().
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp>
__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
{
    const int rows = getRows(src1);
    const int cols = getCols(src1);

    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );

    dst.create(rows, cols);

    grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2),
                                             shrinkPtr(dst),
                                             op,
                                             WithOutMask(),
                                             rows, cols,
                                             StreamAccessor::getStream(stream));
}
// Masked transform with two outputs: `op` must be a tuple of exactly two operations
// (static check). Both destinations are (re)created with the source's size and
// passed to transform_tuple as one zipped pointer. The mask must match the source size.
template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    GpuMat_<D0>& out0 = get<0>(dst);
    GpuMat_<D1>& out1 = get<1>(dst);

    out0.create(rows, cols);
    out1.create(rows, cols);

    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
                                                   shrinkPtr(zipPtr(out0, out1)),
                                                   op,
                                                   shrinkPtr(mask),
                                                   rows, cols,
                                                   StreamAccessor::getStream(stream));
}
// Unmasked transform with two outputs: `op` must be a tuple of exactly two operations
// (static check). Both destinations are (re)created with the source's size and
// passed to transform_tuple as one zipped pointer.
template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );

    const int height = getRows(src);
    const int width = getCols(src);

    GpuMat_<D0>& out0 = get<0>(dst);
    GpuMat_<D1>& out1 = get<1>(dst);

    out0.create(height, width);
    out1.create(height, width);

    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
                                                   shrinkPtr(zipPtr(out0, out1)),
                                                   op,
                                                   WithOutMask(),
                                                   height, width,
                                                   StreamAccessor::getStream(stream));
}
// Masked transform with three outputs: `op` must be a tuple of exactly three operations
// (static check). All destinations are (re)created with the source's size and
// passed to transform_tuple as one zipped pointer. The mask must match the source size.
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    GpuMat_<D0>& out0 = get<0>(dst);
    GpuMat_<D1>& out1 = get<1>(dst);
    GpuMat_<D2>& out2 = get<2>(dst);

    out0.create(rows, cols);
    out1.create(rows, cols);
    out2.create(rows, cols);

    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
                                                   shrinkPtr(zipPtr(out0, out1, out2)),
                                                   op,
                                                   shrinkPtr(mask),
                                                   rows, cols,
                                                   StreamAccessor::getStream(stream));
}
// Unmasked transform with three outputs: `op` must be a tuple of exactly three operations
// (static check). All destinations are (re)created with the source's size and
// passed to transform_tuple as one zipped pointer.
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );

    const int height = getRows(src);
    const int width = getCols(src);

    GpuMat_<D0>& out0 = get<0>(dst);
    GpuMat_<D1>& out1 = get<1>(dst);
    GpuMat_<D2>& out2 = get<2>(dst);

    out0.create(height, width);
    out1.create(height, width);
    out2.create(height, width);

    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
                                                   shrinkPtr(zipPtr(out0, out1, out2)),
                                                   op,
                                                   WithOutMask(),
                                                   height, width,
                                                   StreamAccessor::getStream(stream));
}
// Masked transform with four outputs: `op` must be a tuple of exactly four operations
// (static check). All destinations are (re)created with the source's size and
// passed to transform_tuple as one zipped pointer. The mask must match the source size.
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );

    const int rows = getRows(src);
    const int cols = getCols(src);

    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );

    GpuMat_<D0>& out0 = get<0>(dst);
    GpuMat_<D1>& out1 = get<1>(dst);
    GpuMat_<D2>& out2 = get<2>(dst);
    GpuMat_<D3>& out3 = get<3>(dst);

    out0.create(rows, cols);
    out1.create(rows, cols);
    out2.create(rows, cols);
    out3.create(rows, cols);

    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
                                                   shrinkPtr(zipPtr(out0, out1, out2, out3)),
                                                   op,
                                                   shrinkPtr(mask),
                                                   rows, cols,
                                                   StreamAccessor::getStream(stream));
}
// Unmasked transform with four outputs: `op` must be a tuple of exactly four operations
// (static check). All destinations are (re)created with the source's size and
// passed to transform_tuple as one zipped pointer.
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
    CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );

    const int height = getRows(src);
    const int width = getCols(src);

    GpuMat_<D0>& out0 = get<0>(dst);
    GpuMat_<D1>& out1 = get<1>(dst);
    GpuMat_<D2>& out2 = get<2>(dst);
    GpuMat_<D3>& out3 = get<3>(dst);

    out0.create(height, width);
    out1.create(height, width);
    out2.create(height, width);
    out3.create(height, width);

    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
                                                   shrinkPtr(zipPtr(out0, out1, out2, out3)),
                                                   op,
                                                   WithOutMask(),
                                                   height, width,
                                                   StreamAccessor::getStream(stream));
}
// Default Policy
// Policy type passed by the gridTransform convenience overloads to gridTransform_.
// NOTE(review): block_size_x / block_size_y are presumably the CUDA thread-block
// dimensions and `shift` a per-thread work factor used by the detail kernels —
// the meaning is not visible here; confirm in the detail implementation.
struct DefaultTransformPolicy
{
enum {
block_size_x = 32,
block_size_y = 8,
shift = 4
};
};
// Unary masked transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename DstType, class Op, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
// Unary unmasked transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename DstType, class Op>
__host__ void gridTransform(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
}
// Binary masked transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
// src1 and src2 each have their own deduced pointer type (SrcPtr1 / SrcPtr2),
// mirroring the gridTransform_ signature, so the two inputs may differ in type.
template <class SrcPtr1, class SrcPtr2, typename DstType, class Op, class MaskPtr>
__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
    gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
}
// Binary unmasked transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
// src1 and src2 each have their own deduced pointer type (SrcPtr1 / SrcPtr2),
// mirroring the gridTransform_ signature, so the two inputs may differ in type.
// NOTE(review): when src2 is a non-const GpuMat_ lvalue, the unary masked overload
// (src, dst, op, mask) may also match the same argument list — verify overload
// resolution at such call sites.
template <class SrcPtr1, class SrcPtr2, typename DstType, class Op>
__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
{
    gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
}
// Masked two-output transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
// Unmasked two-output transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename D0, typename D1, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
}
// Masked three-output transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
// Unmasked three-output transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
}
// Masked four-output transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
// Unmasked four-output transform convenience overload: forwards to gridTransform_
// instantiated with DefaultTransformPolicy.
template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
}
}}
#endif

View File

@ -0,0 +1,69 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_TRANSPOSE_HPP__
#define __OPENCV_CUDEV_GRID_TRANSPOSE_HPP__
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "detail/transpose.hpp"
namespace cv { namespace cudev {
// Transposes `src` into `dst`.
// `dst` is (re)created with swapped dimensions (source cols x source rows);
// the source's own rows/cols are what get passed to transpose_detail::transpose.
template <class SrcPtr, typename DstType>
__host__ void gridTranspose(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
    const int srcRows = getRows(src);
    const int srcCols = getCols(src);

    // Output has as many rows as the input has columns, and vice versa.
    dst.create(srcCols, srcRows);

    transpose_detail::transpose(shrinkPtr(src),
                                shrinkPtr(dst),
                                srcRows, srcCols,
                                StreamAccessor::getStream(stream));
}
}}
#endif

View File

@ -0,0 +1,93 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_CONSTANT_HPP__
#define __OPENCV_CUDEV_PTR2D_CONSTANT_HPP__
#include "../common.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// 2D "pointer" that yields the same stored value for every coordinate.
template <typename T> struct ConstantPtr
{
typedef T value_type;
typedef int index_type;
// Value returned by operator() regardless of position.
T value;
// Both coordinates are ignored.
__device__ __forceinline__ T operator ()(int, int) const { return value; }
};
// ConstantPtr extended with explicit dimensions (rows x cols).
template <typename T> struct ConstantPtrSz : ConstantPtr<T>
{
int rows, cols;
};
// Builds a size-less ConstantPtr holding `value`.
template <typename T>
__host__ ConstantPtr<T> constantPtr(T value)
{
    ConstantPtr<T> result;
    result.value = value;

    return result;
}
template <typename T> ConstantPtrSz<T>
__host__ constantPtr(T value, int rows, int cols)
{
ConstantPtrSz<T> p;
p.value = value;
p.rows = rows;
p.cols = cols;
return p;
}
// PtrTraits for ConstantPtrSz, inherited from PtrTraitsBase with ConstantPtr<T>
// as the second argument.
// NOTE(review): presumably that argument is the size-less type produced by
// shrinkPtr — confirm in traits.hpp.
template <typename T> struct PtrTraits< ConstantPtrSz<T> > : PtrTraitsBase< ConstantPtrSz<T>, ConstantPtr<T> >
{
};
}}
#endif

View File

@ -0,0 +1,393 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_DERIV_HPP__
#define __OPENCV_CUDEV_PTR2D_DERIV_HPP__
#include "../common.hpp"
#include "../grid/copy.hpp"
#include "traits.hpp"
#include "gpumat.hpp"
namespace cv { namespace cudev {
// derivX
// Lazy horizontal central difference over a source pointer:
// value(y, x) = src(y, x + 1) - src(y, x - 1).
// No range check is done here, so src is called with x - 1 and x + 1 as-is.
// NOTE(review): presumably src must tolerate out-of-range columns (e.g. a
// border-handling wrapper) — confirm at call sites.
template <class SrcPtr> struct DerivXPtr
{
typedef typename PtrTraits<SrcPtr>::value_type value_type;
typedef int index_type;
SrcPtr src;
__device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
{
return src(y, x + 1) - src(y, x - 1);
}
};
// DerivXPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <class SrcPtr> struct DerivXPtrSz : DerivXPtr<SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Wraps `src` in a DerivXPtrSz: stores the shrunk source pointer together with
// the source's row and column counts.
template <class SrcPtr>
__host__ DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> derivXPtr(const SrcPtr& src)
{
    DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result;

    result.src = shrinkPtr(src);
    result.rows = getRows(src);
    result.cols = getCols(src);

    return result;
}
// PtrTraits for DerivXPtrSz, inherited from PtrTraitsBase with DerivXPtr<SrcPtr>
// as the second argument.
template <class SrcPtr> struct PtrTraits< DerivXPtrSz<SrcPtr> > : PtrTraitsBase<DerivXPtrSz<SrcPtr>, DerivXPtr<SrcPtr> >
{
};
// derivY
// Lazy vertical central difference over a source pointer:
// value(y, x) = src(y + 1, x) - src(y - 1, x).
// No range check is done here, so src is called with y - 1 and y + 1 as-is.
// NOTE(review): presumably src must tolerate out-of-range rows (e.g. a
// border-handling wrapper) — confirm at call sites.
template <class SrcPtr> struct DerivYPtr
{
typedef typename PtrTraits<SrcPtr>::value_type value_type;
typedef int index_type;
SrcPtr src;
__device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
{
return src(y + 1, x) - src(y - 1, x);
}
};
// DerivYPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <class SrcPtr> struct DerivYPtrSz : DerivYPtr<SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Wraps `src` in a DerivYPtrSz: stores the shrunk source pointer together with
// the source's row and column counts.
template <class SrcPtr>
__host__ DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> derivYPtr(const SrcPtr& src)
{
    DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result;

    result.src = shrinkPtr(src);
    result.rows = getRows(src);
    result.cols = getCols(src);

    return result;
}
// PtrTraits for DerivYPtrSz, inherited from PtrTraitsBase with DerivYPtr<SrcPtr>
// as the second argument.
template <class SrcPtr> struct PtrTraits< DerivYPtrSz<SrcPtr> > : PtrTraitsBase<DerivYPtrSz<SrcPtr>, DerivYPtr<SrcPtr> >
{
};
// sobelX
// Lazy 3x3 horizontal Sobel response over a source pointer. Column differences
// (right - left) are weighted 1, 2, 1 for the rows y - 1, y, y + 1.
// src is called at the neighbouring coordinates with no range check here.
template <class SrcPtr> struct SobelXPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;

    __device__ value_type operator ()(int y, int x) const
    {
        // Left/right neighbours, read row by row from top to bottom.
        value_type topLeft  = src(y - 1, x - 1);
        value_type topRight = src(y - 1, x + 1);
        value_type midLeft  = src(y    , x - 1);
        value_type midRight = src(y    , x + 1);
        value_type botLeft  = src(y + 1, x - 1);
        value_type botRight = src(y + 1, x + 1);

        return (topRight - topLeft) + 2 * (midRight - midLeft) + (botRight - botLeft);
    }
};
// SobelXPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <class SrcPtr> struct SobelXPtrSz : SobelXPtr<SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Wraps `src` in a SobelXPtrSz: stores the shrunk source pointer together with
// the source's row and column counts.
template <class SrcPtr>
__host__ SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> sobelXPtr(const SrcPtr& src)
{
    SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result;

    result.src = shrinkPtr(src);
    result.rows = getRows(src);
    result.cols = getCols(src);

    return result;
}
// PtrTraits for SobelXPtrSz, inherited from PtrTraitsBase with SobelXPtr<SrcPtr>
// as the second argument.
template <class SrcPtr> struct PtrTraits< SobelXPtrSz<SrcPtr> > : PtrTraitsBase<SobelXPtrSz<SrcPtr>, SobelXPtr<SrcPtr> >
{
};
// sobelY
// Lazy 3x3 vertical Sobel response over a source pointer. Row differences
// (bottom - top) are weighted 1, 2, 1 for the columns x - 1, x, x + 1.
// src is called at the neighbouring coordinates with no range check here.
template <class SrcPtr> struct SobelYPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;

    __device__ value_type operator ()(int y, int x) const
    {
        // Top row first, then bottom row, each read left to right.
        value_type topLeft  = src(y - 1, x - 1);
        value_type topMid   = src(y - 1, x);
        value_type topRight = src(y - 1, x + 1);
        value_type botLeft  = src(y + 1, x - 1);
        value_type botMid   = src(y + 1, x);
        value_type botRight = src(y + 1, x + 1);

        return (botLeft - topLeft) + 2 * (botMid - topMid) + (botRight - topRight);
    }
};
// SobelYPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <class SrcPtr> struct SobelYPtrSz : SobelYPtr<SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Wraps `src` in a SobelYPtrSz: stores the shrunk source pointer together with
// the source's row and column counts.
template <class SrcPtr>
__host__ SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> sobelYPtr(const SrcPtr& src)
{
    SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result;

    result.src = shrinkPtr(src);
    result.rows = getRows(src);
    result.cols = getCols(src);

    return result;
}
// PtrTraits for SobelYPtrSz, inherited from PtrTraitsBase with SobelYPtr<SrcPtr>
// as the second argument.
template <class SrcPtr> struct PtrTraits< SobelYPtrSz<SrcPtr> > : PtrTraitsBase<SobelYPtrSz<SrcPtr>, SobelYPtr<SrcPtr> >
{
};
// scharrX
// Lazy 3x3 horizontal Scharr response over a source pointer. Column differences
// (right - left) are weighted 3, 10, 3 for the rows y - 1, y, y + 1.
// src is called at the neighbouring coordinates with no range check here.
template <class SrcPtr> struct ScharrXPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;

    __device__ value_type operator ()(int y, int x) const
    {
        // Left/right neighbours, read row by row from top to bottom.
        value_type topLeft  = src(y - 1, x - 1);
        value_type topRight = src(y - 1, x + 1);
        value_type midLeft  = src(y    , x - 1);
        value_type midRight = src(y    , x + 1);
        value_type botLeft  = src(y + 1, x - 1);
        value_type botRight = src(y + 1, x + 1);

        return 3 * (topRight - topLeft) + 10 * (midRight - midLeft) + 3 * (botRight - botLeft);
    }
};
// ScharrXPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <class SrcPtr> struct ScharrXPtrSz : ScharrXPtr<SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Wraps `src` in a ScharrXPtrSz: stores the shrunk source pointer together with
// the source's row and column counts.
template <class SrcPtr>
__host__ ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> scharrXPtr(const SrcPtr& src)
{
    ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result;

    result.src = shrinkPtr(src);
    result.rows = getRows(src);
    result.cols = getCols(src);

    return result;
}
// PtrTraits for ScharrXPtrSz, inherited from PtrTraitsBase with ScharrXPtr<SrcPtr>
// as the second argument.
template <class SrcPtr> struct PtrTraits< ScharrXPtrSz<SrcPtr> > : PtrTraitsBase<ScharrXPtrSz<SrcPtr>, ScharrXPtr<SrcPtr> >
{
};
// scharrY
// Lazy 3x3 vertical Scharr response over a source pointer. Row differences
// (bottom - top) are weighted 3, 10, 3 for the columns x - 1, x, x + 1.
// src is called at the neighbouring coordinates with no range check here.
template <class SrcPtr> struct ScharrYPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;

    __device__ value_type operator ()(int y, int x) const
    {
        // Top row first, then bottom row, each read left to right.
        value_type topLeft  = src(y - 1, x - 1);
        value_type topMid   = src(y - 1, x);
        value_type topRight = src(y - 1, x + 1);
        value_type botLeft  = src(y + 1, x - 1);
        value_type botMid   = src(y + 1, x);
        value_type botRight = src(y + 1, x + 1);

        return 3 * (botLeft - topLeft) + 10 * (botMid - topMid) + 3 * (botRight - topRight);
    }
};
// ScharrYPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <class SrcPtr> struct ScharrYPtrSz : ScharrYPtr<SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Wraps `src` in a ScharrYPtrSz: stores the shrunk source pointer together with
// the source's row and column counts.
template <class SrcPtr>
__host__ ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> scharrYPtr(const SrcPtr& src)
{
    ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result;

    result.src = shrinkPtr(src);
    result.rows = getRows(src);
    result.cols = getCols(src);

    return result;
}
// PtrTraits for ScharrYPtrSz, inherited from PtrTraitsBase with ScharrYPtr<SrcPtr>
// as the second argument.
template <class SrcPtr> struct PtrTraits< ScharrYPtrSz<SrcPtr> > : PtrTraitsBase<ScharrYPtrSz<SrcPtr>, ScharrYPtr<SrcPtr> >
{
};
// laplacian
// Primary template is only declared; the kernel sizes defined in this header are
// the specializations for ksize == 1 and ksize == 3.
template <int ksize, class SrcPtr> struct LaplacianPtr;

// ksize == 1: sum of the four edge-adjacent neighbours minus 4 times the centre.
// src is called at the neighbouring coordinates with no range check here.
template <class SrcPtr> struct LaplacianPtr<1, SrcPtr>
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;

    __device__ value_type operator ()(int y, int x) const
    {
        // Cross-shaped neighbourhood, read top to bottom, left to right.
        value_type top    = src(y - 1, x);
        value_type left   = src(y, x - 1);
        value_type center = src(y , x);
        value_type right  = src(y, x + 1);
        value_type bottom = src(y + 1, x);

        return (top + left + right + bottom) - 4 * center;
    }
};
// ksize == 3: twice the sum of the four diagonal neighbours minus 8 times the centre.
// src is called at the neighbouring coordinates with no range check here.
template <class SrcPtr> struct LaplacianPtr<3, SrcPtr>
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;

    __device__ value_type operator ()(int y, int x) const
    {
        // Diagonal neighbourhood plus centre, read top to bottom, left to right.
        value_type topLeft  = src(y - 1, x - 1);
        value_type topRight = src(y - 1, x + 1);
        value_type center   = src(y, x);
        value_type botLeft  = src(y + 1, x - 1);
        value_type botRight = src(y + 1, x + 1);

        return 2 * (topLeft + topRight + botLeft + botRight) - 8 * center;
    }
};
// LaplacianPtr with explicit dimensions, plus a helper that materializes it into a GpuMat_.
template <int ksize, class SrcPtr> struct LaplacianPtrSz : LaplacianPtr<ksize, SrcPtr>
{
int rows, cols;
// Evaluates this expression into `dst` by calling gridCopy on `stream`.
template <typename T>
__host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
{
gridCopy(*this, dst, stream);
}
};
// Builds a Laplacian view of aperture `ksize` over `src`: stores the shrunk
// source pointer together with the source's row and column counts.
template <int ksize, class SrcPtr>
__host__ LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> laplacianPtr(const SrcPtr& src)
{
    typedef LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type filter;
    filter.src = shrinkPtr(src);
    filter.rows = getRows(src);
    filter.cols = getCols(src);

    return filter;
}
// Registers LaplacianPtrSz<ksize, SrcPtr> with PtrTraits, passing
// LaplacianPtr<ksize, SrcPtr> as the second PtrTraitsBase argument.
// NOTE(review): presumably the second argument is the size-less pointer type that
// shrinkPtr() produces - confirm against PtrTraitsBase in traits.hpp.
template <int ksize, class SrcPtr> struct PtrTraits< LaplacianPtrSz<ksize, SrcPtr> > : PtrTraitsBase<LaplacianPtrSz<ksize, SrcPtr>, LaplacianPtr<ksize, SrcPtr> >
{
};
}}
#endif

View File

@ -0,0 +1,338 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_GPUMAT_DETAIL_HPP__
#define __OPENCV_CUDEV_PTR2D_GPUMAT_DETAIL_HPP__
#include "../gpumat.hpp"
namespace cv { namespace cudev {

// ---------------------------------------------------------------------------
// Construction
// ---------------------------------------------------------------------------

// Empty matrix whose type bits in `flags` are set to DataType<T>::type.
template <typename T>
__host__ GpuMat_<T>::GpuMat_()
    : GpuMat()
{
    flags &= ~CV_MAT_TYPE_MASK;
    flags |= DataType<T>::type;
}

// arows x acols matrix of element type T.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(int arows, int acols)
    : GpuMat(arows, acols, DataType<T>::type)
{
}

// Matrix of the given size (height rows, width columns) of element type T.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(Size asize)
    : GpuMat(asize.height, asize.width, DataType<T>::type)
{
}

// arows x acols matrix; `val` is forwarded to the GpuMat constructor.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(int arows, int acols, Scalar val)
    : GpuMat(arows, acols, DataType<T>::type, val)
{
}

// Matrix of the given size; `val` is forwarded to the GpuMat constructor.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(Size asize, Scalar val)
    : GpuMat(asize.height, asize.width, DataType<T>::type, val)
{
}

// Copy constructor: delegates to GpuMat's copy constructor.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(const GpuMat_& m)
    : GpuMat(m)
{
}

// Construction from an untyped GpuMat. Three cases, tried in order:
//   1. m already has type T                -> plain GpuMat assignment;
//   2. m has T's depth but other channels  -> assignment of a reshaped header;
//   3. otherwise                           -> convertTo, which requires the
//                                             channel counts to match.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(const GpuMat& m)
    : GpuMat()
{
    flags &= ~CV_MAT_TYPE_MASK;
    flags |= DataType<T>::type;

    if (DataType<T>::type == m.type())
    {
        GpuMat::operator =(m);
    }
    else if (DataType<T>::depth == m.depth())
    {
        GpuMat::operator =(m.reshape(DataType<T>::channels, m.rows));
    }
    else
    {
        CV_Assert( DataType<T>::channels == m.channels() );
        m.convertTo(*this, type());
    }
}

// Header over caller-provided memory; astep is forwarded unchanged to GpuMat.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(int arows, int acols, T* adata, size_t astep)
    : GpuMat(arows, acols, DataType<T>::type, adata, astep)
{
}

// Header over caller-provided memory, size given as a Size.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(Size asize, T* adata, size_t astep)
    : GpuMat(asize.height, asize.width, DataType<T>::type, adata, astep)
{
}

// Sub-matrix of m selected by a row range and a column range.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(const GpuMat_& m, Range arowRange, Range acolRange)
    : GpuMat(m, arowRange, acolRange)
{
}

// Sub-matrix of m selected by a rectangle.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(const GpuMat_& m, Rect roi)
    : GpuMat(m, roi)
{
}

// Construction from host data: sets the type bits, then uploads `arr`
// through the type-checked upload() below.
template <typename T>
__host__ GpuMat_<T>::GpuMat_(InputArray arr)
    : GpuMat()
{
    flags &= ~CV_MAT_TYPE_MASK;
    flags |= DataType<T>::type;

    upload(arr);
}

// ---------------------------------------------------------------------------
// Assignment, allocation, transfer
// ---------------------------------------------------------------------------

// Typed assignment: delegates to GpuMat::operator=.
template <typename T>
__host__ GpuMat_<T>& GpuMat_<T>::operator =(const GpuMat_& m)
{
    GpuMat::operator =(m);

    return *this;
}

// create() overloads that always request element type T.
template <typename T>
__host__ void GpuMat_<T>::create(int arows, int acols)
{
    GpuMat::create(arows, acols, DataType<T>::type);
}

template <typename T>
__host__ void GpuMat_<T>::create(Size asize)
{
    GpuMat::create(asize, DataType<T>::type);
}

// Swaps with another matrix of the same element type.
template <typename T>
__host__ void GpuMat_<T>::swap(GpuMat_& mat)
{
    GpuMat::swap(mat);
}

// Upload from host memory; asserts that arr already has element type T.
template <typename T>
__host__ void GpuMat_<T>::upload(InputArray arr)
{
    CV_Assert( arr.type() == DataType<T>::type );

    GpuMat::upload(arr);
}

// Stream variant of upload(); same type check.
template <typename T>
__host__ void GpuMat_<T>::upload(InputArray arr, Stream& stream)
{
    CV_Assert( arr.type() == DataType<T>::type );

    GpuMat::upload(arr, stream);
}

// ---------------------------------------------------------------------------
// Conversion to raw 2D pointers
// ---------------------------------------------------------------------------

// Data pointer, step and size packed into a GlobPtrSz.
template <typename T>
__host__ GpuMat_<T>::operator GlobPtrSz<T>() const
{
    T* const typed_data = (T*) data;

    return globPtr(typed_data, step, rows, cols);
}

// Data pointer and step packed into a GlobPtr (no size).
template <typename T>
__host__ GpuMat_<T>::operator GlobPtr<T>() const
{
    T* const typed_data = (T*) data;

    return globPtr(typed_data, step);
}

// ---------------------------------------------------------------------------
// Typed versions of the GpuMat header operations
// ---------------------------------------------------------------------------

template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::clone() const
{
    return GpuMat_(GpuMat::clone());
}

// Header for the single row y.
template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::row(int y) const
{
    const Range one_row(y, y+1);

    return GpuMat_(*this, one_row, Range::all());
}

// Header for the single column x.
template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::col(int x) const
{
    const Range one_col(x, x+1);

    return GpuMat_(*this, Range::all(), one_col);
}

// Header for rows [startrow, endrow).
template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::rowRange(int startrow, int endrow) const
{
    const Range row_span(startrow, endrow);

    return GpuMat_(*this, row_span, Range::all());
}

template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::rowRange(Range r) const
{
    return GpuMat_(*this, r, Range::all());
}

// Header for columns [startcol, endcol).
template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::colRange(int startcol, int endcol) const
{
    const Range col_span(startcol, endcol);

    return GpuMat_(*this, Range::all(), col_span);
}

template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::colRange(Range r) const
{
    return GpuMat_(*this, Range::all(), r);
}

// Sub-matrix selection by ranges or by rectangle.
template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::operator ()(Range _rowRange, Range _colRange) const
{
    return GpuMat_(*this, _rowRange, _colRange);
}

template <typename T>
__host__ GpuMat_<T> GpuMat_<T>::operator ()(Rect roi) const
{
    return GpuMat_(*this, roi);
}

// Forwards to GpuMat::adjustROI and returns the result as a typed reference.
template <typename T>
__host__ GpuMat_<T>& GpuMat_<T>::adjustROI(int dtop, int dbottom, int dleft, int dright)
{
    GpuMat& adjusted = GpuMat::adjustROI(dtop, dbottom, dleft, dright);

    return static_cast<GpuMat_<T>&>(adjusted);
}

// ---------------------------------------------------------------------------
// Compile-time element properties. Each accessor returns the value derived
// from T and, in debug builds, checks it against the GpuMat header.
// ---------------------------------------------------------------------------

template <typename T>
__host__ size_t GpuMat_<T>::elemSize() const
{
    CV_DbgAssert( GpuMat::elemSize() == sizeof(T) );

    return sizeof(T);
}

template <typename T>
__host__ size_t GpuMat_<T>::elemSize1() const
{
    CV_DbgAssert( GpuMat::elemSize1() == sizeof(T) / DataType<T>::channels );

    return sizeof(T) / DataType<T>::channels;
}

template <typename T>
__host__ int GpuMat_<T>::type() const
{
    CV_DbgAssert( GpuMat::type() == DataType<T>::type );

    return DataType<T>::type;
}

template <typename T>
__host__ int GpuMat_<T>::depth() const
{
    CV_DbgAssert( GpuMat::depth() == DataType<T>::depth );

    return DataType<T>::depth;
}

template <typename T>
__host__ int GpuMat_<T>::channels() const
{
    CV_DbgAssert( GpuMat::channels() == DataType<T>::channels );

    return DataType<T>::channels;
}

// Row step expressed in elements of type T.
template <typename T>
__host__ size_t GpuMat_<T>::stepT() const
{
    const size_t bytes_per_elem = elemSize();

    return step / bytes_per_elem;
}

// Row step expressed in single-channel elements.
template <typename T>
__host__ size_t GpuMat_<T>::step1() const
{
    const size_t bytes_per_channel = elemSize1();

    return step / bytes_per_channel;
}

// ---------------------------------------------------------------------------
// Row access
// ---------------------------------------------------------------------------

// Typed pointer to the start of row y.
template <typename T>
__host__ T* GpuMat_<T>::operator [](int y)
{
    return (T*) ptr(y);
}

template <typename T>
__host__ const T* GpuMat_<T>::operator [](int y) const
{
    return (const T*) ptr(y);
}

// ---------------------------------------------------------------------------
// Expression templates
// ---------------------------------------------------------------------------

// Construction from an expression: sets the type bits, then evaluates the
// expression through operator=(const Expr<Body>&).
template <typename T> template <class Body>
__host__ GpuMat_<T>::GpuMat_(const Expr<Body>& expr)
    : GpuMat()
{
    flags &= ~CV_MAT_TYPE_MASK;
    flags |= DataType<T>::type;

    *this = expr;
}

// Evaluates the expression into this matrix using the body's assignTo()
// with its default stream argument.
template <typename T> template <class Body>
__host__ GpuMat_<T>& GpuMat_<T>::operator =(const Expr<Body>& expr)
{
    expr.body.assignTo(*this);

    return *this;
}

// Same as operator=, but passes an explicit stream to assignTo().
template <typename T> template <class Body>
__host__ GpuMat_<T>& GpuMat_<T>::assign(const Expr<Body>& expr, Stream& stream)
{
    expr.body.assignTo(*this, stream);

    return *this;
}

}}
#endif

View File

@ -0,0 +1,219 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_EXTRAPOLATION_HPP__
#define __OPENCV_CUDEV_PTR2D_EXTRAPOLATION_HPP__
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// BrdConstant
// Constant-border view: reads inside [0, rows) x [0, cols) come from src,
// every other coordinate yields `val`.
template <class SrcPtr> struct BrdConstant
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;
    int rows, cols;
    typename PtrTraits<SrcPtr>::value_type val;

    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
    {
        const bool x_inside = x >= 0 && x < cols;
        const bool y_inside = y >= 0 && y < rows;

        if (x_inside && y_inside)
            return src(y, x);

        return val;
    }
};
// Wraps `src` in a constant border that returns `val` outside the source extent.
template <class SrcPtr>
__host__ BrdConstant<typename PtrTraits<SrcPtr>::ptr_type> brdConstant(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type val)
{
    typedef BrdConstant<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type border;
    border.src = shrinkPtr(src);
    border.rows = getRows(src);
    border.cols = getCols(src);
    border.val = val;

    return border;
}
// Constant border whose fill value has every channel set to 0.
template <class SrcPtr>
__host__ BrdConstant<typename PtrTraits<SrcPtr>::ptr_type> brdConstant(const SrcPtr& src)
{
    typedef typename PtrTraits<SrcPtr>::value_type fill_type;

    const fill_type zero = VecTraits<fill_type>::all(0);

    return brdConstant(src, zero);
}
// BrdBase
// Generic border view. BrdImpl supplies two static index maps, idx_high and
// idx_low; every coordinate is passed through idx_high first and idx_low
// second before src is read.
template <class BrdImpl, class SrcPtr> struct BrdBase
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef int index_type;

    SrcPtr src;
    int rows, cols;

    // Maps a row index through the border policy.
    __device__ __forceinline__ int idx_row(int y) const
    {
        const int capped = BrdImpl::idx_high(y, rows);
        return BrdImpl::idx_low(capped, rows);
    }

    // Maps a column index through the border policy.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int capped = BrdImpl::idx_high(x, cols);
        return BrdImpl::idx_low(capped, cols);
    }

    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
    {
        const int src_y = idx_row(y);
        const int src_x = idx_col(x);

        return src(src_y, src_x);
    }
};
// BrdReplicate
// Replicate policy: indices are clamped to [0, len - 1].
struct BrdReplicate
{
    // Clamps from below at 0 (len is not used).
    __device__ __forceinline__ static int idx_low(int i, int len)
    {
        const int first_ind = 0;
        return ::max(i, first_ind);
    }

    // Clamps from above at len - 1.
    __device__ __forceinline__ static int idx_high(int i, int len)
    {
        const int last_ind = len - 1;
        return ::min(i, last_ind);
    }
};
// Wraps `src` in a border view that uses the BrdReplicate index policy.
template <class SrcPtr>
__host__ BrdBase<BrdReplicate, typename PtrTraits<SrcPtr>::ptr_type> brdReplicate(const SrcPtr& src)
{
    typedef BrdBase<BrdReplicate, typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type border;
    border.src = shrinkPtr(src);
    border.rows = getRows(src);
    border.cols = getCols(src);

    return border;
}
// BrdReflect101
// Reflect-101 policy: mirrors about the first / last element without
// repeating it (index -1 maps to 1, index len maps to len - 2).
struct BrdReflect101
{
    // Mirrors negative indices about 0, then wraps with % len.
    __device__ __forceinline__ static int idx_low(int i, int len)
    {
        const int mirrored = ::abs(i);
        return mirrored % len;
    }

    // Mirrors indices beyond len - 1 about len - 1, then wraps with % len.
    __device__ __forceinline__ static int idx_high(int i, int len)
    {
        const int last_ind = len - 1;
        const int overshoot = ::abs(last_ind - i);
        const int mirrored = ::abs(last_ind - overshoot);
        return mirrored % len;
    }
};
// Wraps `src` in a border view that uses the BrdReflect101 index policy.
template <class SrcPtr>
__host__ BrdBase<BrdReflect101, typename PtrTraits<SrcPtr>::ptr_type> brdReflect101(const SrcPtr& src)
{
    typedef BrdBase<BrdReflect101, typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type border;
    border.src = shrinkPtr(src);
    border.rows = getRows(src);
    border.cols = getCols(src);

    return border;
}
// BrdReflect
// Reflect policy: mirrors about the edge so that the edge element is
// repeated (index -1 maps to 0, index len maps to len - 1).
struct BrdReflect
{
    // For negative i yields |i| - 1; non-negative i is unchanged. Result is taken % len.
    __device__ __forceinline__ static int idx_low(int i, int len)
    {
        const int below_zero = (i < 0);
        const int mirrored = ::abs(i) - below_zero;
        return mirrored % len;
    }

    // For i > len - 1 yields 2 * len - 1 - i; indices up to len - 1 are unchanged.
    // Note that, unlike idx_low, no % len is applied here.
    __device__ __forceinline__ static int idx_high(int i, int len)
    {
        const int last_ind = len - 1;
        const int past_end = (i > last_ind);
        const int overshoot = ::abs(last_ind - i);
        return (last_ind - overshoot + past_end);
    }
};
// Wraps `src` in a border view that uses the BrdReflect index policy.
template <class SrcPtr>
__host__ BrdBase<BrdReflect, typename PtrTraits<SrcPtr>::ptr_type> brdReflect(const SrcPtr& src)
{
    typedef BrdBase<BrdReflect, typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type border;
    border.src = shrinkPtr(src);
    border.rows = getRows(src);
    border.cols = getCols(src);

    return border;
}
// BrdWrap
// Wrap policy: indices are taken modulo len. Both maps are written as
// arithmetic selects (a 0/1 flag times each candidate) rather than branches.
struct BrdWrap
{
    // Negative i is shifted up by a whole number of periods; i >= 0 is unchanged.
    __device__ __forceinline__ static int idx_low(int i, int len)
    {
        const int is_negative = (i < 0);
        const int is_non_negative = (i >= 0);
        const int shifted = i - ((i - len + 1) / len) * len;

        return is_non_negative * i + is_negative * shifted;
    }

    // i >= len is reduced with % len; i < len is unchanged.
    __device__ __forceinline__ static int idx_high(int i, int len)
    {
        const int is_inside = (i < len);
        const int is_past_end = (i >= len);
        const int reduced = i % len;

        return is_inside * i + is_past_end * reduced;
    }
};
// Wraps `src` in a border view that uses the BrdWrap index policy.
template <class SrcPtr>
__host__ BrdBase<BrdWrap, typename PtrTraits<SrcPtr>::ptr_type> brdWrap(const SrcPtr& src)
{
    typedef BrdBase<BrdWrap, typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type border;
    border.src = shrinkPtr(src);
    border.rows = getRows(src);
    border.cols = getCols(src);

    return border;
}
}}
#endif

View File

@ -0,0 +1,100 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_GLOB_HPP__
#define __OPENCV_CUDEV_PTR2D_GLOB_HPP__
#include "../common.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// Raw 2D pointer: a base address plus a row pitch. `step` is measured in
// bytes - row addresses are computed on a uchar pointer before being cast
// back to T*.
template <typename T> struct GlobPtr
{
    typedef T value_type;
    typedef int index_type;

    T* data;
    size_t step;

    // Pointer to the first element of row y.
    __device__ __forceinline__ T* row(int y)
    {
        uchar* const base = (uchar*) data;
        return (T*) (base + y * step);
    }

    __device__ __forceinline__ const T* row(int y) const
    {
        const uchar* const base = (const uchar*) data;
        return (const T*) (base + y * step);
    }

    // Element at row y, column x. No bounds checking.
    __device__ __forceinline__ T& operator ()(int y, int x)
    {
        return *(row(y) + x);
    }

    __device__ __forceinline__ const T& operator ()(int y, int x) const
    {
        return *(row(y) + x);
    }
};
// GlobPtr that additionally carries the extent (rows x cols) of the data.
template <typename T> struct GlobPtrSz : GlobPtr<T>
{
    int rows;
    int cols;
};
// Packs a data pointer and a row step into a GlobPtr.
template <typename T>
__host__ GlobPtr<T> globPtr(T* data, size_t step)
{
    GlobPtr<T> result;

    result.data = data;
    result.step = step;

    return result;
}
// Packs a data pointer, a row step and the extent into a GlobPtrSz.
template <typename T>
__host__ GlobPtrSz<T> globPtr(T* data, size_t step, int rows, int cols)
{
    GlobPtrSz<T> result;

    result.data = data;
    result.step = step;
    result.rows = rows;
    result.cols = cols;

    return result;
}
// Registers GlobPtrSz<T> with PtrTraits, passing GlobPtr<T> as the second
// PtrTraitsBase argument.
// NOTE(review): presumably the second argument is the size-less pointer type that
// shrinkPtr() produces - confirm against PtrTraitsBase in traits.hpp.
template <typename T> struct PtrTraits< GlobPtrSz<T> > : PtrTraitsBase<GlobPtrSz<T>, GlobPtr<T> >
{
};
}}
#endif

View File

@ -0,0 +1,161 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_GPUMAT_HPP__
#define __OPENCV_CUDEV_PTR2D_GPUMAT_HPP__
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "../expr/expr.hpp"
#include "glob.hpp"
namespace cv { namespace cudev {
//! GpuMat whose element type is fixed at compile time to T.
//! Every constructor and create() overload passes DataType<T>::type to the
//! underlying GpuMat, and the type queries (type(), depth(), channels(),
//! elemSize(), ...) return values derived from T.
template <typename T>
class GpuMat_ : public GpuMat
{
public:
typedef T value_type;
//! default constructor
__host__ GpuMat_();
//! constructs GpuMat of the specified size
__host__ GpuMat_(int arows, int acols);
__host__ explicit GpuMat_(Size asize);
//! constructs GpuMat and fills it with the specified value
__host__ GpuMat_(int arows, int acols, Scalar val);
__host__ GpuMat_(Size asize, Scalar val);
//! copy constructor
__host__ GpuMat_(const GpuMat_& m);
//! copy/conversion constructor. If m has the same type it is assigned as is;
//! if only the depth matches, a reshaped header is assigned; otherwise m is
//! converted with convertTo (the channel counts must then be equal)
__host__ explicit GpuMat_(const GpuMat& m);
//! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type
__host__ GpuMat_(int arows, int acols, T* adata, size_t astep = Mat::AUTO_STEP);
__host__ GpuMat_(Size asize, T* adata, size_t astep = Mat::AUTO_STEP);
//! selects a submatrix
__host__ GpuMat_(const GpuMat_& m, Range arowRange, Range acolRange);
__host__ GpuMat_(const GpuMat_& m, Rect roi);
//! builds GpuMat from host memory (Blocking call)
__host__ explicit GpuMat_(InputArray arr);
//! assignment operators
__host__ GpuMat_& operator =(const GpuMat_& m);
//! allocates new GpuMat data unless the GpuMat already has specified size and type
__host__ void create(int arows, int acols);
__host__ void create(Size asize);
//! swaps with another GpuMat_ of the same element type
__host__ void swap(GpuMat_& mat);
//! uploads data to the GpuMat (Blocking call); arr must already have element type T
__host__ void upload(InputArray arr);
//! uploads data to the GpuMat (Non-Blocking call); arr must already have element type T
__host__ void upload(InputArray arr, Stream& stream);
//! convert to GlobPtrSz (data, step, rows, cols) or GlobPtr (data, step)
__host__ operator GlobPtrSz<T>() const;
__host__ operator GlobPtr<T>() const;
//! typed forms of GpuMat::row() etc. - they return GpuMat_ instead of GpuMat
__host__ GpuMat_ clone() const;
__host__ GpuMat_ row(int y) const;
__host__ GpuMat_ col(int x) const;
__host__ GpuMat_ rowRange(int startrow, int endrow) const;
__host__ GpuMat_ rowRange(Range r) const;
__host__ GpuMat_ colRange(int startcol, int endcol) const;
__host__ GpuMat_ colRange(Range r) const;
__host__ GpuMat_ operator ()(Range rowRange, Range colRange) const;
__host__ GpuMat_ operator ()(Rect roi) const;
__host__ GpuMat_& adjustROI(int dtop, int dbottom, int dleft, int dright);
//! typed forms of GpuMat::elemSize() etc. - the results are derived from T
__host__ size_t elemSize() const;
__host__ size_t elemSize1() const;
__host__ int type() const;
__host__ int depth() const;
__host__ int channels() const;
//! returns step / elemSize1(), i.e. the row step in single-channel elements
__host__ size_t step1() const;
//! returns step / sizeof(T), i.e. the row step in elements of type T
__host__ size_t stepT() const;
//! more convenient forms of row and element access operators
__host__ T* operator [](int y);
__host__ const T* operator [](int y) const;
//! expression templates: construct from / assign an Expr by calling the
//! expression body's assignTo() on this matrix; assign() takes an explicit stream
template <class Body> __host__ GpuMat_(const Expr<Body>& expr);
template <class Body> __host__ GpuMat_& operator =(const Expr<Body>& expr);
template <class Body> __host__ GpuMat_& assign(const Expr<Body>& expr, Stream& stream);
};
//! Typed counterpart of GpuMat::reshape: calls mat.reshape(cn, rows) and wraps
//! the result in a GpuMat_ whose element type keeps T's channel element type
//! but has cn channels. See cvReshape.
template <int cn, typename T>
__host__ GpuMat_<typename MakeVec<typename VecTraits<T>::elem_type, cn>::type>
reshape_(const GpuMat_<T>& mat, int rows = 0)
{
    typedef typename VecTraits<T>::elem_type channel_type;
    typedef typename MakeVec<channel_type, cn>::type dst_elem_type;

    GpuMat_<dst_elem_type> reshaped(mat.reshape(cn, rows));

    return reshaped;
}
// Registers GpuMat_<T> with PtrTraits, passing GlobPtr<T> as the second
// PtrTraitsBase argument.
// NOTE(review): presumably the second argument is the size-less pointer type that
// shrinkPtr() produces - confirm against PtrTraitsBase in traits.hpp.
template <typename T> struct PtrTraits< GpuMat_<T> > : PtrTraitsBase<GpuMat_<T>, GlobPtr<T> >
{
};
}}
#include "detail/gpumat.hpp"
#endif

View File

@ -0,0 +1,385 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_INTERPOLATION_HPP__
#define __OPENCV_CUDEV_PTR2D_INTERPOLATION_HPP__
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "../util/saturate_cast.hpp"
#include "../util/type_traits.hpp"
#include "../util/limits.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// Nearest
// Nearest-neighbour sampling: float coordinates are converted to int with
// __float2int_rn and src is read at the resulting position.
template <class SrcPtr> struct NearestInterPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef float index_type;

    SrcPtr src;

    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
    {
        const int nearest_y = __float2int_rn(y);
        const int nearest_x = __float2int_rn(x);

        return src(nearest_y, nearest_x);
    }
};
// NearestInterPtr that additionally carries the extent (rows x cols) of the source.
template <class SrcPtr> struct NearestInterPtrSz : NearestInterPtr<SrcPtr>
{
    int rows;
    int cols;
};
// Wraps `src` in a nearest-neighbour sampler; the source extent is copied over.
template <class SrcPtr>
__host__ NearestInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interNearest(const SrcPtr& src)
{
    typedef NearestInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type sampler;
    sampler.src = shrinkPtr(src);
    sampler.rows = getRows(src);
    sampler.cols = getCols(src);

    return sampler;
}
// Registers NearestInterPtrSz<SrcPtr> with PtrTraits, passing
// NearestInterPtr<SrcPtr> as the second PtrTraitsBase argument.
// NOTE(review): presumably the second argument is the size-less pointer type that
// shrinkPtr() produces - confirm against PtrTraitsBase in traits.hpp.
template <class SrcPtr> struct PtrTraits< NearestInterPtrSz<SrcPtr> > : PtrTraitsBase<NearestInterPtrSz<SrcPtr>, NearestInterPtr<SrcPtr> >
{
};
// Linear
// Bilinear sampling: blends the four integer-grid neighbours of (y, x), each
// weighted by the area of the opposite sub-rectangle.
template <typename SrcPtr> struct LinearInterPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef float index_type;

    SrcPtr src;

    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
    {
        // Accumulate in a type at least as wide as float, with the same
        // channel count as the source element.
        typedef typename PtrTraits<SrcPtr>::value_type src_type;
        typedef typename VecTraits<src_type>::elem_type src_elem_type;
        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;

        // Integer cell containing (y, x): coordinates rounded down, plus one.
        const int left = __float2int_rd(x);
        const int top = __float2int_rd(y);
        const int right = left + 1;
        const int bottom = top + 1;

        // One weight per corner; the four weights sum to 1.
        const float w_top_left = (right - x) * (bottom - y);
        const float w_top_right = (x - left) * (bottom - y);
        const float w_bottom_left = (right - x) * (y - top);
        const float w_bottom_right = (x - left) * (y - top);

        work_type acc = VecTraits<work_type>::all(0);

        typename PtrTraits<SrcPtr>::value_type sample = src(top, left);
        acc = acc + sample * static_cast<work_elem_type>(w_top_left);

        sample = src(top, right);
        acc = acc + sample * static_cast<work_elem_type>(w_top_right);

        sample = src(bottom, left);
        acc = acc + sample * static_cast<work_elem_type>(w_bottom_left);

        sample = src(bottom, right);
        acc = acc + sample * static_cast<work_elem_type>(w_bottom_right);

        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(acc);
    }
};
// LinearInterPtr that additionally carries the extent (rows x cols) of the source.
template <class SrcPtr> struct LinearInterPtrSz : LinearInterPtr<SrcPtr>
{
    int rows;
    int cols;
};
// Wraps `src` in a bilinear sampler; the source extent is copied over.
template <class SrcPtr>
__host__ LinearInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interLinear(const SrcPtr& src)
{
    typedef LinearInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type sampler;
    sampler.src = shrinkPtr(src);
    sampler.rows = getRows(src);
    sampler.cols = getCols(src);

    return sampler;
}
// Registers LinearInterPtrSz<SrcPtr> with PtrTraits, passing
// LinearInterPtr<SrcPtr> as the second PtrTraitsBase argument.
// NOTE(review): presumably the second argument is the size-less pointer type that
// shrinkPtr() produces - confirm against PtrTraitsBase in traits.hpp.
template <class SrcPtr> struct PtrTraits< LinearInterPtrSz<SrcPtr> > : PtrTraitsBase<LinearInterPtrSz<SrcPtr>, LinearInterPtr<SrcPtr> >
{
};
// Cubic
// Bicubic sampling: weighted average of the source samples whose integer
// coordinates lie within a distance of 2 of (y, x) along each axis.
template <typename SrcPtr> struct CubicInterPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef float index_type;

    SrcPtr src;

    // One-dimensional cubic convolution weight for a sample at signed
    // distance x_ from the interpolation point (the Keys kernel with
    // a = -0.5):
    //   |x| <= 1 :  1.5|x|^3 - 2.5|x|^2 + 1
    //   |x| <  2 : -0.5|x|^3 + 2.5|x|^2 - 4|x| + 2
    //   otherwise: 0
    __device__ static float bicubicCoeff(float x_)
    {
        float x = ::fabsf(x_);
        if (x <= 1.0f)
        {
            return x * x * (1.5f * x - 2.5f) + 1.0f;
        }
        else if (x < 2.0f)
        {
            return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
        }
        else
        {
            return 0.0f;
        }
    }

    // Returns the interpolated value at (y, x): the sum of weighted samples
    // divided by the sum of weights, saturated to the source element type.
    // If the weights sum to (numerically) zero the result is all-zero.
    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
    {
        // Accumulate in a type at least as wide as float, with the same
        // channel count as the source element.
        typedef typename PtrTraits<SrcPtr>::value_type src_type;
        typedef typename VecTraits<src_type>::elem_type src_elem_type;
        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;

        // Integer-valued bounds of the kernel support around (y, x).
        const float xmin = ::ceilf(x - 2.0f);
        const float xmax = ::floorf(x + 2.0f);

        const float ymin = ::ceilf(y - 2.0f);
        const float ymax = ::floorf(y + 2.0f);

        work_type sum = VecTraits<work_type>::all(0);
        float wsum = 0.0f;

        for (float cy = ymin; cy <= ymax; cy += 1.0f)
        {
            for (float cx = xmin; cx <= xmax; cx += 1.0f)
            {
                typename PtrTraits<SrcPtr>::value_type src_reg = src(__float2int_rd(cy), __float2int_rd(cx));
                const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);

                sum = sum + static_cast<work_elem_type>(w) * src_reg;
                wsum += w;
            }
        }

        // Normalise by the total weight. The zero result is the fallback for
        // a vanishing weight sum only; the test used to be inverted
        // (wsum > epsilon), which returned zero for every ordinary pixel and
        // divided by ~0 in the degenerate case.
        const bool no_weight = ::fabsf(wsum) <= numeric_limits<float>::epsilon();
        work_type res = no_weight ? VecTraits<work_type>::all(0) : sum / static_cast<work_elem_type>(wsum);

        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(res);
    }
};
// CubicInterPtr that additionally carries the extent (rows x cols) of the source.
template <class SrcPtr> struct CubicInterPtrSz : CubicInterPtr<SrcPtr>
{
    int rows;
    int cols;
};
// Wraps `src` in a bicubic sampler; the source extent is copied over.
template <class SrcPtr>
__host__ CubicInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interCubic(const SrcPtr& src)
{
    typedef CubicInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type sampler;
    sampler.src = shrinkPtr(src);
    sampler.rows = getRows(src);
    sampler.cols = getCols(src);

    return sampler;
}
// Registers CubicInterPtrSz<SrcPtr> with PtrTraits, passing
// CubicInterPtr<SrcPtr> as the second PtrTraitsBase argument.
// NOTE(review): presumably the second argument is the size-less pointer type that
// shrinkPtr() produces - confirm against PtrTraitsBase in traits.hpp.
template <class SrcPtr> struct PtrTraits< CubicInterPtrSz<SrcPtr> > : PtrTraitsBase<CubicInterPtrSz<SrcPtr>, CubicInterPtr<SrcPtr> >
{
};
// IntegerArea
// Area sampling over an integer-sized window: returns the mean of the
// area_height x area_width source samples whose top-left corner is (y, x)
// rounded down.
template <typename SrcPtr> struct IntegerAreaInterPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef float index_type;

    SrcPtr src;
    int area_width, area_height;

    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
    {
        // Accumulate in a type at least as wide as float, with the same
        // channel count as the source element.
        typedef typename PtrTraits<SrcPtr>::value_type src_type;
        typedef typename VecTraits<src_type>::elem_type src_elem_type;
        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;

        // Window [x_begin, x_end) x [y_begin, y_end) in source coordinates.
        const int x_begin = __float2int_rd(x);
        const int x_end = x_begin + area_width;

        const int y_begin = __float2int_rd(y);
        const int y_end = y_begin + area_height;

        work_type total = VecTraits<work_type>::all(0);

        for (int sy = y_begin; sy < y_end; ++sy)
        {
            for (int sx = x_begin; sx < x_end; ++sx)
            {
                total = total + saturate_cast<work_type>(src(sy, sx));
            }
        }

        // Reciprocal of the number of samples in the window.
        const work_elem_type scale = 1.0f / (area_width * area_height);

        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(total * scale);
    }
};
// IntegerAreaInterPtr that additionally carries the extent (rows x cols) of the source.
template <class SrcPtr> struct IntegerAreaInterPtrSz : IntegerAreaInterPtr<SrcPtr>
{
    int rows;
    int cols;
};
// Builds an IntegerAreaInterPtrSz around `src` with an integer window of `areaSize`.
// rows/cols are copied from getRows/getCols of `src`.
template <class SrcPtr>
__host__ IntegerAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interArea(const SrcPtr& src, Size areaSize)
{
    typedef IntegerAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type wrapped;
    wrapped.src = shrinkPtr(src);
    wrapped.area_width = areaSize.width;
    wrapped.area_height = areaSize.height;
    wrapped.rows = getRows(src);
    wrapped.cols = getCols(src);

    return wrapped;
}
// Traits for IntegerAreaInterPtrSz: ptr_type is IntegerAreaInterPtr<SrcPtr>, extent is read from rows/cols.
template <class SrcPtr>
struct PtrTraits< IntegerAreaInterPtrSz<SrcPtr> >
    : PtrTraitsBase< IntegerAreaInterPtrSz<SrcPtr>, IntegerAreaInterPtr<SrcPtr> >
{
};
// CommonArea
// Area-average functor over a window with a fractional (float) size.
//
// operator()(y, x) treats [x, x + area_width) x [y, y + area_height) as the window.
// Source pixels that lie completely inside the window contribute with weight 1;
// pixels that are only partly covered on the left/right/top/bottom edge (and the
// four corners) contribute with the covered fraction. The weighted sum is then
// divided by the window area.
//
// NOTE(review): the edge handling assumes the window spans at least one pixel in
// each direction (area_width >= 1 and area_height >= 1); narrower windows are not
// treated specially here -- confirm against callers.
template <typename SrcPtr> struct CommonAreaInterPtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef float index_type;

    SrcPtr src;
    float area_width, area_height;

    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
    {
        typedef typename PtrTraits<SrcPtr>::value_type src_type;
        typedef typename VecTraits<src_type>::elem_type src_elem_type;
        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;

        const float fsx1 = x;
        const float fsx2 = fsx1 + area_width;

        // [sx1, sx2) are the columns fully inside the window: round the left edge UP
        // and the right edge DOWN. (Rounding the other way makes every partial-pixel
        // branch below unreachable and over-counts the border pixels.)
        const int sx1 = __float2int_ru(fsx1);
        const int sx2 = __float2int_rd(fsx2);

        const float fsy1 = y;
        const float fsy2 = fsy1 + area_height;

        // Same for rows: [sy1, sy2) are the fully covered rows.
        const int sy1 = __float2int_ru(fsy1);
        const int sy2 = __float2int_rd(fsy2);

        work_type out = VecTraits<work_type>::all(0);

        // Fully covered rows: whole pixels plus the fractional left/right columns.
        for (int dy = sy1; dy < sy2; ++dy)
        {
            for (int dx = sx1; dx < sx2; ++dx)
                out = out + saturate_cast<work_type>(src(dy, dx));

            if (sx1 > fsx1)
                out = out + saturate_cast<work_type>(src(dy, sx1 - 1)) * static_cast<work_elem_type>(sx1 - fsx1);

            if (sx2 < fsx2)
                out = out + saturate_cast<work_type>(src(dy, sx2)) * static_cast<work_elem_type>(fsx2 - sx2);
        }

        // Fractional top row.
        if (sy1 > fsy1)
        {
            for (int dx = sx1; dx < sx2; ++dx)
                out = out + saturate_cast<work_type>(src(sy1 - 1, dx)) * static_cast<work_elem_type>(sy1 - fsy1);
        }

        // Fractional bottom row.
        if (sy2 < fsy2)
        {
            for (int dx = sx1; dx < sx2; ++dx)
                out = out + saturate_cast<work_type>(src(sy2, dx)) * static_cast<work_elem_type>(fsy2 - sy2);
        }

        // Four corner pixels, weighted by the product of both covered fractions.
        if ((sy1 > fsy1) && (sx1 > fsx1))
            out = out + saturate_cast<work_type>(src(sy1 - 1, sx1 - 1)) * static_cast<work_elem_type>((sy1 - fsy1) * (sx1 - fsx1));

        if ((sy1 > fsy1) && (sx2 < fsx2))
            out = out + saturate_cast<work_type>(src(sy1 - 1, sx2)) * static_cast<work_elem_type>((sy1 - fsy1) * (fsx2 - sx2));

        if ((sy2 < fsy2) && (sx2 < fsx2))
            out = out + saturate_cast<work_type>(src(sy2, sx2)) * static_cast<work_elem_type>((fsy2 - sy2) * (fsx2 - sx2));

        if ((sy2 < fsy2) && (sx1 > fsx1))
            out = out + saturate_cast<work_type>(src(sy2, sx1 - 1)) * static_cast<work_elem_type>((fsy2 - sy2) * (sx1 - fsx1));

        // The weights above add up to area_width * area_height.
        const work_elem_type scale = 1.0f / (area_width * area_height);

        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(out * scale);
    }
};
// Sized variant of CommonAreaInterPtr: the same functor plus a rows/cols extent.
template <class SrcPtr>
struct CommonAreaInterPtrSz : CommonAreaInterPtr<SrcPtr>
{
    int rows;
    int cols;
};
// Builds a CommonAreaInterPtrSz around `src` with a fractional window of `areaSize`.
// rows/cols are copied from getRows/getCols of `src`.
template <class SrcPtr>
__host__ CommonAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interArea(const SrcPtr& src, Size2f areaSize)
{
    typedef CommonAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type wrapped;
    wrapped.src = shrinkPtr(src);
    wrapped.area_width = areaSize.width;
    wrapped.area_height = areaSize.height;
    wrapped.rows = getRows(src);
    wrapped.cols = getCols(src);

    return wrapped;
}
// Traits for CommonAreaInterPtrSz: ptr_type is CommonAreaInterPtr<SrcPtr>, extent is read from rows/cols.
template <class SrcPtr>
struct PtrTraits< CommonAreaInterPtrSz<SrcPtr> >
    : PtrTraitsBase< CommonAreaInterPtrSz<SrcPtr>, CommonAreaInterPtr<SrcPtr> >
{
};
}}
#endif

View File

@ -0,0 +1,100 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_LUT_HPP__
#define __OPENCV_CUDEV_PTR2D_LUT_HPP__
#include "../common.hpp"
#include "../grid/copy.hpp"
#include "traits.hpp"
#include "gpumat.hpp"
namespace cv { namespace cudev {
// Look-up-table functor: the value read from `src` at (y, x) is used as the
// column index into row 0 of `tbl`, and the table entry is returned.
template <class SrcPtr, class TablePtr> struct LutPtr
{
    typedef typename PtrTraits<TablePtr>::value_type value_type;
    typedef typename PtrTraits<SrcPtr>::index_type index_type;

    SrcPtr src;
    TablePtr tbl;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return tbl(0, src(y, x));
    }
};
// Sized variant of LutPtr: the same functor plus a rows/cols extent.
template <class SrcPtr, class TablePtr>
struct LutPtrSz : LutPtr<SrcPtr, TablePtr>
{
    int rows;
    int cols;

    // Passes this expression, `dst` and `stream` on to gridCopy.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        const LutPtrSz& self = *this;
        gridCopy(self, dst, stream);
    }
};
// Builds a LutPtrSz that maps every element of `src` through the table `tbl`.
// The table must have exactly one row (asserted). The result has the extent of `src`.
template <class SrcPtr, class TablePtr>
__host__ LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> lutPtr(const SrcPtr& src, const TablePtr& tbl)
{
    typedef LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> result_type;

    CV_Assert( getRows(tbl) == 1 );

    result_type lut;
    lut.src = shrinkPtr(src);
    lut.tbl = shrinkPtr(tbl);
    lut.rows = getRows(src);
    lut.cols = getCols(src);

    return lut;
}
// Traits for LutPtrSz: ptr_type is LutPtr<SrcPtr, TablePtr>, extent is read from rows/cols.
template <class SrcPtr, class TablePtr>
struct PtrTraits< LutPtrSz<SrcPtr, TablePtr> >
    : PtrTraitsBase< LutPtrSz<SrcPtr, TablePtr>, LutPtr<SrcPtr, TablePtr> >
{
};
}}
#endif

View File

@ -0,0 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_MASK_HPP__
#define __OPENCV_CUDEV_PTR2D_MASK_HPP__
#include "../common.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// Mask functor that accepts every position: operator() ignores its
// coordinates and always yields true.
struct WithOutMask
{
    typedef bool value_type;
    typedef int index_type;

    __device__ __forceinline__ value_type operator ()(index_type, index_type) const
    {
        return true;
    }
};
}}
#endif

View File

@ -0,0 +1,154 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_REMAP_HPP__
#define __OPENCV_CUDEV_PTR2D_REMAP_HPP__
#include "opencv2/core/base.hpp"
#include "../common.hpp"
#include "../grid/copy.hpp"
#include "traits.hpp"
#include "gpumat.hpp"
namespace cv { namespace cudev {
// Remap functor driven by a single coordinate map: `map(y, x)` yields a value
// with .x/.y members, and the source is sampled at (that.y, that.x).
template <class SrcPtr, class MapPtr> struct RemapPtr1
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef typename PtrTraits<MapPtr>::index_type index_type;

    SrcPtr src;
    MapPtr map;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        typedef typename PtrTraits<MapPtr>::value_type coord_type;

        const coord_type target = map(y, x);

        return src(target.y, target.x);
    }
};
// Remap functor driven by two separate maps: `mapx(y, x)` gives the source
// column and `mapy(y, x)` the source row to sample.
// The index type is taken from the x map.
template <class SrcPtr, class MapXPtr, class MapYPtr> struct RemapPtr2
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef typename PtrTraits<MapXPtr>::index_type index_type;

    SrcPtr src;
    MapXPtr mapx;
    MapYPtr mapy;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        const typename PtrTraits<MapXPtr>::value_type src_col = mapx(y, x);
        const typename PtrTraits<MapYPtr>::value_type src_row = mapy(y, x);

        return src(src_row, src_col);
    }
};
// Sized variant of RemapPtr1: the same functor plus a rows/cols extent.
template <class SrcPtr, class MapPtr>
struct RemapPtr1Sz : RemapPtr1<SrcPtr, MapPtr>
{
    int rows;
    int cols;

    // Passes this expression, `dst` and `stream` on to gridCopy.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        const RemapPtr1Sz& self = *this;
        gridCopy(self, dst, stream);
    }
};
// Sized variant of RemapPtr2: the same functor plus a rows/cols extent.
template <class SrcPtr, class MapXPtr, class MapYPtr>
struct RemapPtr2Sz : RemapPtr2<SrcPtr, MapXPtr, MapYPtr>
{
    int rows;
    int cols;

    // Passes this expression, `dst` and `stream` on to gridCopy.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        const RemapPtr2Sz& self = *this;
        gridCopy(self, dst, stream);
    }
};
// Builds a RemapPtr1Sz that samples `src` through the combined coordinate map `map`.
// The result takes its extent (rows/cols) from the map, not from the source.
template <class SrcPtr, class MapPtr>
__host__ RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapPtr>::ptr_type>
remapPtr(const SrcPtr& src, const MapPtr& map)
{
    typedef RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapPtr>::ptr_type> result_type;

    const int rows = getRows(map);
    const int cols = getCols(map);

    result_type remapped;
    remapped.src = shrinkPtr(src);
    remapped.map = shrinkPtr(map);
    remapped.rows = rows;
    remapped.cols = cols;

    return remapped;
}
// Builds a RemapPtr2Sz that samples `src` through separate x and y maps.
// Both maps must have the same extent (asserted); the result takes that extent.
template <class SrcPtr, class MapXPtr, class MapYPtr>
__host__ RemapPtr2Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapXPtr>::ptr_type, typename PtrTraits<MapYPtr>::ptr_type>
remapPtr(const SrcPtr& src, const MapXPtr& mapx, const MapYPtr& mapy)
{
    typedef RemapPtr2Sz<typename PtrTraits<SrcPtr>::ptr_type,
                        typename PtrTraits<MapXPtr>::ptr_type,
                        typename PtrTraits<MapYPtr>::ptr_type> result_type;

    // The x map defines the output extent.
    const int rows = getRows(mapx);
    const int cols = getCols(mapx);

    CV_Assert( getRows(mapy) == rows && getCols(mapy) == cols );

    result_type remapped;
    remapped.src = shrinkPtr(src);
    remapped.mapx = shrinkPtr(mapx);
    remapped.mapy = shrinkPtr(mapy);
    remapped.rows = rows;
    remapped.cols = cols;

    return remapped;
}
// Traits for RemapPtr1Sz: ptr_type is RemapPtr1<SrcPtr, MapPtr>, extent is read from rows/cols.
template <class SrcPtr, class MapPtr>
struct PtrTraits< RemapPtr1Sz<SrcPtr, MapPtr> >
    : PtrTraitsBase< RemapPtr1Sz<SrcPtr, MapPtr>, RemapPtr1<SrcPtr, MapPtr> >
{
};
// Traits for RemapPtr2Sz: ptr_type is RemapPtr2<SrcPtr, MapXPtr, MapYPtr>, extent is read from rows/cols.
template <class SrcPtr, class MapXPtr, class MapYPtr>
struct PtrTraits< RemapPtr2Sz<SrcPtr, MapXPtr, MapYPtr> >
    : PtrTraitsBase< RemapPtr2Sz<SrcPtr, MapXPtr, MapYPtr>, RemapPtr2<SrcPtr, MapXPtr, MapYPtr> >
{
};
}}
#endif

View File

@ -0,0 +1,103 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_RESIZE_HPP__
#define __OPENCV_CUDEV_PTR2D_RESIZE_HPP__
#include "opencv2/core/base.hpp"
#include "../common.hpp"
#include "../grid/copy.hpp"
#include "traits.hpp"
#include "gpumat.hpp"
namespace cv { namespace cudev {
// Resize functor: multiplies the requested (y, x) by the stored factors
// (fy, fx) and samples the source at the resulting float coordinates.
template <class SrcPtr> struct ResizePtr
{
    typedef typename PtrTraits<SrcPtr>::value_type value_type;
    typedef typename PtrTraits<SrcPtr>::index_type index_type;

    SrcPtr src;
    float fx, fy;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        const float src_y = static_cast<float>(y * fy);
        const float src_x = static_cast<float>(x * fx);

        return src(src_y, src_x);
    }
};
// Sized variant of ResizePtr: the same functor plus a rows/cols extent.
template <class SrcPtr>
struct ResizePtrSz : ResizePtr<SrcPtr>
{
    int rows;
    int cols;

    // Passes this expression, `dst` and `stream` on to gridCopy.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        const ResizePtrSz& self = *this;
        gridCopy(self, dst, stream);
    }
};
// Builds a ResizePtrSz that scales `src` by (fx, fy).
// The functor stores the reciprocal factors (destination -> source mapping);
// the resulting extent is the source extent multiplied by fy / fx.
template <class SrcPtr>
__host__ ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> resizePtr(const SrcPtr& src, float fx, float fy)
{
    typedef ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> result_type;

    result_type resized;
    resized.src = shrinkPtr(src);
    resized.fx = 1.0f / fx;
    resized.fy = 1.0f / fy;
    resized.rows = cv::saturate_cast<int>(getRows(src) * fy);
    resized.cols = cv::saturate_cast<int>(getCols(src) * fx);

    return resized;
}
// Traits for ResizePtrSz: ptr_type is ResizePtr<SrcPtr>, extent is read from rows/cols.
template <class SrcPtr>
struct PtrTraits< ResizePtrSz<SrcPtr> >
    : PtrTraitsBase< ResizePtrSz<SrcPtr>, ResizePtr<SrcPtr> >
{
};
}}
#endif

View File

@ -0,0 +1,148 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_TEXTURE_HPP__
#define __OPENCV_CUDEV_PTR2D_TEXTURE_HPP__
#include <cstring>
#include "../common.hpp"
#include "glob.hpp"
#include "gpumat.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// Device-side accessor for a CUDA texture object.
// operator()(y, x) fetches through tex2D; note that tex2D takes (x, y) order.
template <typename T> struct TexturePtr
{
    typedef T value_type;
    typedef float index_type;

    cudaTextureObject_t texObj;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return tex2D<T>(texObj, x, y);
    }
};
// Host-side owner of a CUDA texture object bound to pitched 2D memory.
//
// Construction asserts that the device supports FEATURE_SET_COMPUTE_30, records
// the extent of `mat` and creates a texture object over its data; the destructor
// destroys that texture object.
//
// NOTE(review): the type has a destructor but no user-defined copy constructor /
// assignment, so copying a Texture would leave two objects destroying the same
// texObj -- confirm that callers only pass it by reference.
template <typename T> struct Texture : TexturePtr<T>
{
    int rows, cols;

    // mat              - pitched 2D memory to bind
    // normalizedCoords - stored into cudaTextureDesc::normalizedCoords
    // filterMode       - stored into cudaTextureDesc::filterMode
    // addressMode      - used for all three addressMode entries
    __host__ explicit Texture(const GlobPtrSz<T>& mat,
                              bool normalizedCoords = false,
                              cudaTextureFilterMode filterMode = cudaFilterModePoint,
                              cudaTextureAddressMode addressMode = cudaAddressModeClamp)
    {
        CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );

        rows = mat.rows;
        cols = mat.cols;

        cudaResourceDesc texRes;
        cudaTextureDesc texDescr;
        fillDescriptors(mat, normalizedCoords, filterMode, addressMode, texRes, texDescr);

        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
    }

    // Same as above, for a GpuMat_<T> source.
    __host__ explicit Texture(const GpuMat_<T>& mat,
                              bool normalizedCoords = false,
                              cudaTextureFilterMode filterMode = cudaFilterModePoint,
                              cudaTextureAddressMode addressMode = cudaAddressModeClamp)
    {
        CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );

        rows = mat.rows;
        cols = mat.cols;

        cudaResourceDesc texRes;
        cudaTextureDesc texDescr;
        fillDescriptors(mat, normalizedCoords, filterMode, addressMode, texRes, texDescr);

        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
    }

    __host__ ~Texture()
    {
        // The return code is intentionally not checked here.
        cudaDestroyTextureObject(this->texObj);
    }

private:
    // Zeroes and fills the resource/texture descriptors shared by both constructors.
    // Mat2D must expose data, rows, cols and step.
    template <class Mat2D>
    __host__ static void fillDescriptors(const Mat2D& mat,
                                         bool normalizedCoords,
                                         cudaTextureFilterMode filterMode,
                                         cudaTextureAddressMode addressMode,
                                         cudaResourceDesc& texRes,
                                         cudaTextureDesc& texDescr)
    {
        std::memset(&texRes, 0, sizeof(texRes));
        texRes.resType = cudaResourceTypePitch2D;
        texRes.res.pitch2D.devPtr = mat.data;
        texRes.res.pitch2D.height = mat.rows;
        texRes.res.pitch2D.width = mat.cols;
        texRes.res.pitch2D.pitchInBytes = mat.step;
        texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();

        std::memset(&texDescr, 0, sizeof(texDescr));
        texDescr.addressMode[0] = addressMode;
        texDescr.addressMode[1] = addressMode;
        texDescr.addressMode[2] = addressMode;
        texDescr.filterMode = filterMode;
        texDescr.readMode = cudaReadModeElementType;
        texDescr.normalizedCoords = normalizedCoords;
    }
};
// Traits for Texture: ptr_type is TexturePtr<T>, extent is read from rows/cols.
template <typename T>
struct PtrTraits< Texture<T> >
    : PtrTraitsBase< Texture<T>, TexturePtr<T> >
{
};
}}
#endif

View File

@ -0,0 +1,101 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_TRAITS_HPP__
#define __OPENCV_CUDEV_PTR2D_TRAITS_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
// Default traits implementation shared by the PtrTraits specialisations.
//   Ptr2DSz - the "sized" type (must expose rows and cols)
//   Ptr2D   - the type returned by shrinkPtr (must expose value_type / index_type
//             and be constructible from Ptr2DSz)
template <class Ptr2DSz, class Ptr2D> struct PtrTraitsBase
{
    typedef Ptr2DSz ptr_sz_type;
    typedef Ptr2D ptr_type;

    typedef typename Ptr2D::value_type value_type;
    typedef typename Ptr2D::index_type index_type;

    // Converts the sized object to its plain pointer type.
    __host__ static ptr_type shrinkPtr(const ptr_sz_type& ptr)
    {
        return ptr;
    }

    // Number of rows stored in the sized object.
    __host__ static int getRows(const ptr_sz_type& ptr)
    {
        return ptr.rows;
    }

    // Number of columns stored in the sized object.
    __host__ static int getCols(const ptr_sz_type& ptr)
    {
        return ptr.cols;
    }
};
// Primary traits template: unless specialised, a type acts as both its own
// sized type and its own shrunk pointer type.
template <class Ptr2DSz>
struct PtrTraits
    : PtrTraitsBase<Ptr2DSz, Ptr2DSz>
{
};
// Free-function front end for PtrTraits<Ptr2DSz>::shrinkPtr.
template <class Ptr2DSz>
__host__ typename PtrTraits<Ptr2DSz>::ptr_type shrinkPtr(const Ptr2DSz& ptr)
{
    typedef PtrTraits<Ptr2DSz> traits;
    return traits::shrinkPtr(ptr);
}
// Free-function front end for PtrTraits<Ptr2DSz>::getRows.
template <class Ptr2DSz>
__host__ int getRows(const Ptr2DSz& ptr)
{
    typedef PtrTraits<Ptr2DSz> traits;
    return traits::getRows(ptr);
}
// Free-function front end for PtrTraits<Ptr2DSz>::getCols.
template <class Ptr2DSz>
__host__ int getCols(const Ptr2DSz& ptr)
{
    typedef PtrTraits<Ptr2DSz> traits;
    return traits::getCols(ptr);
}
}}
#endif

View File

@ -0,0 +1,151 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_TRANSFORM_HPP__
#define __OPENCV_CUDEV_PTR2D_TRANSFORM_HPP__
#include "../common.hpp"
#include "../grid/copy.hpp"
#include "traits.hpp"
#include "gpumat.hpp"
namespace cv { namespace cudev {
// UnaryTransformPtr
// Lazy element-wise transform: operator()(y, x) reads `src` at (y, x) and
// returns `op` applied to that value.
template <class SrcPtr, class Op> struct UnaryTransformPtr
{
    typedef typename Op::result_type value_type;
    typedef typename PtrTraits<SrcPtr>::index_type index_type;

    SrcPtr src;
    Op op;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return op(src(y, x));
    }
};
// Sized variant of UnaryTransformPtr: the same functor plus a rows/cols extent.
template <class SrcPtr, class Op>
struct UnaryTransformPtrSz : UnaryTransformPtr<SrcPtr, Op>
{
    int rows;
    int cols;

    // Passes this expression, `dst` and `stream` on to gridCopy.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        const UnaryTransformPtrSz& self = *this;
        gridCopy(self, dst, stream);
    }
};
// Builds a UnaryTransformPtrSz that applies `op` to every element of `src`.
// The result has the extent of `src`.
template <class SrcPtr, class Op>
__host__ UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, Op>
transformPtr(const SrcPtr& src, const Op& op)
{
    typedef UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, Op> result_type;

    result_type transformed;
    transformed.src = shrinkPtr(src);
    transformed.op = op;
    transformed.rows = getRows(src);
    transformed.cols = getCols(src);

    return transformed;
}
// Traits for UnaryTransformPtrSz: ptr_type is UnaryTransformPtr<SrcPtr, Op>, extent is read from rows/cols.
template <class SrcPtr, class Op>
struct PtrTraits< UnaryTransformPtrSz<SrcPtr, Op> >
    : PtrTraitsBase< UnaryTransformPtrSz<SrcPtr, Op>, UnaryTransformPtr<SrcPtr, Op> >
{
};
// BinaryTransformPtr
// Lazy element-wise binary transform: operator()(y, x) reads both sources at
// (y, x) and returns `op` applied to the pair.
// The index type is taken from the first source.
template <class Src1Ptr, class Src2Ptr, class Op> struct BinaryTransformPtr
{
    typedef typename Op::result_type value_type;
    typedef typename PtrTraits<Src1Ptr>::index_type index_type;

    Src1Ptr src1;
    Src2Ptr src2;
    Op op;

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return op(src1(y, x), src2(y, x));
    }
};
// Sized variant of BinaryTransformPtr: the same functor plus a rows/cols extent.
template <class Src1Ptr, class Src2Ptr, class Op>
struct BinaryTransformPtrSz : BinaryTransformPtr<Src1Ptr, Src2Ptr, Op>
{
    int rows;
    int cols;

    // Passes this expression, `dst` and `stream` on to gridCopy.
    template <typename T>
    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
    {
        const BinaryTransformPtrSz& self = *this;
        gridCopy(self, dst, stream);
    }
};
// Builds a BinaryTransformPtrSz that applies `op` to corresponding elements of
// `src1` and `src2`. Both sources must have the same extent (asserted); the
// result takes that extent.
template <class Src1Ptr, class Src2Ptr, class Op>
__host__ BinaryTransformPtrSz<typename PtrTraits<Src1Ptr>::ptr_type, typename PtrTraits<Src2Ptr>::ptr_type, Op>
transformPtr(const Src1Ptr& src1, const Src2Ptr& src2, const Op& op)
{
    typedef BinaryTransformPtrSz<typename PtrTraits<Src1Ptr>::ptr_type,
                                 typename PtrTraits<Src2Ptr>::ptr_type,
                                 Op> result_type;

    // The first source defines the output extent.
    const int rows = getRows(src1);
    const int cols = getCols(src1);

    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );

    result_type transformed;
    transformed.src1 = shrinkPtr(src1);
    transformed.src2 = shrinkPtr(src2);
    transformed.op = op;
    transformed.rows = rows;
    transformed.cols = cols;

    return transformed;
}
// Traits for BinaryTransformPtrSz: ptr_type is BinaryTransformPtr<Src1Ptr, Src2Ptr, Op>, extent is read from rows/cols.
template <class Src1Ptr, class Src2Ptr, class Op>
struct PtrTraits< BinaryTransformPtrSz<Src1Ptr, Src2Ptr, Op> >
    : PtrTraitsBase< BinaryTransformPtrSz<Src1Ptr, Src2Ptr, Op>, BinaryTransformPtr<Src1Ptr, Src2Ptr, Op> >
{
};
}}
#endif

View File

@ -0,0 +1,152 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_WARPING_HPP__
#define __OPENCV_CUDEV_PTR2D_WARPING_HPP__
#include "../common.hpp"
#include "traits.hpp"
#include "remap.hpp"
#include "gpumat.hpp"
namespace cv { namespace cudev {
// affine
// Coordinate map for affine warping.
//
// operator()(y, x) reads six coefficients warpMat[0..5] and returns
// (warpMat[0]*x + warpMat[1]*y + warpMat[2],
//  warpMat[3]*x + warpMat[4]*y + warpMat[5]) as a float2.
// Note the argument order is (y, x) while the result is packed as (x, y).
struct AffineMapPtr
{
    typedef float2 value_type;
    typedef float index_type;

    // Not owned: only the pointer to the coefficients is stored.
    const float* warpMat;

    __device__ __forceinline__ float2 operator ()(float y, float x) const
    {
        const float mappedX = warpMat[0] * x + warpMat[1] * y + warpMat[2];
        const float mappedY = warpMat[3] * x + warpMat[4] * y + warpMat[5];

        return make_float2(mappedX, mappedY);
    }
};
// AffineMapPtr extended with the dimensions of the map.
struct AffineMapPtrSz : AffineMapPtr
{
    int rows;
    int cols;
};

// PtrTraits for the sized affine map, inherited from PtrTraitsBase
// instantiated with the sized type and the size-less AffineMapPtr.
template <>
struct PtrTraits<AffineMapPtrSz>
    : PtrTraitsBase<AffineMapPtrSz, AffineMapPtr>
{
};
// Creates the sized affine coordinate map for a destination of dstSize.
//
// warpMat must be a continuous 2x3 matrix (both conditions are checked
// with CV_Assert). The result stores warpMat[0] as a raw pointer, so
// warpMat has to stay alive for as long as the returned map is in use.
__host__ static AffineMapPtrSz affineMap(Size dstSize, const GpuMat_<float>& warpMat)
{
    CV_Assert( warpMat.rows == 2 && warpMat.cols == 3 );
    CV_Assert( warpMat.isContinuous() );

    AffineMapPtrSz result;
    result.rows = dstSize.height;
    result.cols = dstSize.width;
    result.warpMat = warpMat[0];
    return result;
}
// Convenience wrapper: builds the affine coordinate map for
// (dstSize, warpMat) and passes it, together with src, to remapPtr.
template <class SrcPtr>
__host__ RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, AffineMapPtr>
warpAffinePtr(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
{
    const AffineMapPtrSz coordMap = affineMap(dstSize, warpMat);
    return remapPtr(src, coordMap);
}
// perspective
// Coordinate map for perspective warping.
//
// operator()(y, x) reads nine coefficients warpMat[0..8]. Both output
// coordinates are scaled by 1 / (warpMat[6]*x + warpMat[7]*y + warpMat[8]);
// the denominator is not checked against zero.
// Note the argument order is (y, x) while the result is packed as (x, y).
struct PerspectiveMapPtr
{
    typedef float2 value_type;
    typedef float index_type;

    // Not owned: only the pointer to the coefficients is stored.
    const float* warpMat;

    __device__ __forceinline__ float2 operator ()(float y, float x) const
    {
        const float scale = 1.0f / (warpMat[6] * x + warpMat[7] * y + warpMat[8]);

        const float mappedX = scale * (warpMat[0] * x + warpMat[1] * y + warpMat[2]);
        const float mappedY = scale * (warpMat[3] * x + warpMat[4] * y + warpMat[5]);

        return make_float2(mappedX, mappedY);
    }
};
// PerspectiveMapPtr extended with the dimensions of the map.
struct PerspectiveMapPtrSz : PerspectiveMapPtr
{
    int rows;
    int cols;
};

// PtrTraits for the sized perspective map, inherited from PtrTraitsBase
// instantiated with the sized type and the size-less PerspectiveMapPtr.
template <>
struct PtrTraits<PerspectiveMapPtrSz>
    : PtrTraitsBase<PerspectiveMapPtrSz, PerspectiveMapPtr>
{
};
// Creates the sized perspective coordinate map for a destination of dstSize.
//
// warpMat must be a continuous 3x3 matrix (both conditions are checked
// with CV_Assert). The result stores warpMat[0] as a raw pointer, so
// warpMat has to stay alive for as long as the returned map is in use.
__host__ static PerspectiveMapPtrSz perspectiveMap(Size dstSize, const GpuMat_<float>& warpMat)
{
    CV_Assert( warpMat.rows == 3 && warpMat.cols == 3 );
    CV_Assert( warpMat.isContinuous() );

    PerspectiveMapPtrSz result;
    result.rows = dstSize.height;
    result.cols = dstSize.width;
    result.warpMat = warpMat[0];
    return result;
}
// Convenience wrapper: builds the perspective coordinate map for
// (dstSize, warpMat) and passes it, together with src, to remapPtr.
template <class SrcPtr>
__host__ RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, PerspectiveMapPtr>
warpPerspectivePtr(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
{
    const PerspectiveMapPtrSz coordMap = perspectiveMap(dstSize, warpMat);
    return remapPtr(src, coordMap);
}
}}
#endif

View File

@ -0,0 +1,173 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_PTR2D_ZIP_HPP__
#define __OPENCV_CUDEV_PTR2D_ZIP_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
#include "traits.hpp"
namespace cv { namespace cudev {
// ZipPtr wraps a tuple of 2D pointers; reading element (y, x) yields a tuple
// holding element (y, x) of every wrapped pointer. The primary template is
// only declared: behavior comes from the per-arity specializations.
template <class PtrTuple> struct ZipPtr;

// Zip of two pointers. Coordinates use the index type of the first pointer.
template <class Ptr0, class Ptr1>
struct ZipPtr< tuple<Ptr0, Ptr1> > : tuple<Ptr0, Ptr1>
{
    typedef typename PtrTraits<Ptr0>::index_type index_type;
    typedef tuple<typename PtrTraits<Ptr0>::value_type,
                  typename PtrTraits<Ptr1>::value_type> value_type;

    __host__ __device__ __forceinline__ ZipPtr() {}
    __host__ __device__ __forceinline__ ZipPtr(const tuple<Ptr0, Ptr1>& t) : tuple<Ptr0, Ptr1>(t) {}

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return make_tuple(cv::cudev::get<0>(*this)(y, x),
                          cv::cudev::get<1>(*this)(y, x));
    }
};
// Zip of three pointers: element (y, x) is the tuple of the three wrapped
// pointers' elements at (y, x). Coordinates use the index type of the first
// pointer.
template <class Ptr0, class Ptr1, class Ptr2>
struct ZipPtr< tuple<Ptr0, Ptr1, Ptr2> > : tuple<Ptr0, Ptr1, Ptr2>
{
    typedef typename PtrTraits<Ptr0>::index_type index_type;
    typedef tuple<typename PtrTraits<Ptr0>::value_type,
                  typename PtrTraits<Ptr1>::value_type,
                  typename PtrTraits<Ptr2>::value_type> value_type;

    __host__ __device__ __forceinline__ ZipPtr() {}
    __host__ __device__ __forceinline__ ZipPtr(const tuple<Ptr0, Ptr1, Ptr2>& t) : tuple<Ptr0, Ptr1, Ptr2>(t) {}

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return make_tuple(cv::cudev::get<0>(*this)(y, x),
                          cv::cudev::get<1>(*this)(y, x),
                          cv::cudev::get<2>(*this)(y, x));
    }
};
// Zip of four pointers: element (y, x) is the tuple of the four wrapped
// pointers' elements at (y, x). Coordinates use the index type of the first
// pointer.
template <class Ptr0, class Ptr1, class Ptr2, class Ptr3>
struct ZipPtr< tuple<Ptr0, Ptr1, Ptr2, Ptr3> > : tuple<Ptr0, Ptr1, Ptr2, Ptr3>
{
    typedef typename PtrTraits<Ptr0>::index_type index_type;
    typedef tuple<typename PtrTraits<Ptr0>::value_type,
                  typename PtrTraits<Ptr1>::value_type,
                  typename PtrTraits<Ptr2>::value_type,
                  typename PtrTraits<Ptr3>::value_type> value_type;

    __host__ __device__ __forceinline__ ZipPtr() {}
    __host__ __device__ __forceinline__ ZipPtr(const tuple<Ptr0, Ptr1, Ptr2, Ptr3>& t) : tuple<Ptr0, Ptr1, Ptr2, Ptr3>(t) {}

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return make_tuple(cv::cudev::get<0>(*this)(y, x),
                          cv::cudev::get<1>(*this)(y, x),
                          cv::cudev::get<2>(*this)(y, x),
                          cv::cudev::get<3>(*this)(y, x));
    }
};
// ZipPtr extended with explicit dimensions. Neither constructor initializes
// rows/cols; the caller is expected to fill them in.
template <class PtrTuple>
struct ZipPtrSz : ZipPtr<PtrTuple>
{
    int rows;
    int cols;

    __host__ __device__ __forceinline__ ZipPtrSz() {}
    __host__ __device__ __forceinline__ ZipPtrSz(const PtrTuple& t) : ZipPtr<PtrTuple>(t) {}
};
// Zips two sources into one sized pointer.
//
// The dimensions are taken from ptr0; ptr1 must report the same rows and
// cols, otherwise CV_Assert fires. Each source is stored in the form
// returned by shrinkPtr.
template <class Ptr0, class Ptr1>
__host__ ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type> >
zipPtr(const Ptr0& ptr0, const Ptr1& ptr1)
{
    typedef ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type,
                            typename PtrTraits<Ptr1>::ptr_type> > zip_type;

    const int rows = getRows(ptr0);
    const int cols = getCols(ptr0);

    // Expression text is kept as-is: CV_Assert reports it in the error message.
    CV_Assert( getRows(ptr1) == rows && getCols(ptr1) == cols );

    zip_type zipped(make_tuple(shrinkPtr(ptr0), shrinkPtr(ptr1)));
    zipped.rows = rows;
    zipped.cols = cols;
    return zipped;
}
// Zips three sources into one sized pointer.
//
// The dimensions are taken from ptr0; ptr1 and ptr2 must report the same
// rows and cols, otherwise CV_Assert fires. Each source is stored in the
// form returned by shrinkPtr.
template <class Ptr0, class Ptr1, class Ptr2>
__host__ ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type, typename PtrTraits<Ptr2>::ptr_type> >
zipPtr(const Ptr0& ptr0, const Ptr1& ptr1, const Ptr2& ptr2)
{
    typedef ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type,
                            typename PtrTraits<Ptr1>::ptr_type,
                            typename PtrTraits<Ptr2>::ptr_type> > zip_type;

    const int rows = getRows(ptr0);
    const int cols = getCols(ptr0);

    // Expression text is kept as-is: CV_Assert reports it in the error message.
    CV_Assert( getRows(ptr1) == rows && getCols(ptr1) == cols );
    CV_Assert( getRows(ptr2) == rows && getCols(ptr2) == cols );

    zip_type zipped(make_tuple(shrinkPtr(ptr0), shrinkPtr(ptr1), shrinkPtr(ptr2)));
    zipped.rows = rows;
    zipped.cols = cols;
    return zipped;
}
// Zips four sources into one sized pointer.
//
// The dimensions are taken from ptr0; ptr1, ptr2 and ptr3 must report the
// same rows and cols, otherwise CV_Assert fires. Each source is stored in
// the form returned by shrinkPtr.
template <class Ptr0, class Ptr1, class Ptr2, class Ptr3>
__host__ ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type, typename PtrTraits<Ptr2>::ptr_type, typename PtrTraits<Ptr3>::ptr_type> >
zipPtr(const Ptr0& ptr0, const Ptr1& ptr1, const Ptr2& ptr2, const Ptr3& ptr3)
{
    typedef ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type,
                            typename PtrTraits<Ptr1>::ptr_type,
                            typename PtrTraits<Ptr2>::ptr_type,
                            typename PtrTraits<Ptr3>::ptr_type> > zip_type;

    const int rows = getRows(ptr0);
    const int cols = getCols(ptr0);

    // Expression text is kept as-is: CV_Assert reports it in the error message.
    CV_Assert( getRows(ptr1) == rows && getCols(ptr1) == cols );
    CV_Assert( getRows(ptr2) == rows && getCols(ptr2) == cols );
    CV_Assert( getRows(ptr3) == rows && getCols(ptr3) == cols );

    zip_type zipped(make_tuple(shrinkPtr(ptr0), shrinkPtr(ptr1), shrinkPtr(ptr2), shrinkPtr(ptr3)));
    zipped.rows = rows;
    zipped.cols = cols;
    return zipped;
}
// PtrTraits for ZipPtrSz: everything is inherited from PtrTraitsBase,
// instantiated with the sized type and the size-less ZipPtr it derives from.
template <class PtrTuple>
struct PtrTraits< ZipPtrSz<PtrTuple> >
    : PtrTraitsBase< ZipPtrSz<PtrTuple>, ZipPtr<PtrTuple> >
{
};
}}
#endif

View File

@ -0,0 +1,156 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_ATOMIC_HPP__
#define __OPENCV_CUDEV_UTIL_ATOMIC_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
// atomicAdd
// int: forwards to the global-namespace ::atomicAdd.
__device__ __forceinline__ int atomicAdd(int* address, int val)
{
    return ::atomicAdd(address, val);
}

// uint: forwards to the global-namespace ::atomicAdd.
__device__ __forceinline__ uint atomicAdd(uint* address, uint val)
{
    return ::atomicAdd(address, val);
}

// float: forwards to the global-namespace ::atomicAdd.
__device__ __forceinline__ float atomicAdd(float* address, float val)
{
    return ::atomicAdd(address, val);
}

// double: implemented as a compare-and-swap loop on the 64-bit pattern of
// the value. Returns the value that was stored at address immediately
// before the successful swap.
__device__ static double atomicAdd(double* address, double val)
{
    unsigned long long int* const word = (unsigned long long int*) address;

    unsigned long long int seen = *word;
    unsigned long long int expected;

    for (;;)
    {
        expected = seen;
        seen = ::atomicCAS(word, expected,
                           __double_as_longlong(val + __longlong_as_double(expected)));

        // The swap took effect only if nobody changed the word in between.
        if (expected == seen)
            break;
    }

    return __longlong_as_double(seen);
}
// atomicMin
// int: forwards to the global-namespace ::atomicMin.
__device__ __forceinline__ int atomicMin(int* address, int val)
{
    return ::atomicMin(address, val);
}

// uint: forwards to the global-namespace ::atomicMin.
__device__ __forceinline__ uint atomicMin(uint* address, uint val)
{
    return ::atomicMin(address, val);
}

// float: compare-and-swap loop on the 32-bit pattern, storing
// fminf(val, current). Returns the value that was stored at address
// immediately before the successful swap.
__device__ static float atomicMin(float* address, float val)
{
    int* const word = (int*) address;

    int seen = *word;
    int expected;

    for (;;)
    {
        expected = seen;
        seen = ::atomicCAS(word, expected,
                           __float_as_int(::fminf(val, __int_as_float(expected))));

        // The swap took effect only if nobody changed the word in between.
        if (expected == seen)
            break;
    }

    return __int_as_float(seen);
}

// double: compare-and-swap loop on the 64-bit pattern, storing
// fmin(val, current). Returns the value that was stored at address
// immediately before the successful swap.
__device__ static double atomicMin(double* address, double val)
{
    unsigned long long int* const word = (unsigned long long int*) address;

    unsigned long long int seen = *word;
    unsigned long long int expected;

    for (;;)
    {
        expected = seen;
        seen = ::atomicCAS(word, expected,
                           __double_as_longlong(::fmin(val, __longlong_as_double(expected))));

        // The swap took effect only if nobody changed the word in between.
        if (expected == seen)
            break;
    }

    return __longlong_as_double(seen);
}
// atomicMax
// int: forwards to the global-namespace ::atomicMax.
__device__ __forceinline__ int atomicMax(int* address, int val)
{
    return ::atomicMax(address, val);
}

// uint: forwards to the global-namespace ::atomicMax.
__device__ __forceinline__ uint atomicMax(uint* address, uint val)
{
    return ::atomicMax(address, val);
}

// float: compare-and-swap loop on the 32-bit pattern, storing
// fmaxf(val, current). Returns the value that was stored at address
// immediately before the successful swap.
__device__ static float atomicMax(float* address, float val)
{
    int* const word = (int*) address;

    int seen = *word;
    int expected;

    for (;;)
    {
        expected = seen;
        seen = ::atomicCAS(word, expected,
                           __float_as_int(::fmaxf(val, __int_as_float(expected))));

        // The swap took effect only if nobody changed the word in between.
        if (expected == seen)
            break;
    }

    return __int_as_float(seen);
}

// double: compare-and-swap loop on the 64-bit pattern, storing
// fmax(val, current). Returns the value that was stored at address
// immediately before the successful swap.
__device__ static double atomicMax(double* address, double val)
{
    unsigned long long int* const word = (unsigned long long int*) address;

    unsigned long long int seen = *word;
    unsigned long long int expected;

    for (;;)
    {
        expected = seen;
        seen = ::atomicCAS(word, expected,
                           __double_as_longlong(::fmax(val, __longlong_as_double(expected))));

        // The swap took effect only if nobody changed the word in between.
        if (expected == seen)
            break;
    }

    return __longlong_as_double(seen);
}
}}
#endif

View File

@ -0,0 +1,175 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_TUPLE_DETAIL_HPP__
#define __OPENCV_CUDEV_UTIL_TUPLE_DETAIL_HPP__
#include <thrust/tuple.h>
namespace cv { namespace cudev {
namespace tuple_detail
{
using thrust::tuple;
using thrust::tuple_size;
using thrust::get;
using thrust::tuple_element;
using thrust::make_tuple;
using thrust::tie;
template <class Tuple, int SIZE, template <typename T> class CvtOp> struct ConvertTuple;
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 2, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 3, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 4, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 5, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
typename CvtOp<typename tuple_element<4, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 6, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
typename CvtOp<typename tuple_element<5, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 7, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
typename CvtOp<typename tuple_element<6, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 8, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
typename CvtOp<typename tuple_element<6, Tuple>::type>::type,
typename CvtOp<typename tuple_element<7, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 9, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
typename CvtOp<typename tuple_element<6, Tuple>::type>::type,
typename CvtOp<typename tuple_element<7, Tuple>::type>::type,
typename CvtOp<typename tuple_element<8, Tuple>::type>::type
> type;
};
template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 10, CvtOp>
{
typedef tuple<
typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
typename CvtOp<typename tuple_element<6, Tuple>::type>::type,
typename CvtOp<typename tuple_element<7, Tuple>::type>::type,
typename CvtOp<typename tuple_element<8, Tuple>::type>::type,
typename CvtOp<typename tuple_element<9, Tuple>::type>::type
> type;
};
}
}}
#endif

View File

@ -0,0 +1,238 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_TYPE_TRAITS_DETAIL_HPP__
#define __OPENCV_CUDEV_UTIL_TYPE_TRAITS_DETAIL_HPP__
#include "../../common.hpp"
namespace cv { namespace cudev {
namespace type_traits_detail
{
// value is 1 for schar, short and int, and 0 for every other type.
// The "Intergral" spelling is part of the identifier and is kept because
// other code refers to it by this name.
template <typename T>
struct IsSignedIntergral
{
    enum { value = 0 };
};

template <>
struct IsSignedIntergral<schar>
{
    enum { value = 1 };
};

template <>
struct IsSignedIntergral<short>
{
    enum { value = 1 };
};

template <>
struct IsSignedIntergral<int>
{
    enum { value = 1 };
};
// value is 1 for uchar, ushort and uint, and 0 for every other type.
template <typename T>
struct IsUnsignedIntegral
{
    enum { value = 0 };
};

template <>
struct IsUnsignedIntegral<uchar>
{
    enum { value = 1 };
};

template <>
struct IsUnsignedIntegral<ushort>
{
    enum { value = 1 };
};

template <>
struct IsUnsignedIntegral<uint>
{
    enum { value = 1 };
};
// value is 1 when T is flagged by IsSignedIntergral or IsUnsignedIntegral;
// char and bool are additionally flagged through explicit specializations.
template <typename T>
struct IsIntegral
{
    enum { value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value };
};

template <>
struct IsIntegral<char>
{
    enum { value = 1 };
};

template <>
struct IsIntegral<bool>
{
    enum { value = 1 };
};
// value is 1 for float and double, and 0 for every other type.
template <typename T>
struct IsFloat
{
    enum { value = 0 };
};

template <>
struct IsFloat<float>
{
    enum { value = 1 };
};

template <>
struct IsFloat<double>
{
    enum { value = 1 };
};
// value is 1 for the 1- to 4-component vector types built on uchar, char,
// ushort, short, uint, int, float and double, and 0 for every other type.
template <typename T>
struct IsVec
{
    enum { value = 0 };
};

// Emits the four IsVec specializations base1 .. base4 for one element type.
#define CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(base) \
    template <> struct IsVec<base ## 1> { enum { value = 1 }; }; \
    template <> struct IsVec<base ## 2> { enum { value = 1 }; }; \
    template <> struct IsVec<base ## 3> { enum { value = 1 }; }; \
    template <> struct IsVec<base ## 4> { enum { value = 1 }; };

CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(uchar)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(char)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(ushort)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(short)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(uint)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(int)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(float)
CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST(double)

#undef CV_CUDEV_TYPE_TRAITS_DETAIL_IS_VEC_INST
// Maps a type to the form used for passing it as a parameter:
//   U    -> const U&
//   U&   -> U&   (references are passed through unchanged)
//   void -> void
template <class T>
struct AddParameterType
{
    typedef const T& type;
};

template <class T>
struct AddParameterType<T&>
{
    typedef T& type;
};

template <>
struct AddParameterType<void>
{
    typedef void type;
};
// ReferenceTraits
// Reference detection: value is 1 when the argument is an lvalue reference,
// and type is the argument with that reference removed.
template <class T>
struct ReferenceTraits
{
    typedef T type;
    enum { value = 0 };
};

template <class T>
struct ReferenceTraits<T&>
{
    typedef T type;
    enum { value = 1 };
};
// PointerTraits
// Pointer detection: value is 1 for U* and for U*& (reference to pointer),
// with type naming the pointee U. For anything else value is 0 and type
// is void.
template <class T>
struct PointerTraits
{
    typedef void type;
    enum { value = 0 };
};

template <class T>
struct PointerTraits<T*>
{
    typedef T type;
    enum { value = 1 };
};

template <class T>
struct PointerTraits<T*&>
{
    typedef T type;
    enum { value = 1 };
};
// UnConst
// Const stripping: for "const U" type is U, for "const U&" type is U&; in
// both cases value is 1. Any other argument is returned unchanged with
// value 0.
template <class T>
struct UnConst
{
    enum { value = 0 };
    typedef T type;
};

template <class T>
struct UnConst<const T>
{
    enum { value = 1 };
    typedef T type;
};

template <class T>
struct UnConst<const T&>
{
    enum { value = 1 };
    typedef T& type;
};
// UnVolatile
// Volatile stripping: for "volatile U" type is U, for "volatile U&" type is
// U&; in both cases value is 1. Any other argument is returned unchanged
// with value 0.
template <class T>
struct UnVolatile
{
    enum { value = 0 };
    typedef T type;
};

template <class T>
struct UnVolatile<volatile T>
{
    enum { value = 1 };
    typedef T type;
};

template <class T>
struct UnVolatile<volatile T&>
{
    enum { value = 1 };
    typedef T& type;
};
// IsSimpleParameter
// value is non-zero when T is flagged by IsIntegral or IsFloat, or when T
// with any reference removed is a pointer according to PointerTraits.
template <typename T>
struct IsSimpleParameter
{
    enum
    {
        value = IsIntegral<T>::value
             || IsFloat<T>::value
             || PointerTraits<typename ReferenceTraits<T>::type>::value
    };
};
// SelectIf, LargerDepth
// Compile-time conditional: type is ThenType when the bool argument is
// true, and ElseType when it is false.
template <bool, typename ThenType, typename ElseType>
struct SelectIf
{
    typedef ThenType type;
};

template <typename ThenType, typename ElseType>
struct SelectIf<false, ThenType, ElseType>
{
    typedef ElseType type;
};
// Chooses one of two types:
//   - if either argument is double, the result is double;
//   - otherwise, if either argument is float, the result is float;
//   - otherwise the result is A when sizeof(A) >= sizeof(B), else B.
// The four full specializations resolve the float/double pairings that
// would otherwise match two partial specializations at once.
template <typename First, typename Second>
struct LargerDepth
{
    typedef typename SelectIf<sizeof(First) >= sizeof(Second), First, Second>::type type;
};

// One side is float.
template <typename Other>
struct LargerDepth<Other, float>
{
    typedef float type;
};

template <typename Other>
struct LargerDepth<float, Other>
{
    typedef float type;
};

// One side is double.
template <typename Other>
struct LargerDepth<Other, double>
{
    typedef double type;
};

template <typename Other>
struct LargerDepth<double, Other>
{
    typedef double type;
};

// Both sides are floating point.
template <>
struct LargerDepth<float, float>
{
    typedef float type;
};

template <>
struct LargerDepth<float, double>
{
    typedef double type;
};

template <>
struct LargerDepth<double, float>
{
    typedef double type;
};

template <>
struct LargerDepth<double, double>
{
    typedef double type;
};
}
}}
#endif

View File

@ -0,0 +1,124 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_LIMITS_HPP__
#define __OPENCV_CUDEV_UTIL_LIMITS_HPP__
#include <limits.h>
#include <float.h>
#include "../common.hpp"
namespace cv { namespace cudev {
// Minimal numeric-limits trait whose accessors are callable from device code.
// Only the specializations below exist; the primary template is declared but
// intentionally left undefined.
//
// Each specialization exposes:
//   is_signed  - compile-time flag
//   min()/max() - range limits taken from <limits.h> / <float.h>
//   epsilon()  - floating-point types only
template <typename T> struct numeric_limits;

template <> struct numeric_limits<bool>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static bool min() { return false; }
    __device__ __forceinline__ static bool max() { return true; }
};

template <> struct numeric_limits<schar>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static schar min() { return SCHAR_MIN; }
    __device__ __forceinline__ static schar max() { return SCHAR_MAX; }
};

template <> struct numeric_limits<uchar>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static uchar min() { return 0; }
    __device__ __forceinline__ static uchar max() { return UCHAR_MAX; }
};

template <> struct numeric_limits<short>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static short min() { return SHRT_MIN; }
    __device__ __forceinline__ static short max() { return SHRT_MAX; }
};

template <> struct numeric_limits<ushort>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static ushort min() { return 0; }
    __device__ __forceinline__ static ushort max() { return USHRT_MAX; }
};

template <> struct numeric_limits<int>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static int min() { return INT_MIN; }
    __device__ __forceinline__ static int max() { return INT_MAX; }
};

template <> struct numeric_limits<uint>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static uint min() { return 0; }
    __device__ __forceinline__ static uint max() { return UINT_MAX; }
};

template <> struct numeric_limits<float>
{
    static const bool is_signed = true;

    // FLT_MIN is the smallest positive normalized value, not the most negative one.
    __device__ __forceinline__ static float min() { return FLT_MIN; }
    __device__ __forceinline__ static float max() { return FLT_MAX; }
    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
};

template <> struct numeric_limits<double>
{
    static const bool is_signed = true;

    // DBL_MIN is the smallest positive normalized value, not the most negative one.
    __device__ __forceinline__ static double min() { return DBL_MIN; }
    __device__ __forceinline__ static double max() { return DBL_MAX; }
    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
};
}}
#endif

View File

@ -0,0 +1,264 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP__
#define __OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
// Generic saturate_cast overloads, one per supported source type.
// These fall back to a plain conversion T(v); the explicit specializations
// that follow in this header replace them for destination/source pairs that
// need a saturating conversion.
template <class T> __device__ __forceinline__ T saturate_cast(uchar v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(schar v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(ushort v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(short v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(uint v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(int v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(float v)
{
    return T(v);
}
template <class T> __device__ __forceinline__ T saturate_cast(double v)
{
    return T(v);
}
// Saturating conversions to uchar.
// Each specialization issues a single PTX cvt instruction with the .sat
// modifier; the floating-point sources additionally use .rni. The result is
// produced in a 32-bit register and narrowed to uchar on return.

// schar -> uchar (the source is first widened to a 32-bit int register operand)
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
    uint dst = 0;
    int wide = v;
    asm("cvt.sat.u8.s8 %0, %1;" : "=r"(dst) : "r"(wide));
    return static_cast<uchar>(dst);
}

// short -> uchar
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
    uint dst = 0;
    asm("cvt.sat.u8.s16 %0, %1;" : "=r"(dst) : "h"(v));
    return static_cast<uchar>(dst);
}

// ushort -> uchar
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
    uint dst = 0;
    asm("cvt.sat.u8.u16 %0, %1;" : "=r"(dst) : "h"(v));
    return static_cast<uchar>(dst);
}

// int -> uchar
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
    uint dst = 0;
    asm("cvt.sat.u8.s32 %0, %1;" : "=r"(dst) : "r"(v));
    return static_cast<uchar>(dst);
}

// uint -> uchar
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
    uint dst = 0;
    asm("cvt.sat.u8.u32 %0, %1;" : "=r"(dst) : "r"(v));
    return static_cast<uchar>(dst);
}

// float -> uchar
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
    uint dst = 0;
    asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(dst) : "f"(v));
    return static_cast<uchar>(dst);
}

// double -> uchar
template <> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
    uint dst = 0;
    asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(dst) : "d"(v));
    return static_cast<uchar>(dst);
}
// Saturating conversions to schar.
// Each specialization issues a single PTX cvt instruction with the .sat
// modifier; the floating-point sources additionally use .rni. The result is
// produced in a 32-bit register and narrowed to schar on return.

// uchar -> schar (the source is first widened to a 32-bit uint register operand)
template <> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
    uint dst = 0;
    uint wide = v;
    asm("cvt.sat.s8.u8 %0, %1;" : "=r"(dst) : "r"(wide));
    return static_cast<schar>(dst);
}

// short -> schar
template <> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
    uint dst = 0;
    asm("cvt.sat.s8.s16 %0, %1;" : "=r"(dst) : "h"(v));
    return static_cast<schar>(dst);
}

// ushort -> schar
template <> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
    uint dst = 0;
    asm("cvt.sat.s8.u16 %0, %1;" : "=r"(dst) : "h"(v));
    return static_cast<schar>(dst);
}

// int -> schar
template <> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
    uint dst = 0;
    asm("cvt.sat.s8.s32 %0, %1;" : "=r"(dst) : "r"(v));
    return static_cast<schar>(dst);
}

// uint -> schar
template <> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
    uint dst = 0;
    asm("cvt.sat.s8.u32 %0, %1;" : "=r"(dst) : "r"(v));
    return static_cast<schar>(dst);
}

// float -> schar
template <> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
    uint dst = 0;
    asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(dst) : "f"(v));
    return static_cast<schar>(dst);
}

// double -> schar
template <> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
    uint dst = 0;
    asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(dst) : "d"(v));
    return static_cast<schar>(dst);
}
// Saturating conversions to ushort.
// Each specialization issues a single PTX cvt instruction with the .sat
// modifier (plus .rni for floating-point sources) that writes directly into
// a 16-bit register operand.

// schar -> ushort (the source is first widened to a 32-bit int register operand)
template <> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
    ushort dst = 0;
    int wide = v;
    asm("cvt.sat.u16.s8 %0, %1;" : "=h"(dst) : "r"(wide));
    return dst;
}

// short -> ushort
template <> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
    ushort dst = 0;
    asm("cvt.sat.u16.s16 %0, %1;" : "=h"(dst) : "h"(v));
    return dst;
}

// int -> ushort
template <> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
    ushort dst = 0;
    asm("cvt.sat.u16.s32 %0, %1;" : "=h"(dst) : "r"(v));
    return dst;
}

// uint -> ushort
template <> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
    ushort dst = 0;
    asm("cvt.sat.u16.u32 %0, %1;" : "=h"(dst) : "r"(v));
    return dst;
}

// float -> ushort
template <> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
    ushort dst = 0;
    asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(dst) : "f"(v));
    return dst;
}

// double -> ushort
template <> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
    ushort dst = 0;
    asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(dst) : "d"(v));
    return dst;
}
// Saturating conversions to short.
// Each specialization issues a single PTX cvt instruction with the .sat
// modifier (plus .rni for floating-point sources) that writes directly into
// a 16-bit register operand.

// ushort -> short
template <> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
    short dst = 0;
    asm("cvt.sat.s16.u16 %0, %1;" : "=h"(dst) : "h"(v));
    return dst;
}

// int -> short
template <> __device__ __forceinline__ short saturate_cast<short>(int v)
{
    short dst = 0;
    asm("cvt.sat.s16.s32 %0, %1;" : "=h"(dst) : "r"(v));
    return dst;
}

// uint -> short
template <> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
    short dst = 0;
    asm("cvt.sat.s16.u32 %0, %1;" : "=h"(dst) : "r"(v));
    return dst;
}

// float -> short
template <> __device__ __forceinline__ short saturate_cast<short>(float v)
{
    short dst = 0;
    asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(dst) : "f"(v));
    return dst;
}

// double -> short
template <> __device__ __forceinline__ short saturate_cast<short>(double v)
{
    short dst = 0;
    asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(dst) : "d"(v));
    return dst;
}
// Saturating conversions to int.

// uint -> int via a single PTX cvt instruction with the .sat modifier.
template <> __device__ __forceinline__ int saturate_cast<int>(uint v)
{
    int dst = 0;
    asm("cvt.sat.s32.u32 %0, %1;" : "=r"(dst) : "r"(v));
    return dst;
}

// float -> int, delegated to the CUDA intrinsic __float2int_rn.
template <> __device__ __forceinline__ int saturate_cast<int>(float v)
{
    const int rounded = __float2int_rn(v);
    return rounded;
}

// double -> int, delegated to the CUDA intrinsic __double2int_rn.
template <> __device__ __forceinline__ int saturate_cast<int>(double v)
{
    const int rounded = __double2int_rn(v);
    return rounded;
}
// Saturating conversions to uint.
// Signed integer sources use a PTX cvt instruction with the .sat modifier;
// floating-point sources are delegated to CUDA intrinsics.

// schar -> uint (the source is first widened to a 32-bit int register operand)
template <> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
{
    uint dst = 0;
    int wide = v;
    asm("cvt.sat.u32.s8 %0, %1;" : "=r"(dst) : "r"(wide));
    return dst;
}

// short -> uint
template <> __device__ __forceinline__ uint saturate_cast<uint>(short v)
{
    uint dst = 0;
    asm("cvt.sat.u32.s16 %0, %1;" : "=r"(dst) : "h"(v));
    return dst;
}

// int -> uint
template <> __device__ __forceinline__ uint saturate_cast<uint>(int v)
{
    uint dst = 0;
    asm("cvt.sat.u32.s32 %0, %1;" : "=r"(dst) : "r"(v));
    return dst;
}

// float -> uint, delegated to the CUDA intrinsic __float2uint_rn.
template <> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
    const uint rounded = __float2uint_rn(v);
    return rounded;
}

// double -> uint, delegated to the CUDA intrinsic __double2uint_rn.
template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
    const uint rounded = __double2uint_rn(v);
    return rounded;
}
}}
#endif

View File

@ -0,0 +1,913 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of NVIDIA Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_SIMD_FUNCTIONS_HPP__
#define __OPENCV_CUDEV_UTIL_SIMD_FUNCTIONS_HPP__
#include "../common.hpp"
/*
This header file contains inline functions that implement intra-word SIMD
operations, that are hardware accelerated on sm_3x (Kepler) GPUs. Efficient
emulation code paths are provided for earlier architectures (sm_1x, sm_2x)
to make the code portable across all GPUs supported by CUDA. The following
functions are currently implemented:
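vadd2(a,b) per-halfword unsigned addition: a + b (saturating via .sat on the sm_2x/sm_3x code paths, wrap-around in the sm_1x emulation)
vsub2(a,b) per-halfword unsigned subtraction: a - b (saturating via .sat on the sm_2x/sm_3x code paths, wrap-around in the sm_1x emulation)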
vabsdiff2(a,b) per-halfword unsigned absolute difference: |a - b|
vavg2(a,b) per-halfword unsigned average: (a + b) / 2
vavrg2(a,b) per-halfword unsigned rounded average: (a + b + 1) / 2
vseteq2(a,b) per-halfword unsigned comparison: a == b ? 1 : 0
vcmpeq2(a,b) per-halfword unsigned comparison: a == b ? 0xffff : 0
vsetge2(a,b) per-halfword unsigned comparison: a >= b ? 1 : 0
vcmpge2(a,b) per-halfword unsigned comparison: a >= b ? 0xffff : 0
vsetgt2(a,b) per-halfword unsigned comparison: a > b ? 1 : 0
vcmpgt2(a,b) per-halfword unsigned comparison: a > b ? 0xffff : 0
vsetle2(a,b) per-halfword unsigned comparison: a <= b ? 1 : 0
vcmple2(a,b) per-halfword unsigned comparison: a <= b ? 0xffff : 0
vsetlt2(a,b) per-halfword unsigned comparison: a < b ? 1 : 0
vcmplt2(a,b) per-halfword unsigned comparison: a < b ? 0xffff : 0
vsetne2(a,b) per-halfword unsigned comparison: a != b ? 1 : 0
vcmpne2(a,b) per-halfword unsigned comparison: a != b ? 0xffff : 0
vmax2(a,b) per-halfword unsigned maximum: max(a, b)
vmin2(a,b) per-halfword unsigned minimum: min(a, b)
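vadd4(a,b) per-byte unsigned addition: a + b (saturating via .sat on the sm_2x/sm_3x code paths, wrap-around in the sm_1x emulation)
vsub4(a,b) per-byte unsigned subtraction: a - b (saturating via .sat on the sm_2x/sm_3x code paths, wrap-around in the sm_1x emulation)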
vabsdiff4(a,b) per-byte unsigned absolute difference: |a - b|
vavg4(a,b) per-byte unsigned average: (a + b) / 2
vavrg4(a,b) per-byte unsigned rounded average: (a + b + 1) / 2
vseteq4(a,b) per-byte unsigned comparison: a == b ? 1 : 0
vcmpeq4(a,b) per-byte unsigned comparison: a == b ? 0xff : 0
vsetge4(a,b) per-byte unsigned comparison: a >= b ? 1 : 0
vcmpge4(a,b) per-byte unsigned comparison: a >= b ? 0xff : 0
vsetgt4(a,b) per-byte unsigned comparison: a > b ? 1 : 0
vcmpgt4(a,b) per-byte unsigned comparison: a > b ? 0xff : 0
vsetle4(a,b) per-byte unsigned comparison: a <= b ? 1 : 0
vcmple4(a,b) per-byte unsigned comparison: a <= b ? 0xff : 0
vsetlt4(a,b) per-byte unsigned comparison: a < b ? 1 : 0
vcmplt4(a,b) per-byte unsigned comparison: a < b ? 0xff : 0
vsetne4(a,b) per-byte unsigned comparison: a != b ? 1 : 0
vcmpne4(a,b) per-byte unsigned comparison: a != b ? 0xff : 0
vmax4(a,b) per-byte unsigned maximum: max(a, b)
vmin4(a,b) per-byte unsigned minimum: min(a, b)
*/
namespace cv { namespace cudev {
// 2
// Per-halfword unsigned addition of the two 16-bit lanes packed in a and b.
//
// NOTE(review): the sm_2x/sm_3x paths use the .sat modifier, whereas the
// emulation for older architectures wraps around within each halfword.
// Confirm which behaviour callers expect.
__device__ __forceinline__ uint vadd2(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per halfword; the previous result is
    // passed back in as the merge operand.
    asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Full 32-bit sum, then remove the carry that leaked from the low
    // halfword into bit 16.
    const uint fullSum = a + b;
    const uint carryIntoHigh = ((a ^ b) ^ fullSum) & 0x00010000;
    result = fullSum - carryIntoHigh;
#endif
    return result;
}
// Per-halfword unsigned subtraction of the two 16-bit lanes packed in a and b.
//
// NOTE(review): the sm_2x/sm_3x paths use the .sat modifier, whereas the
// emulation for older architectures wraps around within each halfword.
// Confirm which behaviour callers expect.
__device__ __forceinline__ uint vsub2(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per halfword; the previous result is
    // passed back in as the merge operand.
    asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Full 32-bit difference, then give back the borrow that the low
    // halfword took from bit 16.
    const uint fullDiff = a - b;
    const uint borrowFromHigh = ((a ^ b) ^ fullDiff) & 0x00010000;
    result = fullDiff + borrowFromHigh;
#endif
    return result;
}
// Per-halfword unsigned absolute difference |a - b| of the two 16-bit lanes
// packed in a and b.
__device__ __forceinline__ uint vabsdiff2(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per halfword; the previous result is
    // passed back in as the merge operand.
    asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Split both operands into their halfwords (kept in place, not shifted).
    const uint aLow = a & 0x0000ffff;
    const uint bLow = b & 0x0000ffff;
    const uint aHigh = a & 0xffff0000;
    const uint bHigh = b & 0xffff0000;

    // Lane-wise maximum and minimum, recombined into packed words.
    const uint packedMax = ::max(bLow, aLow) | ::max(bHigh, aHigh);
    const uint packedMin = ::min(bLow, aLow) | ::min(bHigh, aHigh);

    // |a - b| = max(a,b) - min(a,b); no lane can borrow since max >= min.
    result = packedMax - packedMin;
#endif
    return result;
}
// Per-halfword unsigned average, rounded down: (a + b) / 2 in each 16-bit lane.
//
// Uses HAKMEM #23: a + b = 2 * (a & b) + (a ^ b), hence
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1).
__device__ __forceinline__ uint vavg2(uint a, uint b)
{
    // Clear the lsb of each lane first so the shift cannot pull a bit across
    // the halfword boundary.
    const uint halfOfXor = ((a ^ b) & 0xfffefffe) >> 1;
    return (a & b) + halfOfXor;
}
// Per-halfword unsigned rounded average: (a + b + 1) / 2 in each 16-bit lane.
__device__ __forceinline__ uint vavrg2(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b), hence
    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1).
    // The lsb of each lane is cleared so the shift stays inside its halfword.
    const uint halfOfXor = ((a ^ b) & 0xfffefffe) >> 1;
    result = (a | b) - halfOfXor;
#endif
    return result;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 when
// a == b in that lane and 0 otherwise.
__device__ __forceinline__ uint vseteq2(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                        // lane is 0x0000 where a == b
    const uint withMsb = diff | 0x80008000;         // force msbs so the decrement cannot borrow across lanes
    const uint msbWasClear = diff ^ withMsb;        // msb = 1 where diff < 0x8000
    const uint decremented = withMsb - 0x00010001;  // msb = 0 where diff was 0x0000 or 0x8000
    const uint equalMsb = msbWasClear & ~decremented; // msb = 1 only where diff was 0x0000
    flags = equalMsb >> 15;                         // move each msb down to bit 0 of its lane
#endif
    return flags;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 0xffff
// when a == b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmpeq2(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 16) - flag.
    const uint flags = vseteq2(a, b);
    mask = (flags << 16) - flags;
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                        // lane is 0x0000 where a == b
    const uint withMsb = diff | 0x80008000;         // force msbs so the decrement cannot borrow across lanes
    const uint msbWasClear = diff ^ withMsb;        // msb = 1 where diff < 0x8000
    const uint decremented = withMsb - 0x00010001;  // msb = 0 where diff was 0x0000 or 0x8000
    const uint equalMsb = msbWasClear & ~decremented; // msb = 1 only where diff was 0x0000
    // Smear each msb over its lane: msb - (msb >> 15) yields 0x7fff, then OR the msb back in.
    mask = equalMsb | (equalMsb - (equalMsb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 when
// a >= b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetge2(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Complement b in place so the rounded average below evaluates
    // (a + ~b + 1) / 2 = (a - b) / 2 per lane.
    asm("not.b32 %0, %0;" : "+r"(b));
    // The msb of each lane then holds the carry-out, set when a >= b.
    const uint carryMsb = vavrg2(a, b) & 0x80008000;
    flags = carryMsb >> 15;
#endif
    return flags;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 0xffff
// when a >= b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmpge2(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 16) - flag.
    const uint flags = vsetge2(a, b);
    mask = (flags << 16) - flags;
#else
    // Complement b in place so the rounded average below evaluates
    // (a + ~b + 1) / 2 = (a - b) / 2 per lane.
    asm("not.b32 %0, %0;" : "+r"(b));
    // The msb of each lane then holds the carry-out, set when a >= b.
    const uint carryMsb = vavrg2(a, b) & 0x80008000;
    // Smear each msb over its lane.
    mask = carryMsb | (carryMsb - (carryMsb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 when
// a > b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetgt2(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Complement b in place so the truncating average below evaluates
    // (a + ~b) / 2 = (a - b) / 2 rounded down, per lane.
    asm("not.b32 %0, %0;" : "+r"(b));
    // The msb of each lane then holds the carry-out, set when a > b.
    const uint carryMsb = vavg2(a, b) & 0x80008000;
    flags = carryMsb >> 15;
#endif
    return flags;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 0xffff
// when a > b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmpgt2(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 16) - flag.
    const uint flags = vsetgt2(a, b);
    mask = (flags << 16) - flags;
#else
    // Complement b in place so the truncating average below evaluates
    // (a + ~b) / 2 = (a - b) / 2 rounded down, per lane.
    asm("not.b32 %0, %0;" : "+r"(b));
    // The msb of each lane then holds the carry-out, set when a > b.
    const uint carryMsb = vavg2(a, b) & 0x80008000;
    // Smear each msb over its lane.
    mask = carryMsb | (carryMsb - (carryMsb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 when
// a <= b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetle2(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Complement a in place so the rounded average below evaluates
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a <= b.
    const uint carryMsb = vavrg2(a, b) & 0x80008000;
    flags = carryMsb >> 15;
#endif
    return flags;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 0xffff
// when a <= b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmple2(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 16) - flag.
    const uint flags = vsetle2(a, b);
    mask = (flags << 16) - flags;
#else
    // Complement a in place so the rounded average below evaluates
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a <= b.
    const uint carryMsb = vavrg2(a, b) & 0x80008000;
    // Smear each msb over its lane.
    mask = carryMsb | (carryMsb - (carryMsb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 when
// a < b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetlt2(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Complement a in place so the truncating average below evaluates
    // (b + ~a) / 2 = (b - a) / 2 rounded down, per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a < b.
    const uint carryMsb = vavg2(a, b) & 0x80008000;
    flags = carryMsb >> 15;
#endif
    return flags;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 0xffff
// when a < b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmplt2(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 16) - flag.
    const uint flags = vsetlt2(a, b);
    mask = (flags << 16) - flags;
#else
    // Complement a in place so the truncating average below evaluates
    // (b + ~a) / 2 = (b - a) / 2 rounded down, per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a < b.
    const uint carryMsb = vavg2(a, b) & 0x80008000;
    // Smear each msb over its lane.
    mask = carryMsb | (carryMsb - (carryMsb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 when
// a != b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetne2(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                                   // lane is 0x0000 where a == b
    const uint decremented = (diff | 0x80008000) - 0x00010001; // msb = 0 where diff was 0x0000 or 0x8000
    const uint differMsb = (diff | decremented) & 0x80008000;  // msb = 1 where diff was not 0x0000
    flags = differMsb >> 15;                                   // move each msb down to bit 0 of its lane
#endif
    return flags;
}
// Per-halfword unsigned comparison: each 16-bit lane of the result is 0xffff
// when a != b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmpne2(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 16) - flag.
    const uint flags = vsetne2(a, b);
    mask = (flags << 16) - flags;
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                                   // lane is 0x0000 where a == b
    const uint decremented = (diff | 0x80008000) - 0x00010001; // msb = 0 where diff was 0x0000 or 0x8000
    const uint differMsb = (diff | decremented) & 0x80008000;  // msb = 1 where diff was not 0x0000
    // Smear each msb over its lane.
    mask = differMsb | (differMsb - (differMsb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned maximum: max(a, b) in each 16-bit lane.
__device__ __forceinline__ uint vmax2(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per halfword; the previous result is
    // passed back in as the merge operand.
    asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Compare the halfwords in place (no shifting needed) and merge them.
    const uint lowMax = ::max(a & 0x0000ffff, b & 0x0000ffff);
    const uint highMax = ::max(a & 0xffff0000, b & 0xffff0000);
    result = lowMax | highMax;
#endif
    return result;
}
// Per-halfword unsigned minimum: min(a, b) in each 16-bit lane.
__device__ __forceinline__ uint vmin2(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per halfword; the previous result is
    // passed back in as the merge operand.
    asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Compare the halfwords in place (no shifting needed) and merge them.
    const uint lowMin = ::min(a & 0x0000ffff, b & 0x0000ffff);
    const uint highMin = ::min(a & 0xffff0000, b & 0xffff0000);
    result = lowMin | highMin;
#endif
    return result;
}
// 4
// Per-byte unsigned addition of the four 8-bit lanes packed in a and b.
//
// NOTE(review): the sm_2x/sm_3x paths use the .sat modifier, whereas the
// emulation for older architectures wraps around within each byte.
// Confirm which behaviour callers expect.
__device__ __forceinline__ uint vadd4(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per byte; the previous result is passed
    // back in as the merge operand.
    asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Add the low 7 bits of every byte; the carry out of bit 6 lands in the
    // (cleared) msb and therefore cannot spill into the next byte.
    const uint low7Sum = (a & 0x7f7f7f7f) + (b & 0x7f7f7f7f);
    // Sum bits of the msbs themselves, without any carry-out.
    const uint msbSum = (a ^ b) & 0x80808080;
    result = low7Sum ^ msbSum;
#endif
    return result;
}
// Per-byte unsigned subtraction of the four 8-bit lanes packed in a and b.
//
// NOTE(review): the sm_2x/sm_3x paths use the .sat modifier, whereas the
// emulation for older architectures wraps around within each byte.
// Confirm which behaviour callers expect.
__device__ __forceinline__ uint vsub4(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif CV_CUDEV_ARCH >= 200
    // One scalar video instruction per byte; the previous result is passed
    // back in as the merge operand.
    asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Subtract the low 7 bits of every byte with the minuend's msb forced to
    // 1, so a borrow is absorbed by that msb instead of the next byte; the
    // msb then records the inverted borrow.
    const uint low7Diff = (a | 0x80808080) - (b & 0x7f7f7f7f);
    // Inverted sum bits of the msbs themselves.
    const uint invertedMsbSum = (a ^ ~b) & 0x80808080;
    result = low7Diff ^ invertedMsbSum;
#endif
    return result;
}
// Per-byte unsigned average, rounded down: (a + b) / 2 in each 8-bit lane.
//
// Uses HAKMEM #23: a + b = 2 * (a & b) + (a ^ b), hence
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1).
__device__ __forceinline__ uint vavg4(uint a, uint b)
{
    // Clear the lsb of each lane first so the shift cannot pull a bit across
    // the byte boundary.
    const uint halfOfXor = ((a ^ b) & 0xfefefefe) >> 1;
    return (a & b) + halfOfXor;
}
// Per-byte unsigned rounded average: (a + b + 1) / 2 in each 8-bit lane.
__device__ __forceinline__ uint vavrg4(uint a, uint b)
{
    uint result = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b), hence
    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1).
    // The lsb of each lane is cleared so the shift stays inside its byte.
    const uint halfOfXor = ((a ^ b) & 0xfefefefe) >> 1;
    result = (a | b) - halfOfXor;
#endif
    return result;
}
// Per-byte unsigned comparison: each 8-bit lane of the result is 1 when
// a == b in that lane and 0 otherwise.
__device__ __forceinline__ uint vseteq4(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                        // lane is 0x00 where a == b
    const uint withMsb = diff | 0x80808080;         // force msbs so the decrement cannot borrow across lanes
    const uint msbWasClear = diff ^ withMsb;        // msb = 1 where diff < 0x80
    const uint decremented = withMsb - 0x01010101;  // msb = 0 where diff was 0x00 or 0x80
    const uint equalMsb = msbWasClear & ~decremented; // msb = 1 only where diff was 0x00
    flags = equalMsb >> 7;                          // move each msb down to bit 0 of its lane
#endif
    return flags;
}
// Per-byte unsigned comparison: each 8-bit lane of the result is 0xff when
// a == b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmpeq4(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 8) - flag.
    const uint flags = vseteq4(a, b);
    mask = (flags << 8) - flags;
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                        // lane is 0x00 where a == b
    const uint withMsb = diff | 0x80808080;         // force msbs so the decrement cannot borrow across lanes
    const uint msbWasClear = diff ^ withMsb;        // msb = 1 where diff < 0x80
    const uint decremented = withMsb - 0x01010101;  // msb = 0 where diff was 0x00 or 0x80
    const uint equalMsb = msbWasClear & ~decremented; // msb = 1 only where diff was 0x00
    // Smear each msb over its lane: msb - (msb >> 7) yields 0x7f, then OR the msb back in.
    mask = (equalMsb - (equalMsb >> 7)) | equalMsb;
#endif
    return mask;
}
// Per-byte unsigned comparison: each 8-bit lane of the result is 1 when
// a <= b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetle4(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Complement a in place so the rounded average below evaluates
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a <= b.
    const uint carryMsb = vavrg4(a, b) & 0x80808080;
    flags = carryMsb >> 7;
#endif
    return flags;
}
// Per-byte unsigned comparison: each 8-bit lane of the result is 0xff when
// a <= b in that lane and 0 otherwise.
__device__ __forceinline__ uint vcmple4(uint a, uint b)
{
    uint mask;
#if CV_CUDEV_ARCH >= 300
    // Expand each 0/1 lane flag into a full-lane mask: (flag << 8) - flag.
    const uint flags = vsetle4(a, b);
    mask = (flags << 8) - flags;
#else
    // Complement a in place so the rounded average below evaluates
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a <= b.
    const uint carryMsb = vavrg4(a, b) & 0x80808080;
    // Smear each msb over its lane.
    mask = carryMsb | (carryMsb - (carryMsb >> 7));
#endif
    return mask;
}
// Per-byte unsigned comparison: each 8-bit lane of the result is 1 when
// a < b in that lane and 0 otherwise.
__device__ __forceinline__ uint vsetlt4(uint a, uint b)
{
    uint flags = 0;
#if CV_CUDEV_ARCH >= 300
    asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // Complement a in place so the truncating average below evaluates
    // (b + ~a) / 2 = (b - a) / 2 rounded down, per lane.
    asm("not.b32 %0, %0;" : "+r"(a));
    // The msb of each lane then holds the carry-out, set when a < b.
    const uint carryMsb = vavg4(a, b) & 0x80808080;
    flags = carryMsb >> 7;
#endif
    return flags;
}
// Per-byte unsigned "a < b" returning byte masks instead of flags: the
// fallback path produces 0xff in every lane whose msb is set after the
// averaging step and 0x00 elsewhere.
// NOTE(review): vavg4 is defined outside this excerpt; per the comment below
// it is taken to be the per-byte rounded-down average - confirm.
__device__ __forceinline__ uint vcmplt4(uint a, uint b)
{
uint r, c;
#if CV_CUDEV_ARCH >= 300
r = vsetlt4(a, b);
// (r << 8) - r == r * 255 (mod 2^32): every 0x01 flag byte becomes 0xff
c = r << 8; // convert bool
r = c - r; // to mask
#else
// bitwise-invert the local copy of a in place
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
// Per-byte unsigned "a >= b" on two words holding four packed 8-bit lanes.
// The fallback path yields 0x01 in every byte lane whose msb is set after the
// averaging step and 0x00 elsewhere.
// NOTE(review): vavrg4 is defined outside this excerpt; per the comment below
// it is taken to be the per-byte rounded-up average - confirm.
__device__ __forceinline__ uint vsetge4(uint a, uint b)
{
uint r = 0;
#if CV_CUDEV_ARCH >= 300
// r (still zero here) is also passed as the last source operand
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
uint c;
// bitwise-invert the local copy of b in place
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
// Per-byte unsigned "a >= b" returning byte masks instead of flags: the
// fallback path produces 0xff in every lane whose msb is set after the
// averaging step and 0x00 elsewhere.
// NOTE(review): vavrg4 is defined outside this excerpt; per the comment below
// it is taken to be the per-byte rounded-up average - confirm.
__device__ __forceinline__ uint vcmpge4(uint a, uint b)
{
uint r, s;
#if CV_CUDEV_ARCH >= 300
r = vsetge4(a, b);
// (r << 8) - r == r * 255 (mod 2^32): every 0x01 flag byte becomes 0xff
s = r << 8; // convert bool
r = s - r; // to mask
#else
// bitwise-invert the local copy of b in place
asm ("not.b32 %0,%0;" : "+r"(b));
r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2
r = r & 0x80808080; // msb = carry-outs
s = r >> 7; // build mask
s = r - s; // from
r = s | r; // msbs
#endif
return r;
}
// Per-byte unsigned "a > b" on two words holding four packed 8-bit lanes.
// The fallback path yields 0x01 in every byte lane whose msb is set after the
// averaging step and 0x00 elsewhere.
// NOTE(review): vavg4 is defined outside this excerpt; per the comment below
// it is taken to be the per-byte rounded-down average - confirm.
__device__ __forceinline__ uint vsetgt4(uint a, uint b)
{
uint r = 0;
#if CV_CUDEV_ARCH >= 300
// r (still zero here) is also passed as the last source operand
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
uint c;
// bitwise-invert the local copy of b in place
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
// Per-byte unsigned "a > b" returning byte masks instead of flags: the
// fallback path produces 0xff in every lane whose msb is set after the
// averaging step and 0x00 elsewhere.
// NOTE(review): vavg4 is defined outside this excerpt; per the comment below
// it is taken to be the per-byte rounded-down average - confirm.
__device__ __forceinline__ uint vcmpgt4(uint a, uint b)
{
uint r, c;
#if CV_CUDEV_ARCH >= 300
r = vsetgt4(a, b);
// (r << 8) - r == r * 255 (mod 2^32): every 0x01 flag byte becomes 0xff
c = r << 8; // convert bool
r = c - r; // to mask
#else
// bitwise-invert the local copy of b in place
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
// Per-byte inequality test on two words treated as four packed 8-bit lanes.
// Every result byte is 0x01 where the corresponding bytes of a and b differ
// and 0x00 where they are equal.
__device__ __forceinline__ uint vsetne4(uint a, uint b)
{
    uint r = 0;

#if CV_CUDEV_ARCH >= 300
    asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                                // byte is 0x00 where a == b
    const uint dec  = (diff | 0x80808080) - 0x01010101;     // msb = 0 where the diff byte was 0x00 or 0x80
    const uint msbs = (diff | dec) & 0x80808080;            // msb = 1 where the diff byte was not 0x00
    r = msbs >> 7;                                          // move each msb down to bit 0 of its byte
#endif

    return r;
}
// Per-byte inequality test returning byte masks: every result byte is 0xff
// where the corresponding bytes of a and b differ and 0x00 where they are equal.
__device__ __forceinline__ uint vcmpne4(uint a, uint b)
{
#if CV_CUDEV_ARCH >= 300
    // Widen the per-byte flags of vsetne4 to byte masks:
    // (x << 8) - x == x * 255 (mod 2^32).
    const uint flags = vsetne4(a, b);
    return (flags << 8) - flags;
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const uint diff = a ^ b;                                // byte is 0x00 where a == b
    const uint dec  = (diff | 0x80808080) - 0x01010101;     // msb = 0 where the diff byte was 0x00 or 0x80
    const uint msbs = (diff | dec) & 0x80808080;            // msb = 1 where the diff byte was not 0x00
    // spread each msb over its whole byte: 0x80 -> 0x7f -> 0xff
    return msbs | (msbs - (msbs >> 7));
#endif
}
// Per-byte unsigned absolute difference |a - b| on two words holding four
// packed 8-bit lanes. Three implementations are selected by CV_CUDEV_ARCH:
// one SIMD PTX instruction, four per-lane PTX instructions, or plain integer
// arithmetic built on vcmpge4.
__device__ __forceinline__ uint vabsdiff4(uint a, uint b)
{
uint r = 0;
#if CV_CUDEV_ARCH >= 300
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif CV_CUDEV_ARCH >= 200
// one instruction per byte lane (.b0 .. .b3); r is both the destination and the last source operand
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
uint s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a ^ b; //
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
r = s ^ r; // max ^ a ^ b leaves the other operand => min(a,b)
r = s - r; // |a - b| = max(a,b) - min(a,b); no borrow since max >= min in every byte
#endif
return r;
}
// Per-byte unsigned maximum of two words holding four packed 8-bit lanes.
// Three implementations are selected by CV_CUDEV_ARCH: one SIMD PTX
// instruction, four per-lane PTX instructions, or a mask-and-select fallback
// built on vcmpge4.
__device__ __forceinline__ uint vmax4(uint a, uint b)
{
uint r = 0;
#if CV_CUDEV_ARCH >= 300
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif CV_CUDEV_ARCH >= 200
// one instruction per byte lane (.b0 .. .b3); r is both the destination and the last source operand
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
uint s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a & s; // select a when a >= b
s = b & ~s; // select b when a < b
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned maximum
}
// Per-byte unsigned minimum of two words holding four packed 8-bit lanes.
// Three implementations are selected by CV_CUDEV_ARCH: one SIMD PTX
// instruction, four per-lane PTX instructions, or a mask-and-select fallback
// built on vcmpge4.
__device__ __forceinline__ uint vmin4(uint a, uint b)
{
uint r = 0;
#if CV_CUDEV_ARCH >= 300
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif CV_CUDEV_ARCH >= 200
// one instruction per byte lane (.b0 .. .b3); r is both the destination and the last source operand
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
uint s;
s = vcmpge4(b, a); // mask = 0xff if b >= a (note the swapped arguments)
r = a & s; // select a when a <= b
s = b & ~s; // select b when a > b
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned minimum
}
}}
#endif

View File

@ -0,0 +1,80 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_TUPLE_HPP__
#define __OPENCV_CUDEV_UTIL_TUPLE_HPP__
#include "../common.hpp"
#include "detail/tuple.hpp"
namespace cv { namespace cudev {
using tuple_detail::tuple;
using tuple_detail::tuple_size;
using tuple_detail::get;
using tuple_detail::tuple_element;
using tuple_detail::make_tuple;
using tuple_detail::tie;
// Generic case of TupleTraits: a type that is not matched by the tuple
// specialization is reported as a non-tuple of size 1.
template <typename T>
struct TupleTraits
{
    enum { is_tuple = 0 };
    enum { size     = 1 };
};
// TupleTraits for the tuple type itself: flagged as a tuple, with its size
// taken from tuple_size.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9>
struct TupleTraits< tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >
{
    enum { is_tuple = 1 };
    enum { size     = tuple_size< tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value };
};
// Public front end for tuple_detail::ConvertTuple: forwards the tuple type,
// its element count and the unary template CvtOp, and exposes the resulting
// type as ConvertTuple<...>::type.
template <class Tuple, template <typename> class CvtOp>
struct ConvertTuple
{
    typedef typename tuple_detail::ConvertTuple<
        Tuple,
        tuple_size<Tuple>::value,
        CvtOp
    >::type type;
};
}}
#endif

View File

@ -0,0 +1,169 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_TYPE_TRAITS_HPP__
#define __OPENCV_CUDEV_UTIL_TYPE_TRAITS_HPP__
#include "../common.hpp"
#include "vec_traits.hpp"
#include "detail/type_traits.hpp"
namespace cv { namespace cudev {
// NullType
//
// Empty tag type with no members.
struct NullType
{
};
// Int2Type
//
// Turns the integer constant A into a distinct type; the constant is
// available again as Int2Type<A>::value.
template <int A>
struct Int2Type
{
    enum { value = A };
};
// ArrayWrapper
//
// Plain struct holding a fixed-size C array of COUNT elements of type T.
template <typename T, int COUNT>
struct ArrayWrapper
{
    T array[COUNT];
};
// Log2 (compile time calculation)
//
// Log2<N>::value is the base-2 logarithm of N rounded up, i.e. the smallest k
// with (1 << k) >= N. N must be positive. Only the first template argument is
// meant to be supplied by callers; CURRENT_VAL and COUNT drive the recursion.
//
// The primary template shifts CURRENT_VAL right one bit per step and counts
// the steps in COUNT.
template <int N, int CURRENT_VAL = N, int COUNT = 0>
struct Log2
{
    // the nested member is spelled "value" (lower case), same as in this struct
    enum { value = Log2<N, (CURRENT_VAL >> 1), COUNT + 1>::value };
};

// Recursion end: CURRENT_VAL has been shifted down to 0 after COUNT steps, so
// the highest set bit of N is bit (COUNT - 1). If N is larger than that power
// of two the result is rounded up to COUNT, otherwise it is exactly COUNT - 1.
template <int N, int COUNT>
struct Log2<N, 0, COUNT>
{
    enum { value = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1 };
};
// IsPowerOf2
//
// value is 1 when N is non-zero and has exactly one bit set (clearing the
// lowest set bit with N & (N - 1) leaves nothing), 0 otherwise.
template <int N>
struct IsPowerOf2
{
    enum { value = (N != 0 && (N & (N - 1)) == 0) };
};
// SelectIf
//
// Compile-time type selection: SelectIf<cond, A, B>::type is A when cond is
// true and B when cond is false.
template <bool Condition, typename OnTrue, typename OnFalse>
struct SelectIf
{
    typedef OnTrue type;
};

// false case
template <typename OnTrue, typename OnFalse>
struct SelectIf<false, OnTrue, OnFalse>
{
    typedef OnFalse type;
};
// EnableIf
//
// EnableIf<cond, T>::type exists (and is T, void by default) only when cond
// is true; for a false condition the struct has no nested type.
template <bool Condition, typename T = void>
struct EnableIf
{
};

template <typename T>
struct EnableIf<true, T>
{
    typedef T type;
};
// DisableIf
//
// Mirror image of EnableIf: DisableIf<cond, T>::type exists (and is T, void
// by default) only when cond is false.
template <bool Condition, typename T = void>
struct DisableIf
{
};

template <typename T>
struct DisableIf<false, T>
{
    typedef T type;
};
// TypesEquals
//
// value is 1 when both template arguments are the same type, 0 otherwise.
template <typename A, typename B>
struct TypesEquals
{
    enum { value = 0 };
};

// both arguments name the same type
template <typename A>
struct TypesEquals<A, A>
{
    enum { value = 1 };
};
// TypeTraits
//
// Collects, for a type T, the results of the helper templates in
// type_traits_detail: derived types, classification flags and the type to use
// when passing T as a function parameter. What each helper computes is defined
// in detail/type_traits.hpp, not here.
template <typename T>
struct TypeTraits
{
    // derived types
    typedef typename type_traits_detail::UnConst<T>::type                       non_const_type;
    typedef typename type_traits_detail::UnVolatile<T>::type                    non_volatile_type;
    typedef typename type_traits_detail::UnVolatile<non_const_type>::type       unqualified_type;
    typedef typename type_traits_detail::PointerTraits<unqualified_type>::type  pointee_type;
    typedef typename type_traits_detail::ReferenceTraits<T>::type               referred_type;

    // qualifier flags are taken from T itself
    enum { is_const        = type_traits_detail::UnConst<T>::value };
    enum { is_volatile     = type_traits_detail::UnVolatile<T>::value };

    // all remaining flags are evaluated on the unqualified type
    enum { is_reference    = type_traits_detail::ReferenceTraits<unqualified_type>::value };
    enum { is_pointer      = type_traits_detail::PointerTraits<
                                 typename type_traits_detail::ReferenceTraits<unqualified_type>::type
                             >::value };

    enum { is_unsigned_int = type_traits_detail::IsUnsignedIntegral<unqualified_type>::value };
    // "IsSignedIntergral" (sic) is the helper name as referenced by this file
    enum { is_signed_int   = type_traits_detail::IsSignedIntergral<unqualified_type>::value };
    enum { is_integral     = type_traits_detail::IsIntegral<unqualified_type>::value };
    enum { is_float        = type_traits_detail::IsFloat<unqualified_type>::value };
    enum { is_scalar       = is_integral || is_float };
    enum { is_vec          = type_traits_detail::IsVec<unqualified_type>::value };

    // T itself when IsSimpleParameter reports true for the unqualified type,
    // otherwise whatever AddParameterType<T> yields
    typedef typename SelectIf<
        type_traits_detail::IsSimpleParameter<unqualified_type>::value,
        T,
        typename type_traits_detail::AddParameterType<T>::type
    >::type parameter_type;
};
// LargerType
//
// LargerType<A, B>::type is void when A and B have different channel counts
// (VecTraits<...>::cn). Otherwise it is the vector type produced by MakeVec
// with A's channel count and the element type chosen by
// type_traits_detail::LargerDepth from the two element types.
template <typename A, typename B>
struct LargerType
{
    typedef typename SelectIf<
        VecTraits<A>::cn == VecTraits<B>::cn,
        typename MakeVec<
            typename type_traits_detail::LargerDepth<
                typename VecTraits<A>::elem_type,
                typename VecTraits<B>::elem_type
            >::type,
            VecTraits<A>::cn
        >::type,
        void
    >::type type;
};
}}
#endif

View File

@ -0,0 +1,923 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_VEC_MATH_HPP__
#define __OPENCV_CUDEV_UTIL_VEC_MATH_HPP__
#include "vec_traits.hpp"
#include "saturate_cast.hpp"
namespace cv { namespace cudev {
// saturate_cast
//
// SatCastHelper<cn, VecD>::cast converts a source vector to the destination
// vector type VecD by applying the scalar saturate_cast to each of the first
// cn components (x, y, z, w) and packing the results with VecTraits<VecD>::make.
// One specialization exists per supported channel count (1 to 4).
namespace vec_math_detail
{
    template <int cn, typename VecD> struct SatCastHelper;

    // 1 channel: x
    template <typename VecD> struct SatCastHelper<1, VecD>
    {
        template <typename VecS>
        __device__ __forceinline__ static VecD cast(const VecS& src)
        {
            typedef typename VecTraits<VecD>::elem_type dst_elem;
            return VecTraits<VecD>::make(saturate_cast<dst_elem>(src.x));
        }
    };

    // 2 channels: x, y
    template <typename VecD> struct SatCastHelper<2, VecD>
    {
        template <typename VecS>
        __device__ __forceinline__ static VecD cast(const VecS& src)
        {
            typedef typename VecTraits<VecD>::elem_type dst_elem;
            return VecTraits<VecD>::make(saturate_cast<dst_elem>(src.x),
                                         saturate_cast<dst_elem>(src.y));
        }
    };

    // 3 channels: x, y, z
    template <typename VecD> struct SatCastHelper<3, VecD>
    {
        template <typename VecS>
        __device__ __forceinline__ static VecD cast(const VecS& src)
        {
            typedef typename VecTraits<VecD>::elem_type dst_elem;
            return VecTraits<VecD>::make(saturate_cast<dst_elem>(src.x),
                                         saturate_cast<dst_elem>(src.y),
                                         saturate_cast<dst_elem>(src.z));
        }
    };

    // 4 channels: x, y, z, w
    template <typename VecD> struct SatCastHelper<4, VecD>
    {
        template <typename VecS>
        __device__ __forceinline__ static VecD cast(const VecS& src)
        {
            typedef typename VecTraits<VecD>::elem_type dst_elem;
            return VecTraits<VecD>::make(saturate_cast<dst_elem>(src.x),
                                         saturate_cast<dst_elem>(src.y),
                                         saturate_cast<dst_elem>(src.z),
                                         saturate_cast<dst_elem>(src.w));
        }
    };
}
// saturate_cast<T>(vec) overloads for the 1- to 4-channel vector types of
// every element type listed below. Each overload forwards to
// vec_math_detail::SatCastHelper, selected by the channel count of the
// destination type T.
#define CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(input_type) \
    template<typename T> __device__ __forceinline__ T saturate_cast(const input_type ## 1 & v) \
    { \
        return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); \
    } \
    template<typename T> __device__ __forceinline__ T saturate_cast(const input_type ## 2 & v) \
    { \
        return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); \
    } \
    template<typename T> __device__ __forceinline__ T saturate_cast(const input_type ## 3 & v) \
    { \
        return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); \
    } \
    template<typename T> __device__ __forceinline__ T saturate_cast(const input_type ## 4 & v) \
    { \
        return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); \
    }

CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(uchar)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(char)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(ushort)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(short)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(uint)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(int)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(float)
CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST(double)

#undef CV_CUDEV_IMPLEMENT_VEC_SATURATE_CAST
// unary operators
//
// CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, src_type, dst_type) defines `operator op`
// for the vector types src_type1 .. src_type4. The operator is applied to each
// component and the results are packed into the dst_type vector with the same
// number of channels.
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, src_type, dst_type) \
    __device__ __forceinline__ dst_type ## 1 operator op(const src_type ## 1 & a) \
    { \
        return VecTraits<dst_type ## 1>::make(op (a.x)); \
    } \
    __device__ __forceinline__ dst_type ## 2 operator op(const src_type ## 2 & a) \
    { \
        return VecTraits<dst_type ## 2>::make(op (a.x), op (a.y)); \
    } \
    __device__ __forceinline__ dst_type ## 3 operator op(const src_type ## 3 & a) \
    { \
        return VecTraits<dst_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
    } \
    __device__ __forceinline__ dst_type ## 4 operator op(const src_type ## 4 & a) \
    { \
        return VecTraits<dst_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
    }

// negation: signed integer and floating-point vectors, result keeps the input type
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)

// logical not: every input type, result is always a uchar vector
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)

// bitwise complement: integer vectors only, result keeps the input type
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)

#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
// unary functions
//
// CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, src_type, dst_type)
// defines func_name() for the vector types src_type1 .. src_type4. `func` is
// applied to each component and the results are packed into the dst_type
// vector with the same number of channels. `func` may be left empty, in which
// case each component is passed through as (a.x), (a.y), ...
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, src_type, dst_type) \
    __device__ __forceinline__ dst_type ## 1 func_name(const src_type ## 1 & a) \
    { \
        return VecTraits<dst_type ## 1>::make(func (a.x)); \
    } \
    __device__ __forceinline__ dst_type ## 2 func_name(const src_type ## 2 & a) \
    { \
        return VecTraits<dst_type ## 2>::make(func (a.x), func (a.y)); \
    } \
    __device__ __forceinline__ dst_type ## 3 func_name(const src_type ## 3 & a) \
    { \
        return VecTraits<dst_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
    } \
    __device__ __forceinline__ dst_type ## 4 func_name(const src_type ## 4 & a) \
    { \
        return VecTraits<dst_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
    }
// abs keeps the element type. For the unsigned types the function argument is
// left empty, so each component is passed through unchanged.
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)

// Every other function follows one pattern: integer and float vectors are
// mapped through the single-precision routine ::<name>f and return float
// vectors, double vectors are mapped through ::<name> and return double vectors.
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(name) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, uchar, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, char, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, ushort, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, short, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, int, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, uint, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name ## f, float, float) \
    CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(name, ::name, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(sqrt)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(exp)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(exp2)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(exp10)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(log)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(log2)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(log10)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(sin)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(cos)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(tan)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(asin)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(acos)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(atan)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(sinh)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(cosh)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(tanh)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(asinh)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(acosh)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES(atanh)

#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC_ALL_TYPES
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
// Component-wise binary operators between two vectors of the same type.
//
// CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(oper, src_t, dst_t) emits four device-side
// overloads of `operator oper`, one per channel count 1..4. Each overload takes
// two src_t<N> vectors by const reference and returns a dst_t<N> assembled by
// VecTraits<dst_t<N>>::make from the per-component results `lhs.c oper rhs.c`.
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(oper, src_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 operator oper(const src_t ## 1 & lhs, const src_t ## 1 & rhs) \
    { \
        return VecTraits<dst_t ## 1>::make( \
            lhs.x oper rhs.x); \
    } \
    __device__ __forceinline__ dst_t ## 2 operator oper(const src_t ## 2 & lhs, const src_t ## 2 & rhs) \
    { \
        return VecTraits<dst_t ## 2>::make( \
            lhs.x oper rhs.x, \
            lhs.y oper rhs.y); \
    } \
    __device__ __forceinline__ dst_t ## 3 operator oper(const src_t ## 3 & lhs, const src_t ## 3 & rhs) \
    { \
        return VecTraits<dst_t ## 3>::make( \
            lhs.x oper rhs.x, \
            lhs.y oper rhs.y, \
            lhs.z oper rhs.z); \
    } \
    __device__ __forceinline__ dst_t ## 4 operator oper(const src_t ## 4 & lhs, const src_t ## 4 & rhs) \
    { \
        return VecTraits<dst_t ## 4>::make( \
            lhs.x oper rhs.x, \
            lhs.y oper rhs.y, \
            lhs.z oper rhs.z, \
            lhs.w oper rhs.w); \
    }
// Arithmetic operators (+, -, *, /). Arguments are (operator, input element type,
// output element type). Vectors of uchar/char/ushort/short/int produce int vectors;
// uint, float and double vectors keep their own element type.
// operator +
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
// operator -
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
// operator *
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
// operator /
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
// Comparison operators (==, !=, >, <, >=, <=). Every input type produces a uchar
// vector: each component holds the result of the built-in comparison of the
// corresponding input components, stored as uchar.
// operator ==
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
// operator !=
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
// operator >
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
// operator <
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
// operator >=
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
// operator <=
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
// Logical operators (&&, ||), applied per component; output is always a uchar vector.
// operator &&
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
// operator ||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
// Bitwise operators (&, |, ^). Instantiated for the integer element types only
// (no float/double); the output vector has the same element type as the inputs.
// operator &
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
// operator |
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
// operator ^
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
// Last use of the vec-vec operator helper macro; drop it so it does not leak to includers.
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
// Component-wise binary operators between a vector and a scalar.
//
// CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(oper, vec_t, scalar_t, dst_t) emits eight
// device-side overloads of `operator oper`: for every channel count 1..4 there is
// a (vector, scalar) form and a (scalar, vector) form. The scalar is passed by
// value and combined with each component in the same operand order as the call;
// the results are packed into a dst_t<N> through VecTraits<dst_t<N>>::make.
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(oper, vec_t, scalar_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 operator oper(const vec_t ## 1 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 1>::make( \
            vec.x oper val); \
    } \
    __device__ __forceinline__ dst_t ## 1 operator oper(scalar_t val, const vec_t ## 1 & vec) \
    { \
        return VecTraits<dst_t ## 1>::make( \
            val oper vec.x); \
    } \
    __device__ __forceinline__ dst_t ## 2 operator oper(const vec_t ## 2 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 2>::make( \
            vec.x oper val, \
            vec.y oper val); \
    } \
    __device__ __forceinline__ dst_t ## 2 operator oper(scalar_t val, const vec_t ## 2 & vec) \
    { \
        return VecTraits<dst_t ## 2>::make( \
            val oper vec.x, \
            val oper vec.y); \
    } \
    __device__ __forceinline__ dst_t ## 3 operator oper(const vec_t ## 3 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 3>::make( \
            vec.x oper val, \
            vec.y oper val, \
            vec.z oper val); \
    } \
    __device__ __forceinline__ dst_t ## 3 operator oper(scalar_t val, const vec_t ## 3 & vec) \
    { \
        return VecTraits<dst_t ## 3>::make( \
            val oper vec.x, \
            val oper vec.y, \
            val oper vec.z); \
    } \
    __device__ __forceinline__ dst_t ## 4 operator oper(const vec_t ## 4 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 4>::make( \
            vec.x oper val, \
            vec.y oper val, \
            vec.z oper val, \
            vec.w oper val); \
    } \
    __device__ __forceinline__ dst_t ## 4 operator oper(scalar_t val, const vec_t ## 4 & vec) \
    { \
        return VecTraits<dst_t ## 4>::make( \
            val oper vec.x, \
            val oper vec.y, \
            val oper vec.z, \
            val oper vec.w); \
    }
// Arithmetic operators (+, -, *, /) between a vector and a scalar. Arguments are
// (operator, vector element type, scalar type, output element type). Each integer
// vector type is paired with an int scalar (uint for uint vectors), a float scalar
// and a double scalar; float vectors with float and double; double vectors with
// double. In every arithmetic instantiation the output element type equals the
// scalar type.
// operator +
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
// operator -
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
// operator *
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
// operator /
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
// Comparison operators (==, !=, >, <, >=, <=). The scalar type always matches the
// vector element type, and the output is always a uchar vector.
// operator ==
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
// operator !=
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
// operator >
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
// operator <
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
// operator >=
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
// operator <=
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
// Logical operators (&&, ||). Scalar type matches the vector element type; output
// is always a uchar vector.
// operator &&
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
// operator ||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
// Bitwise operators (&, |, ^). Integer element types only; vector element, scalar
// and output types are all the same.
// operator &
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
// operator |
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
// operator ^
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
// Last use of the vec-scalar operator helper macro; drop it so it does not leak to includers.
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
// Component-wise binary functions of two vectors of the same type.
//
// CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(fname, fn, src_t, dst_t) emits four
// device-side overloads named `fname`, one per channel count 1..4. Each applies
// the scalar routine `fn` to matching components of the two src_t<N> arguments
// and packs the results into a dst_t<N> via VecTraits<dst_t<N>>::make.
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(fname, fn, src_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 fname(const src_t ## 1 & lhs, const src_t ## 1 & rhs) \
    { \
        return VecTraits<dst_t ## 1>::make( \
            fn(lhs.x, rhs.x)); \
    } \
    __device__ __forceinline__ dst_t ## 2 fname(const src_t ## 2 & lhs, const src_t ## 2 & rhs) \
    { \
        return VecTraits<dst_t ## 2>::make( \
            fn(lhs.x, rhs.x), \
            fn(lhs.y, rhs.y)); \
    } \
    __device__ __forceinline__ dst_t ## 3 fname(const src_t ## 3 & lhs, const src_t ## 3 & rhs) \
    { \
        return VecTraits<dst_t ## 3>::make( \
            fn(lhs.x, rhs.x), \
            fn(lhs.y, rhs.y), \
            fn(lhs.z, rhs.z)); \
    } \
    __device__ __forceinline__ dst_t ## 4 fname(const src_t ## 4 & lhs, const src_t ## 4 & rhs) \
    { \
        return VecTraits<dst_t ## 4>::make( \
            fn(lhs.x, rhs.x), \
            fn(lhs.y, rhs.y), \
            fn(lhs.z, rhs.z), \
            fn(lhs.w, rhs.w)); \
    }
// max of two vectors. Arguments are (overload name, scalar routine, input element
// type, output element type). Output type equals input type; integer types use
// ::max, float uses ::fmaxf, double uses ::fmax.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
// min of two vectors: same pattern with ::min / ::fminf / ::fmin.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
// hypot of two vectors: integer and float inputs use ::hypotf and yield float
// vectors; double inputs use ::hypot and yield double vectors.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
// atan2 of two vectors: same type pattern as hypot (::atan2f with float output,
// ::atan2 for double).
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
// Last use of the vec-vec function helper macro; drop it so it does not leak to includers.
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
// Component-wise binary functions of a vector and a scalar.
//
// CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(fname, fn, vec_t, scalar_t, dst_t) emits
// eight device-side overloads named `fname`: for every channel count 1..4 there
// is a (vector, scalar) form and a (scalar, vector) form. Both arguments of each
// `fn` call are explicitly cast to dst_t first, and argument order follows the
// order of the overload's parameters. Results are packed into a dst_t<N> via
// VecTraits<dst_t<N>>::make.
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(fname, fn, vec_t, scalar_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 fname(const vec_t ## 1 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 1>::make( \
            fn((dst_t) vec.x, (dst_t) val)); \
    } \
    __device__ __forceinline__ dst_t ## 1 fname(scalar_t val, const vec_t ## 1 & vec) \
    { \
        return VecTraits<dst_t ## 1>::make( \
            fn((dst_t) val, (dst_t) vec.x)); \
    } \
    __device__ __forceinline__ dst_t ## 2 fname(const vec_t ## 2 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 2>::make( \
            fn((dst_t) vec.x, (dst_t) val), \
            fn((dst_t) vec.y, (dst_t) val)); \
    } \
    __device__ __forceinline__ dst_t ## 2 fname(scalar_t val, const vec_t ## 2 & vec) \
    { \
        return VecTraits<dst_t ## 2>::make( \
            fn((dst_t) val, (dst_t) vec.x), \
            fn((dst_t) val, (dst_t) vec.y)); \
    } \
    __device__ __forceinline__ dst_t ## 3 fname(const vec_t ## 3 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 3>::make( \
            fn((dst_t) vec.x, (dst_t) val), \
            fn((dst_t) vec.y, (dst_t) val), \
            fn((dst_t) vec.z, (dst_t) val)); \
    } \
    __device__ __forceinline__ dst_t ## 3 fname(scalar_t val, const vec_t ## 3 & vec) \
    { \
        return VecTraits<dst_t ## 3>::make( \
            fn((dst_t) val, (dst_t) vec.x), \
            fn((dst_t) val, (dst_t) vec.y), \
            fn((dst_t) val, (dst_t) vec.z)); \
    } \
    __device__ __forceinline__ dst_t ## 4 fname(const vec_t ## 4 & vec, scalar_t val) \
    { \
        return VecTraits<dst_t ## 4>::make( \
            fn((dst_t) vec.x, (dst_t) val), \
            fn((dst_t) vec.y, (dst_t) val), \
            fn((dst_t) vec.z, (dst_t) val), \
            fn((dst_t) vec.w, (dst_t) val)); \
    } \
    __device__ __forceinline__ dst_t ## 4 fname(scalar_t val, const vec_t ## 4 & vec) \
    { \
        return VecTraits<dst_t ## 4>::make( \
            fn((dst_t) val, (dst_t) vec.x), \
            fn((dst_t) val, (dst_t) vec.y), \
            fn((dst_t) val, (dst_t) vec.z), \
            fn((dst_t) val, (dst_t) vec.w)); \
    }
// max of a vector and a scalar. Arguments are (overload name, scalar routine,
// vector element type, scalar type, output element type). Each integer vector type
// gets three overload sets: a same-type scalar via ::max (output is that type), a
// float scalar via ::fmaxf (float output) and a double scalar via ::fmax (double
// output). Float vectors get float and double scalars; double vectors get double.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
// min of a vector and a scalar: same pattern with ::min / ::fminf / ::fmin.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
// hypot of a vector and a scalar: only float and double scalars are provided.
// A float scalar selects ::hypotf with float output, a double scalar selects
// ::hypot with double output; double vectors get the double form only.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
// atan2 of a vector and a scalar: same scalar/output pattern as hypot
// (::atan2f for float scalars, ::atan2 for double scalars).
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
// Last use of the vec-scalar function helper macro; drop it so it does not leak to includers.
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
}}
#endif

View File

@ -0,0 +1,320 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_UTIL_VEC_TRAITS_HPP__
#define __OPENCV_CUDEV_UTIL_VEC_TRAITS_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
// MakeVec<T, CN>::type names the type that holds CN channels of element type T.
// Only the explicit specializations below exist; the primary template is left
// undefined, so any other (T, CN) combination fails to compile.
template<typename T, int CN> struct MakeVec;

// Generates the four specializations for one element type: one channel maps to
// the element type itself, two to four channels map to elem_t2 .. elem_t4.
#define CV_CUDEV_MAKE_VEC_INST(elem_t) \
    template<> struct MakeVec<elem_t, 1> \
    { \
        typedef elem_t type; \
    }; \
    template<> struct MakeVec<elem_t, 2> \
    { \
        typedef elem_t ## 2 type; \
    }; \
    template<> struct MakeVec<elem_t, 3> \
    { \
        typedef elem_t ## 3 type; \
    }; \
    template<> struct MakeVec<elem_t, 4> \
    { \
        typedef elem_t ## 4 type; \
    };

CV_CUDEV_MAKE_VEC_INST(uchar)
CV_CUDEV_MAKE_VEC_INST(ushort)
CV_CUDEV_MAKE_VEC_INST(short)
CV_CUDEV_MAKE_VEC_INST(int)
CV_CUDEV_MAKE_VEC_INST(uint)
CV_CUDEV_MAKE_VEC_INST(float)
CV_CUDEV_MAKE_VEC_INST(double)

#undef CV_CUDEV_MAKE_VEC_INST

// schar is written out by hand because its vector types are spelled char2..char4,
// which the token-pasting macro cannot derive from the name "schar".
// NOTE(review): the one-channel mapping is plain `char`, not `schar`, unlike every
// macro-generated case where one channel maps to the element type itself --
// confirm this is intended.
template<> struct MakeVec<schar, 1>
{
    typedef char type;
};
template<> struct MakeVec<schar, 2>
{
    typedef char2 type;
};
template<> struct MakeVec<schar, 3>
{
    typedef char3 type;
};
template<> struct MakeVec<schar, 4>
{
    typedef char4 type;
};

// bool elements are mapped onto the uchar family (uchar, uchar2, uchar3, uchar4).
template<> struct MakeVec<bool, 1>
{
    typedef uchar type;
};
template<> struct MakeVec<bool, 2>
{
    typedef uchar2 type;
};
template<> struct MakeVec<bool, 3>
{
    typedef uchar3 type;
};
template<> struct MakeVec<bool, 4>
{
    typedef uchar4 type;
};
// VecTraits
// VecTraits<V> describes a scalar or vector type V:
//   elem_type - type of a single channel
//   cn        - number of channels
//   all(v)    - a V with every channel set to v
//   make(...) - a V built from per-channel arguments, or from a pointer to
//               cn consecutive elements
// The primary template is only declared; just the specializations exist.
template<typename T> struct VecTraits;

// Emits VecTraits for one element type: the bare scalar plus the vector types
// <scalar>1 .. <scalar>4, which are constructed through make_<scalar><CN>().
#define CV_CUDEV_VEC_TRAITS_INST(scalar) \
    template <> struct VecTraits<scalar> \
    { \
        typedef scalar elem_type; \
        enum { cn = 1 }; \
        __host__ __device__ __forceinline__ static scalar all(scalar v) \
        { \
            return v; \
        } \
        __host__ __device__ __forceinline__ static scalar make(scalar x) \
        { \
            return x; \
        } \
        __host__ __device__ __forceinline__ static scalar make(const scalar* v) \
        { \
            return v[0]; \
        } \
    }; \
    template <> struct VecTraits<scalar ## 1> \
    { \
        typedef scalar elem_type; \
        enum { cn = 1 }; \
        __host__ __device__ __forceinline__ static scalar ## 1 all(scalar v) \
        { \
            return make_ ## scalar ## 1(v); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 1 make(scalar x) \
        { \
            return make_ ## scalar ## 1(x); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 1 make(const scalar* v) \
        { \
            return make_ ## scalar ## 1(v[0]); \
        } \
    }; \
    template <> struct VecTraits<scalar ## 2> \
    { \
        typedef scalar elem_type; \
        enum { cn = 2 }; \
        __host__ __device__ __forceinline__ static scalar ## 2 all(scalar v) \
        { \
            return make_ ## scalar ## 2(v, v); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 2 make(scalar x, scalar y) \
        { \
            return make_ ## scalar ## 2(x, y); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 2 make(const scalar* v) \
        { \
            return make_ ## scalar ## 2(v[0], v[1]); \
        } \
    }; \
    template <> struct VecTraits<scalar ## 3> \
    { \
        typedef scalar elem_type; \
        enum { cn = 3 }; \
        __host__ __device__ __forceinline__ static scalar ## 3 all(scalar v) \
        { \
            return make_ ## scalar ## 3(v, v, v); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 3 make(scalar x, scalar y, scalar z) \
        { \
            return make_ ## scalar ## 3(x, y, z); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 3 make(const scalar* v) \
        { \
            return make_ ## scalar ## 3(v[0], v[1], v[2]); \
        } \
    }; \
    template <> struct VecTraits<scalar ## 4> \
    { \
        typedef scalar elem_type; \
        enum { cn = 4 }; \
        __host__ __device__ __forceinline__ static scalar ## 4 all(scalar v) \
        { \
            return make_ ## scalar ## 4(v, v, v, v); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 4 make(scalar x, scalar y, scalar z, scalar w) \
        { \
            return make_ ## scalar ## 4(x, y, z, w); \
        } \
        __host__ __device__ __forceinline__ static scalar ## 4 make(const scalar* v) \
        { \
            return make_ ## scalar ## 4(v[0], v[1], v[2], v[3]); \
        } \
    };

CV_CUDEV_VEC_TRAITS_INST(uchar)
CV_CUDEV_VEC_TRAITS_INST(ushort)
CV_CUDEV_VEC_TRAITS_INST(short)
CV_CUDEV_VEC_TRAITS_INST(int)
CV_CUDEV_VEC_TRAITS_INST(uint)
CV_CUDEV_VEC_TRAITS_INST(float)
CV_CUDEV_VEC_TRAITS_INST(double)

#undef CV_CUDEV_VEC_TRAITS_INST
// VecTraits for the bare schar scalar: one channel, and all()/make() simply
// hand back the given value (or the first element behind the pointer).
template<> struct VecTraits<schar>
{
    typedef schar elem_type;
    enum { cn = 1 };

    __host__ __device__ __forceinline__ static schar all(schar v)
    {
        return v;
    }
    __host__ __device__ __forceinline__ static schar make(schar x)
    {
        return x;
    }
    __host__ __device__ __forceinline__ static schar make(const schar* x)
    {
        return x[0];
    }
};
// VecTraits for char1: a one-channel vector of schar built with make_char1().
template<> struct VecTraits<char1>
{
    typedef schar elem_type;
    enum { cn = 1 };

    __host__ __device__ __forceinline__ static char1 all(schar v)
    {
        return make_char1(v);
    }
    __host__ __device__ __forceinline__ static char1 make(schar x)
    {
        return make_char1(x);
    }
    // Builds the vector from the first element behind v.
    __host__ __device__ __forceinline__ static char1 make(const schar* v)
    {
        return make_char1(*v);
    }
};
// VecTraits for char2: a two-channel vector of schar built with make_char2().
template<> struct VecTraits<char2>
{
    typedef schar elem_type;
    enum { cn = 2 };

    // Both channels set to v.
    __host__ __device__ __forceinline__ static char2 all(schar v)
    {
        return make_char2(v, v);
    }
    __host__ __device__ __forceinline__ static char2 make(schar x, schar y)
    {
        return make_char2(x, y);
    }
    // Builds the vector from the first two elements behind v.
    __host__ __device__ __forceinline__ static char2 make(const schar* v)
    {
        return make_char2(v[0], v[1]);
    }
};
// VecTraits for char3: a three-channel vector of schar built with make_char3().
template<> struct VecTraits<char3>
{
    typedef schar elem_type;
    enum { cn = 3 };

    // All three channels set to v.
    __host__ __device__ __forceinline__ static char3 all(schar v)
    {
        return make_char3(v, v, v);
    }
    __host__ __device__ __forceinline__ static char3 make(schar x, schar y, schar z)
    {
        return make_char3(x, y, z);
    }
    // Builds the vector from the first three elements behind v.
    __host__ __device__ __forceinline__ static char3 make(const schar* v)
    {
        return make_char3(v[0], v[1], v[2]);
    }
};
// VecTraits for char4: a four-channel vector of schar built with make_char4().
template<> struct VecTraits<char4>
{
    typedef schar elem_type;
    enum { cn = 4 };

    // All four channels set to v.
    __host__ __device__ __forceinline__ static char4 all(schar v)
    {
        return make_char4(v, v, v, v);
    }
    __host__ __device__ __forceinline__ static char4 make(schar x, schar y, schar z, schar w)
    {
        return make_char4(x, y, z, w);
    }
    // Builds the vector from the first four elements behind v.
    __host__ __device__ __forceinline__ static char4 make(const schar* v)
    {
        return make_char4(v[0], v[1], v[2], v[3]);
    }
};
}}
// DataType
namespace cv {
// DataType for uint. The depth is reported as CV_32S and the format code as
// 'i', i.e. the same values a signed 32-bit int would use.
template <> class DataType<uint>
{
public:
    typedef uint value_type;
    typedef uint work_type;
    typedef uint channel_type;
    typedef uint vec_type;

    enum
    {
        generic_type = 0,
        depth = CV_32S,
        channels = 1,
        fmt = (int)'i',
        type = CV_MAKE_TYPE(depth, channels)
    };
};
// Emits DataType for the vector type <elem><cn>. Depth and the base format
// code come from DataType<elem>; the channel count minus one is added to the
// format code shifted left by 8 bits.
#define CV_CUDEV_DATA_TYPE_INST(elem, cn) \
    template <> class DataType< elem ## cn > \
    { \
    public: \
        typedef elem ## cn value_type; \
        typedef value_type work_type; \
        typedef elem channel_type; \
        typedef value_type vec_type; \
        enum \
        { \
            generic_type = 0, \
            depth = DataType<channel_type>::depth, \
            channels = cn, \
            fmt = DataType<channel_type>::fmt + ((channels - 1) << 8), \
            type = CV_MAKE_TYPE(depth, channels) \
        }; \
    };

CV_CUDEV_DATA_TYPE_INST(uchar, 1)
CV_CUDEV_DATA_TYPE_INST(uchar, 2)
CV_CUDEV_DATA_TYPE_INST(uchar, 3)
CV_CUDEV_DATA_TYPE_INST(uchar, 4)

CV_CUDEV_DATA_TYPE_INST(ushort, 1)
CV_CUDEV_DATA_TYPE_INST(ushort, 2)
CV_CUDEV_DATA_TYPE_INST(ushort, 3)
CV_CUDEV_DATA_TYPE_INST(ushort, 4)

CV_CUDEV_DATA_TYPE_INST(short, 1)
CV_CUDEV_DATA_TYPE_INST(short, 2)
CV_CUDEV_DATA_TYPE_INST(short, 3)
CV_CUDEV_DATA_TYPE_INST(short, 4)

CV_CUDEV_DATA_TYPE_INST(int, 1)
CV_CUDEV_DATA_TYPE_INST(int, 2)
CV_CUDEV_DATA_TYPE_INST(int, 3)
CV_CUDEV_DATA_TYPE_INST(int, 4)

CV_CUDEV_DATA_TYPE_INST(uint, 1)
CV_CUDEV_DATA_TYPE_INST(uint, 2)
CV_CUDEV_DATA_TYPE_INST(uint, 3)
CV_CUDEV_DATA_TYPE_INST(uint, 4)

CV_CUDEV_DATA_TYPE_INST(float, 1)
CV_CUDEV_DATA_TYPE_INST(float, 2)
CV_CUDEV_DATA_TYPE_INST(float, 3)
CV_CUDEV_DATA_TYPE_INST(float, 4)

CV_CUDEV_DATA_TYPE_INST(double, 1)
CV_CUDEV_DATA_TYPE_INST(double, 2)
CV_CUDEV_DATA_TYPE_INST(double, 3)
CV_CUDEV_DATA_TYPE_INST(double, 4)

#undef CV_CUDEV_DATA_TYPE_INST
template<> class DataType<char1>
{
public:
typedef char1 value_type;
typedef value_type work_type;
typedef schar channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 1,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKE_TYPE(depth, channels)
};
};
template<> class DataType<char2>
{
public:
typedef char2 value_type;
typedef value_type work_type;
typedef schar channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 2,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKE_TYPE(depth, channels)
};
};
template<> class DataType<char3>
{
public:
typedef char3 value_type;
typedef value_type work_type;
typedef schar channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 3,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKE_TYPE(depth, channels)
};
};
template<> class DataType<char4>
{
public:
typedef char4 value_type;
typedef value_type work_type;
typedef schar channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 4,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKE_TYPE(depth, channels)
};
};
}
#endif

View File

@ -0,0 +1,222 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_WARP_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDEV_WARP_REDUCE_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
#include "../../warp/shuffle.hpp"
namespace cv { namespace cudev {
namespace warp_reduce_detail
{
// GetType
// GetType<X>::type strips one level of indirection from X:
//   Elem*          -> Elem
//   volatile Elem* -> Elem (the volatile qualifier is dropped as well)
//   Elem&          -> Elem
// Any other X has no definition.
template <typename X> struct GetType;

template <typename Elem> struct GetType<Elem&> { typedef Elem type; };
template <typename Elem> struct GetType<Elem*> { typedef Elem type; };
template <typename Elem> struct GetType<volatile Elem*> { typedef Elem type; };
// For
// Compile-time loop over tuple elements I, I + 1, ..., N - 1. Every member
// processes element I and then recurses into For<I + 1, N>.
template <int I, int N> struct For
{
    // Writes element I of val into slot tid of the I-th buffer in smem.
    template <class PointerTuple, class ValTuple>
    __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
    {
        typedef For<I + 1, N> Next;

        get<I>(smem)[tid] = get<I>(val);

        Next::loadToSmem(smem, val, tid);
    }

    // Combines element I of val with the entry at tid + delta of the I-th
    // buffer using the I-th operation, then stores the result both in val and
    // in slot tid of that buffer.
    template <class PointerTuple, class ValTuple, class OpTuple>
    __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
    {
        typedef For<I + 1, N> Next;
        typedef typename GetType<typename tuple_element<I, PointerTuple>::type>::type value_type;

        value_type partner = get<I>(smem)[tid + delta];

        get<I>(val) = get<I>(op)(get<I>(val), partner);
        get<I>(smem)[tid] = get<I>(val);

        Next::merge(smem, val, tid, delta, op);
    }

#if CV_CUDEV_ARCH >= 300
    // Same combination step as merge(), but the partner value is obtained from
    // shfl_down(element, delta, width) rather than from a buffer.
    template <class ValTuple, class OpTuple>
    __device__ static void mergeShfl(const ValTuple& val, uint delta, uint width, const OpTuple& op)
    {
        typedef For<I + 1, N> Next;
        typedef typename GetType<typename tuple_element<I, ValTuple>::type>::type value_type;

        value_type partner = shfl_down(get<I>(val), delta, width);

        get<I>(val) = get<I>(op)(get<I>(val), partner);

        Next::mergeShfl(val, delta, width, op);
    }
#endif
};
// Terminates the For<I, N> recursion: once I reaches N every operation is a
// no-op.
template <int N> struct For<N, N>
{
    template <class PointerTuple, class ValTuple>
    __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ValTuple&, uint) {}

    template <class PointerTuple, class ValTuple, class OpTuple>
    __device__ __forceinline__ static void merge(const PointerTuple&, const ValTuple&, uint, uint, const OpTuple&) {}

#if CV_CUDEV_ARCH >= 300
    template <class ValTuple, class OpTuple>
    __device__ __forceinline__ static void mergeShfl(const ValTuple&, uint, uint, const OpTuple&) {}
#endif
};
// loadToSmem
// Single-value form: stores val into slot tid of smem.
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid)
{
    volatile T& slot = smem[tid];
    slot = val;
}
// Tuple form: stores every element of val into slot tid of the matching
// buffer in smem. The number of elements processed is the size of the
// pointer tuple.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                           const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                           uint tid)
{
    typedef tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::loadToSmem(smem, val, tid);
}
// merge
// Single-value form: combines val with the entry at smem[tid + delta] via op
// and writes the result to both val and smem[tid].
template <typename T, class Op>
__device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op)
{
    T partner = smem[tid + delta];

    val = op(val, partner);
    smem[tid] = val;
}
// Tuple form: performs the merge step element by element, pairing the I-th
// buffer, the I-th value and the I-th operation. The number of elements
// processed is the size of the pointer tuple.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4,
          class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                      const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                      uint tid,
                                      uint delta,
                                      const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::merge(smem, val, tid, delta, op);
}
// mergeShfl
#if CV_CUDEV_ARCH >= 300

// Single-value form: combines val with the value returned by
// shfl_down(val, delta, width) via op and stores the result in val.
template <typename T, class Op>
__device__ __forceinline__ void mergeShfl(T& val, uint delta, uint width, const Op& op)
{
    T partner = shfl_down(val, delta, width);

    val = op(val, partner);
}

// Tuple form: applies the shuffle-based merge step to every element of val
// with the matching operation from op.
template <typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4,
          class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void mergeShfl(const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                          uint delta,
                                          uint width,
                                          const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> ReferenceTuple;

    For<0, tuple_size<ReferenceTuple>::value>::mergeShfl(val, delta, width, op);
}

#endif
// WarpReductor
// Runs the five merge steps (distances 16, 8, 4, 2, 1) of a reduction over 32
// entries. Pointer/Reference may be single values or tuples; overload
// resolution picks the matching helpers.
struct WarpReductor
{
    template <typename Pointer, typename Reference, class Op>
    __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
    {
#if CV_CUDEV_ARCH >= 300
        // Shuffle path: neither the buffer nor the thread index is needed.
        (void) tid;
        (void) smem;

        mergeShfl(val, 16, 32, op);
        mergeShfl(val, 8, 32, op);
        mergeShfl(val, 4, 32, op);
        mergeShfl(val, 2, 32, op);
        mergeShfl(val, 1, 32, op);
#else
        // Buffer path: every caller publishes its value first, then only
        // tid < 16 take part in the merge steps.
        // NOTE(review): there is no barrier between the steps, so this
        // presumably relies on the participating threads running in lockstep
        // - confirm for the targeted architectures.
        loadToSmem(smem, val, tid);

        if (tid >= 16)
            return;

        merge(smem, val, tid, 16, op);
        merge(smem, val, tid, 8, op);
        merge(smem, val, tid, 4, op);
        merge(smem, val, tid, 2, op);
        merge(smem, val, tid, 1, op);
#endif
    }
};
}
}}
#endif

View File

@ -0,0 +1,239 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_WARP_REDUCE_KEY_VAL_DETAIL_HPP__
#define __OPENCV_CUDEV_WARP_REDUCE_KEY_VAL_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
namespace cv { namespace cudev {
namespace warp_reduce_key_val_detail
{
// GetType
// GetType<X>::type strips one level of indirection from X:
//   Elem*          -> Elem
//   volatile Elem* -> Elem (the volatile qualifier is dropped as well)
//   Elem&          -> Elem
// Any other X has no definition.
template <typename X> struct GetType;

template <typename Elem> struct GetType<Elem&> { typedef Elem type; };
template <typename Elem> struct GetType<Elem*> { typedef Elem type; };
template <typename Elem> struct GetType<volatile Elem*> { typedef Elem type; };
// For
// Compile-time loop over tuple elements I, I + 1, ..., N - 1. Every member
// processes element I and then recurses into For<I + 1, N>.
template <int I, int N> struct For
{
    // Writes element I of data into slot tid of the I-th buffer in smem.
    template <class PointerTuple, class ReferenceTuple>
    __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
    {
        typedef For<I + 1, N> Next;

        get<I>(smem)[tid] = get<I>(data);

        Next::loadToSmem(smem, data, tid);
    }

    // Takes the entry at tid + delta of the I-th value buffer and stores it in
    // both element I of val and slot tid of that buffer.
    template <class PointerTuple, class ReferenceTuple>
    __device__ static void copy(const PointerTuple& svals, const ReferenceTuple& val, uint tid, uint delta)
    {
        typedef For<I + 1, N> Next;

        get<I>(val) = get<I>(svals)[tid + delta];
        get<I>(svals)[tid] = get<I>(val);

        Next::copy(svals, val, tid, delta);
    }

    // Per-element key/value merge: when the I-th comparator accepts the key at
    // tid + delta over the current key, that key and the value stored next to
    // it replace the current pair (in key/val and in slot tid of the buffers).
    template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
    __device__ static void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                 const ValPointerTuple& svals, const ValReferenceTuple& val,
                                 const CmpTuple& cmp,
                                 uint tid, uint delta)
    {
        typedef For<I + 1, N> Next;
        typedef typename GetType<typename tuple_element<I, KeyPointerTuple>::type>::type key_type;

        key_type candidate = get<I>(skeys)[tid + delta];

        if (get<I>(cmp)(candidate, get<I>(key)))
        {
            get<I>(key) = candidate;
            get<I>(skeys)[tid] = get<I>(key);

            get<I>(val) = get<I>(svals)[tid + delta];
            get<I>(svals)[tid] = get<I>(val);
        }

        Next::merge(skeys, key, svals, val, cmp, tid, delta);
    }
};
// Terminates the For<I, N> recursion: once I reaches N every operation is a
// no-op.
template <int N> struct For<N, N>
{
    template <class PointerTuple, class ReferenceTuple>
    __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ReferenceTuple&, uint) {}

    template <class PointerTuple, class ReferenceTuple>
    __device__ __forceinline__ static void copy(const PointerTuple&, const ReferenceTuple&, uint, uint) {}

    template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
    __device__ __forceinline__ static void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
                                                 const ValPointerTuple&, const ValReferenceTuple&,
                                                 const CmpTuple&,
                                                 uint, uint) {}
};
// loadToSmem
// Single-value form: stores data into slot tid of smem.
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
{
    volatile T& slot = smem[tid];
    slot = data;
}
// Tuple form: stores every element of data into slot tid of the matching
// buffer in smem. The number of elements processed is the size of the
// pointer tuple.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                           const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                           uint tid)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::loadToSmem(smem, data, tid);
}
// copyVals
// Single-value form: takes the entry at svals[tid + delta] and stores it in
// both val and svals[tid].
template <typename V>
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, uint tid, uint delta)
{
    val = svals[tid + delta];
    svals[tid] = val;
}
// Tuple form: performs the copy for every value buffer / value reference pair.
// The number of elements processed is the size of the pointer tuple.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyVals(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                         const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                         uint tid, uint delta)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;

    For<0, tuple_size<PointerTuple>::value>::copy(svals, val, tid, delta);
}
// merge
// One key, one value: if cmp accepts the key at skeys[tid + delta] over the
// current key, adopts that key (into key and skeys[tid]) and copies the value
// stored at the same offset.
template <typename K, typename V, class Cmp>
__device__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, uint tid, uint delta)
{
    K candidate = skeys[tid + delta];

    if (cmp(candidate, key))
    {
        key = candidate;
        skeys[tid] = key;

        copyVals(svals, val, tid, delta);
    }
}
// One key, tuple of values: if cmp accepts the key at skeys[tid + delta] over
// the current key, adopts that key (into key and skeys[tid]) and copies every
// value stored at the same offset.
template <typename K,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ void merge(volatile K* skeys, K& key,
                      const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                      const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                      const Cmp& cmp, uint tid, uint delta)
{
    K candidate = skeys[tid + delta];

    if (cmp(candidate, key))
    {
        key = candidate;
        skeys[tid] = key;

        copyVals(svals, val, tid, delta);
    }
}
// Tuple of keys, tuple of values: runs the key/value merge independently for
// each index, using the comparator with the same index.
// The element count is taken from the value-pointer tuple; the key and
// comparator tuples are presumably expected to have the same size.
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4,
          typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4,
          typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4,
          class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void merge(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                      const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                      const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                      const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                      const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                      uint tid, uint delta)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> ValPointerTuple;

    For<0, tuple_size<ValPointerTuple>::value>::merge(skeys, key, svals, val, cmp, tid, delta);
}
// WarpReductor
// Runs the five key/value merge steps (distances 16, 8, 4, 2, 1) of a
// reduction over 32 entries. KP/KR/VP/VR may be single pointers/references or
// tuples; overload resolution picks the matching helpers.
struct WarpReductor
{
    template <class KP, class KR, class VP, class VR, class Cmp>
    __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
    {
        // Every caller publishes its key and value first.
        loadToSmem(skeys, key, tid);
        loadToSmem(svals, val, tid);

        // Only tid < 16 take part in the merge steps.
        // NOTE(review): there is no barrier between the steps, so this
        // presumably relies on the participating threads running in lockstep
        // - confirm for the targeted architectures.
        if (tid >= 16)
            return;

        merge(skeys, key, svals, val, cmp, tid, 16);
        merge(skeys, key, svals, val, cmp, tid, 8);
        merge(skeys, key, svals, val, cmp, tid, 4);
        merge(skeys, key, svals, val, cmp, tid, 2);
        merge(skeys, key, svals, val, cmp, tid, 1);
    }
};
}
}}
#endif

View File

@ -0,0 +1,206 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_WARP_REDUCE_HPP__
#define __OPENCV_CUDEV_WARP_REDUCE_HPP__
#include "../common.hpp"
#include "../util/tuple.hpp"
#include "detail/reduce.hpp"
#include "detail/reduce_key_val.hpp"
namespace cv { namespace cudev {
// warpReduce
// Single-value warp reduction: forwards to warp_reduce_detail::WarpReductor
// with the argument types spelled out, so val and op are passed by reference.
template <typename T, class Op>
__device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op)
{
    typedef warp_reduce_detail::WarpReductor Reductor;

    Reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
}
// Tuple warp reduction: reduces several values at once, pairing the I-th
// buffer, value and operation. Forwards to warp_reduce_detail::WarpReductor
// with all three tuples passed by const reference.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4,
          class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void warpReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                           const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                           uint tid,
                                           const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;
    typedef tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> ReferenceTuple;
    typedef tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9> OpTuple;

    warp_reduce_detail::WarpReductor::template reduce<
            const PointerTuple&,
            const ReferenceTuple&,
            const OpTuple&>(smem, val, tid, op);
}
// warpReduceKeyVal
// Key/value warp reduction with one key and one value: forwards to
// warp_reduce_key_val_detail::WarpReductor with key, val and cmp passed by
// reference.
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void warpReduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, uint tid, const Cmp& cmp)
{
    typedef warp_reduce_key_val_detail::WarpReductor Reductor;

    Reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
// Key/value warp reduction with one key and a tuple of values: forwards to
// warp_reduce_key_val_detail::WarpReductor, passing the value tuples by const
// reference.
template <typename K,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ __forceinline__ void warpReduceKeyVal(volatile K* skeys, K& key,
                                                 const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 uint tid, const Cmp& cmp)
{
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> ValPointerTuple;
    typedef tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> ValReferenceTuple;

    warp_reduce_key_val_detail::WarpReductor::template reduce<
            volatile K*,
            K&,
            const ValPointerTuple&,
            const ValReferenceTuple&,
            const Cmp&>(skeys, key, svals, val, tid, cmp);
}
// Key/value warp reduction with tuples of keys, values and comparators:
// forwards to warp_reduce_key_val_detail::WarpReductor, passing every tuple by
// const reference.
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4,
          typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4,
          typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4,
          class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void warpReduceKeyVal(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                                 const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                 const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 uint tid,
                                                 const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
{
    typedef tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9> KeyPointerTuple;
    typedef tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> KeyReferenceTuple;
    typedef tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> ValPointerTuple;
    typedef tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> ValReferenceTuple;
    typedef tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9> CmpTuple;

    warp_reduce_key_val_detail::WarpReductor::template reduce<
            const KeyPointerTuple&,
            const KeyReferenceTuple&,
            const ValPointerTuple&,
            const ValReferenceTuple&,
            const CmpTuple&>(skeys, key, svals, val, tid, cmp);
}
// smem_tuple
// Packs one pointer into a tuple after adding the volatile qualifier to its
// pointee type.
template <typename T0>
__device__ __forceinline__ tuple<volatile T0*> smem_tuple(T0* t0)
{
    return make_tuple(static_cast<volatile T0*>(t0));
}
// Packs two pointers into a tuple after adding the volatile qualifier to each
// pointee type.
template <typename T0, typename T1>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*> smem_tuple(T0* t0, T1* t1)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1));
}
// Packs three pointers into a tuple after adding the volatile qualifier to
// each pointee type.
template <typename T0, typename T1, typename T2>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*, volatile T2*> smem_tuple(T0* t0, T1* t1, T2* t2)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2));
}
// Packs four pointers into a tuple after adding the volatile qualifier to each
// pointee type.
template <typename T0, typename T1, typename T2, typename T3>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3));
}
// Packs five pointers into a tuple after adding the volatile qualifier to each
// pointee type.
template <typename T0, typename T1, typename T2, typename T3, typename T4>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3),
                      static_cast<volatile T4*>(t4));
}
// Packs six pointers into a tuple after adding the volatile qualifier to each
// pointee type.
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3),
                      static_cast<volatile T4*>(t4),
                      static_cast<volatile T5*>(t5));
}
// Packs seven pointers into a tuple after adding the volatile qualifier to
// each pointee type.
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3),
                      static_cast<volatile T4*>(t4),
                      static_cast<volatile T5*>(t5),
                      static_cast<volatile T6*>(t6));
}
// Packs eight pointers into a tuple after adding the volatile qualifier to
// each pointee type.
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
__device__ __forceinline__ tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3),
                      static_cast<volatile T4*>(t4),
                      static_cast<volatile T5*>(t5),
                      static_cast<volatile T6*>(t6),
                      static_cast<volatile T7*>(t7));
}
// Nine-pointer overload: returns a tuple holding the same addresses as
// pointers-to-volatile.
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
__device__ __forceinline__
tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3),
                      static_cast<volatile T4*>(t4),
                      static_cast<volatile T5*>(t5),
                      static_cast<volatile T6*>(t6),
                      static_cast<volatile T7*>(t7),
                      static_cast<volatile T8*>(t8));
}
// Ten-pointer overload: returns a tuple holding the same addresses as
// pointers-to-volatile.
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
__device__ __forceinline__
tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
{
    return make_tuple(static_cast<volatile T0*>(t0),
                      static_cast<volatile T1*>(t1),
                      static_cast<volatile T2*>(t2),
                      static_cast<volatile T3*>(t3),
                      static_cast<volatile T4*>(t4),
                      static_cast<volatile T5*>(t5),
                      static_cast<volatile T6*>(t6),
                      static_cast<volatile T7*>(t7),
                      static_cast<volatile T8*>(t8),
                      static_cast<volatile T9*>(t9));
}
}}
#endif

View File

@ -0,0 +1,99 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_WARP_SCAN_HPP__
#define __OPENCV_CUDEV_WARP_SCAN_HPP__
#include "../common.hpp"
#include "warp.hpp"
#include "shuffle.hpp"
namespace cv { namespace cudev {
// Inclusive prefix sum of `data` across the lanes of a warp.
//
// Each lane returns the sum (accumulated with operator+=) of the `data` values
// of all lanes of its warp from lane 0 up to and including itself.
//
// data - this thread's contribution.
// smem - scratch buffer, used only when CV_CUDEV_ARCH < 300. Indexing below
//        places each warp in its own window of 2 * WARP_SIZE elements, so the
//        buffer must hold at least twice as many elements as there are
//        participating threads.
// tid  - thread index used to address `smem` (ignored when CV_CUDEV_ARCH >= 300).
//        NOTE(review): presumably the linear thread index within the block - confirm.
template <typename T>
__device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
{
#if CV_CUDEV_ARCH >= 300
// The shuffle-based path needs neither scratch memory nor the thread index.
(void) smem;
(void) tid;
const uint laneId = Warp::laneId();
// Scan built on shfl_up: offsets 1, 2, 4, 8, 16. At each step a lane takes the
// partial sum held `i` lanes below it; lanes with laneId < i have no such
// lane and leave their value unchanged.
#pragma unroll
for (int i = 1; i <= (WARP_SIZE / 2); i *= 2)
{
const T val = shfl_up(data, i);
if (laneId >= i)
data += val;
}
return data;
#else
// For tid = WARP_SIZE * w + l this evaluates to 2 * WARP_SIZE * w + l, i.e.
// slot `l` in the lower half of warp w's window.
uint pos = 2 * tid - (tid & (WARP_SIZE - 1));
// Zero the lower half so the `pos - k` reads below never need a bounds check.
smem[pos] = 0;
// Move to the matching slot in the upper half, which holds the running sums.
pos += WARP_SIZE;
smem[pos] = data;
// Same offsets as the shuffle path (1, 2, 4, 8, 16).
// NOTE(review): there is no barrier between these steps; this presumably relies
// on the lanes of a warp executing in lockstep together with the volatile
// qualifier on `smem` - confirm before reordering or reusing on newer targets.
smem[pos] += smem[pos - 1];
smem[pos] += smem[pos - 2];
smem[pos] += smem[pos - 4];
smem[pos] += smem[pos - 8];
smem[pos] += smem[pos - 16];
return smem[pos];
#endif
}
// Exclusive warp prefix sum: the inclusive scan result with this thread's own
// contribution subtracted again. Parameters are forwarded unchanged to
// warpScanInclusive.
template <typename T>
__device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint tid)
{
    const T inclusive = warpScanInclusive(data, smem, tid);
    return inclusive - data;
}
}}
#endif

View File

@ -0,0 +1,424 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_WARP_SHUFFLE_HPP__
#define __OPENCV_CUDEV_WARP_SHUFFLE_HPP__
#include "../common.hpp"
#include "../util/vec_traits.hpp"
namespace cv { namespace cudev {
#if CV_CUDEV_ARCH >= 300
// shfl
// Scalar wrappers around the __shfl intrinsic.
// uchar/schar/ushort/short/uint values are converted to int for the exchange and
// converted back to their own type afterwards; int and float are forwarded as is.
__device__ __forceinline__ uchar shfl(uchar val, int srcLane, int width = warpSize)
{
    const int exchanged = __shfl(static_cast<int>(val), srcLane, width);
    return static_cast<uchar>(exchanged);
}
__device__ __forceinline__ schar shfl(schar val, int srcLane, int width = warpSize)
{
    const int exchanged = __shfl(static_cast<int>(val), srcLane, width);
    return static_cast<schar>(exchanged);
}
__device__ __forceinline__ ushort shfl(ushort val, int srcLane, int width = warpSize)
{
    const int exchanged = __shfl(static_cast<int>(val), srcLane, width);
    return static_cast<ushort>(exchanged);
}
__device__ __forceinline__ short shfl(short val, int srcLane, int width = warpSize)
{
    const int exchanged = __shfl(static_cast<int>(val), srcLane, width);
    return static_cast<short>(exchanged);
}
__device__ __forceinline__ int shfl(int val, int srcLane, int width = warpSize)
{
    return __shfl(val, srcLane, width);
}
__device__ __forceinline__ uint shfl(uint val, int srcLane, int width = warpSize)
{
    const int exchanged = __shfl(static_cast<int>(val), srcLane, width);
    return static_cast<uint>(exchanged);
}
__device__ __forceinline__ float shfl(float val, int srcLane, int width = warpSize)
{
    return __shfl(val, srcLane, width);
}
// Shuffle for double: the value is split into its low and high 32-bit words,
// each word is exchanged with its own __shfl call, and the two received words
// are reassembled into a double.
//
// Declared __forceinline__ like the other scalar shfl overloads in this header:
// as a non-template function defined in a header it would otherwise be emitted
// as an ordinary device function in every translation unit that includes it.
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
{
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    lo = __shfl(lo, srcLane, width);
    hi = __shfl(hi, srcLane, width);

    return __hiloint2double(hi, lo);
}
/*
 * CV_CUDEV_SHFL_VEC_INST(input_type) defines shfl() overloads for the vector
 * types input_type1 .. input_type4 (names are formed by token pasting).
 * Each overload shuffles every component (x, y, z, w as applicable) with the
 * scalar shfl() using the same srcLane/width and rebuilds the vector through
 * VecTraits<...>::make.
 * The macro is expanded for the uchar, char, ushort, short, uint, int, float
 * and double vector families and then undefined so it does not leak to
 * includers.
 * Note: do not add // comments inside the macro body; the trailing backslashes
 * would splice the following line into the comment.
 */
#define CV_CUDEV_SHFL_VEC_INST(input_type) \
__device__ __forceinline__ input_type ## 1 shfl(const input_type ## 1 & val, int srcLane, int width = warpSize) \
{ \
return VecTraits<input_type ## 1>::make( \
shfl(val.x, srcLane, width) \
); \
} \
__device__ __forceinline__ input_type ## 2 shfl(const input_type ## 2 & val, int srcLane, int width = warpSize) \
{ \
return VecTraits<input_type ## 2>::make( \
shfl(val.x, srcLane, width), \
shfl(val.y, srcLane, width) \
); \
} \
__device__ __forceinline__ input_type ## 3 shfl(const input_type ## 3 & val, int srcLane, int width = warpSize) \
{ \
return VecTraits<input_type ## 3>::make( \
shfl(val.x, srcLane, width), \
shfl(val.y, srcLane, width), \
shfl(val.z, srcLane, width) \
); \
} \
__device__ __forceinline__ input_type ## 4 shfl(const input_type ## 4 & val, int srcLane, int width = warpSize) \
{ \
return VecTraits<input_type ## 4>::make( \
shfl(val.x, srcLane, width), \
shfl(val.y, srcLane, width), \
shfl(val.z, srcLane, width), \
shfl(val.w, srcLane, width) \
); \
}
CV_CUDEV_SHFL_VEC_INST(uchar)
CV_CUDEV_SHFL_VEC_INST(char)
CV_CUDEV_SHFL_VEC_INST(ushort)
CV_CUDEV_SHFL_VEC_INST(short)
CV_CUDEV_SHFL_VEC_INST(uint)
CV_CUDEV_SHFL_VEC_INST(int)
CV_CUDEV_SHFL_VEC_INST(float)
CV_CUDEV_SHFL_VEC_INST(double)
#undef CV_CUDEV_SHFL_VEC_INST
// shfl_up
// Scalar wrappers around the __shfl_up intrinsic.
// uchar/schar/ushort/short/uint values are converted to int for the exchange and
// converted back to their own type afterwards; int and float are forwarded as is.
__device__ __forceinline__ uchar shfl_up(uchar val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_up(static_cast<int>(val), delta, width);
    return static_cast<uchar>(exchanged);
}
__device__ __forceinline__ schar shfl_up(schar val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_up(static_cast<int>(val), delta, width);
    return static_cast<schar>(exchanged);
}
__device__ __forceinline__ ushort shfl_up(ushort val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_up(static_cast<int>(val), delta, width);
    return static_cast<ushort>(exchanged);
}
__device__ __forceinline__ short shfl_up(short val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_up(static_cast<int>(val), delta, width);
    return static_cast<short>(exchanged);
}
__device__ __forceinline__ int shfl_up(int val, uint delta, int width = warpSize)
{
    return __shfl_up(val, delta, width);
}
__device__ __forceinline__ uint shfl_up(uint val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_up(static_cast<int>(val), delta, width);
    return static_cast<uint>(exchanged);
}
__device__ __forceinline__ float shfl_up(float val, uint delta, int width = warpSize)
{
    return __shfl_up(val, delta, width);
}
// shfl_up for double: the value is split into its low and high 32-bit words,
// each word is exchanged with its own __shfl_up call, and the two received
// words are reassembled into a double.
//
// Declared __forceinline__ like the other scalar shfl_up overloads in this
// header: as a non-template function defined in a header it would otherwise be
// emitted as an ordinary device function in every translation unit that
// includes it.
__device__ __forceinline__ double shfl_up(double val, uint delta, int width = warpSize)
{
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    lo = __shfl_up(lo, delta, width);
    hi = __shfl_up(hi, delta, width);

    return __hiloint2double(hi, lo);
}
/*
 * CV_CUDEV_SHFL_UP_VEC_INST(input_type) defines shfl_up() overloads for the
 * vector types input_type1 .. input_type4 (names are formed by token pasting).
 * Each overload applies the scalar shfl_up() to every component with the same
 * delta/width and rebuilds the vector through VecTraits<...>::make.
 * The macro is expanded for the uchar, char, ushort, short, uint, int, float
 * and double vector families and then undefined.
 * Note: do not add // comments inside the macro body; the trailing backslashes
 * would splice the following line into the comment.
 */
#define CV_CUDEV_SHFL_UP_VEC_INST(input_type) \
__device__ __forceinline__ input_type ## 1 shfl_up(const input_type ## 1 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 1>::make( \
shfl_up(val.x, delta, width) \
); \
} \
__device__ __forceinline__ input_type ## 2 shfl_up(const input_type ## 2 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 2>::make( \
shfl_up(val.x, delta, width), \
shfl_up(val.y, delta, width) \
); \
} \
__device__ __forceinline__ input_type ## 3 shfl_up(const input_type ## 3 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 3>::make( \
shfl_up(val.x, delta, width), \
shfl_up(val.y, delta, width), \
shfl_up(val.z, delta, width) \
); \
} \
__device__ __forceinline__ input_type ## 4 shfl_up(const input_type ## 4 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 4>::make( \
shfl_up(val.x, delta, width), \
shfl_up(val.y, delta, width), \
shfl_up(val.z, delta, width), \
shfl_up(val.w, delta, width) \
); \
}
CV_CUDEV_SHFL_UP_VEC_INST(uchar)
CV_CUDEV_SHFL_UP_VEC_INST(char)
CV_CUDEV_SHFL_UP_VEC_INST(ushort)
CV_CUDEV_SHFL_UP_VEC_INST(short)
CV_CUDEV_SHFL_UP_VEC_INST(uint)
CV_CUDEV_SHFL_UP_VEC_INST(int)
CV_CUDEV_SHFL_UP_VEC_INST(float)
CV_CUDEV_SHFL_UP_VEC_INST(double)
#undef CV_CUDEV_SHFL_UP_VEC_INST
// shfl_down
// Scalar wrappers around the __shfl_down intrinsic.
// uchar/schar/ushort/short/uint values are converted to int for the exchange and
// converted back to their own type afterwards; int and float are forwarded as is.
__device__ __forceinline__ uchar shfl_down(uchar val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_down(static_cast<int>(val), delta, width);
    return static_cast<uchar>(exchanged);
}
__device__ __forceinline__ schar shfl_down(schar val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_down(static_cast<int>(val), delta, width);
    return static_cast<schar>(exchanged);
}
__device__ __forceinline__ ushort shfl_down(ushort val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_down(static_cast<int>(val), delta, width);
    return static_cast<ushort>(exchanged);
}
__device__ __forceinline__ short shfl_down(short val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_down(static_cast<int>(val), delta, width);
    return static_cast<short>(exchanged);
}
__device__ __forceinline__ int shfl_down(int val, uint delta, int width = warpSize)
{
    return __shfl_down(val, delta, width);
}
__device__ __forceinline__ uint shfl_down(uint val, uint delta, int width = warpSize)
{
    const int exchanged = __shfl_down(static_cast<int>(val), delta, width);
    return static_cast<uint>(exchanged);
}
__device__ __forceinline__ float shfl_down(float val, uint delta, int width = warpSize)
{
    return __shfl_down(val, delta, width);
}
// shfl_down for double: the value is split into its low and high 32-bit words,
// each word is exchanged with its own __shfl_down call, and the two received
// words are reassembled into a double.
//
// Declared __forceinline__ like the other scalar shfl_down overloads in this
// header: as a non-template function defined in a header it would otherwise be
// emitted as an ordinary device function in every translation unit that
// includes it.
__device__ __forceinline__ double shfl_down(double val, uint delta, int width = warpSize)
{
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    lo = __shfl_down(lo, delta, width);
    hi = __shfl_down(hi, delta, width);

    return __hiloint2double(hi, lo);
}
/*
 * CV_CUDEV_SHFL_DOWN_VEC_INST(input_type) defines shfl_down() overloads for the
 * vector types input_type1 .. input_type4 (names are formed by token pasting).
 * Each overload applies the scalar shfl_down() to every component with the
 * same delta/width and rebuilds the vector through VecTraits<...>::make.
 * The macro is expanded for the uchar, char, ushort, short, uint, int, float
 * and double vector families and then undefined.
 * Note: do not add // comments inside the macro body; the trailing backslashes
 * would splice the following line into the comment.
 */
#define CV_CUDEV_SHFL_DOWN_VEC_INST(input_type) \
__device__ __forceinline__ input_type ## 1 shfl_down(const input_type ## 1 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 1>::make( \
shfl_down(val.x, delta, width) \
); \
} \
__device__ __forceinline__ input_type ## 2 shfl_down(const input_type ## 2 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 2>::make( \
shfl_down(val.x, delta, width), \
shfl_down(val.y, delta, width) \
); \
} \
__device__ __forceinline__ input_type ## 3 shfl_down(const input_type ## 3 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 3>::make( \
shfl_down(val.x, delta, width), \
shfl_down(val.y, delta, width), \
shfl_down(val.z, delta, width) \
); \
} \
__device__ __forceinline__ input_type ## 4 shfl_down(const input_type ## 4 & val, uint delta, int width = warpSize) \
{ \
return VecTraits<input_type ## 4>::make( \
shfl_down(val.x, delta, width), \
shfl_down(val.y, delta, width), \
shfl_down(val.z, delta, width), \
shfl_down(val.w, delta, width) \
); \
}
CV_CUDEV_SHFL_DOWN_VEC_INST(uchar)
CV_CUDEV_SHFL_DOWN_VEC_INST(char)
CV_CUDEV_SHFL_DOWN_VEC_INST(ushort)
CV_CUDEV_SHFL_DOWN_VEC_INST(short)
CV_CUDEV_SHFL_DOWN_VEC_INST(uint)
CV_CUDEV_SHFL_DOWN_VEC_INST(int)
CV_CUDEV_SHFL_DOWN_VEC_INST(float)
CV_CUDEV_SHFL_DOWN_VEC_INST(double)
#undef CV_CUDEV_SHFL_DOWN_VEC_INST
// shfl_xor
// Scalar wrappers around the __shfl_xor intrinsic.
// uchar/schar/ushort/short/uint values are converted to int for the exchange and
// converted back to their own type afterwards; int and float are forwarded as is.
__device__ __forceinline__ uchar shfl_xor(uchar val, int laneMask, int width = warpSize)
{
    const int exchanged = __shfl_xor(static_cast<int>(val), laneMask, width);
    return static_cast<uchar>(exchanged);
}
__device__ __forceinline__ schar shfl_xor(schar val, int laneMask, int width = warpSize)
{
    const int exchanged = __shfl_xor(static_cast<int>(val), laneMask, width);
    return static_cast<schar>(exchanged);
}
__device__ __forceinline__ ushort shfl_xor(ushort val, int laneMask, int width = warpSize)
{
    const int exchanged = __shfl_xor(static_cast<int>(val), laneMask, width);
    return static_cast<ushort>(exchanged);
}
__device__ __forceinline__ short shfl_xor(short val, int laneMask, int width = warpSize)
{
    const int exchanged = __shfl_xor(static_cast<int>(val), laneMask, width);
    return static_cast<short>(exchanged);
}
__device__ __forceinline__ int shfl_xor(int val, int laneMask, int width = warpSize)
{
    return __shfl_xor(val, laneMask, width);
}
__device__ __forceinline__ uint shfl_xor(uint val, int laneMask, int width = warpSize)
{
    const int exchanged = __shfl_xor(static_cast<int>(val), laneMask, width);
    return static_cast<uint>(exchanged);
}
__device__ __forceinline__ float shfl_xor(float val, int laneMask, int width = warpSize)
{
    return __shfl_xor(val, laneMask, width);
}
// shfl_xor for double: the value is split into its low and high 32-bit words,
// each word is exchanged with its own __shfl_xor call, and the two received
// words are reassembled into a double.
//
// Declared __forceinline__ like the other scalar shfl_xor overloads in this
// header: as a non-template function defined in a header it would otherwise be
// emitted as an ordinary device function in every translation unit that
// includes it.
__device__ __forceinline__ double shfl_xor(double val, int laneMask, int width = warpSize)
{
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    lo = __shfl_xor(lo, laneMask, width);
    hi = __shfl_xor(hi, laneMask, width);

    return __hiloint2double(hi, lo);
}
/*
 * CV_CUDEV_SHFL_XOR_VEC_INST(input_type) defines shfl_xor() overloads for the
 * vector types input_type1 .. input_type4 (names are formed by token pasting).
 * Each overload applies the scalar shfl_xor() to every component with the same
 * laneMask/width and rebuilds the vector through VecTraits<...>::make.
 * The macro is expanded for the uchar, char, ushort, short, uint, int, float
 * and double vector families and then undefined.
 * Note: do not add // comments inside the macro body; the trailing backslashes
 * would splice the following line into the comment.
 */
#define CV_CUDEV_SHFL_XOR_VEC_INST(input_type) \
__device__ __forceinline__ input_type ## 1 shfl_xor(const input_type ## 1 & val, int laneMask, int width = warpSize) \
{ \
return VecTraits<input_type ## 1>::make( \
shfl_xor(val.x, laneMask, width) \
); \
} \
__device__ __forceinline__ input_type ## 2 shfl_xor(const input_type ## 2 & val, int laneMask, int width = warpSize) \
{ \
return VecTraits<input_type ## 2>::make( \
shfl_xor(val.x, laneMask, width), \
shfl_xor(val.y, laneMask, width) \
); \
} \
__device__ __forceinline__ input_type ## 3 shfl_xor(const input_type ## 3 & val, int laneMask, int width = warpSize) \
{ \
return VecTraits<input_type ## 3>::make( \
shfl_xor(val.x, laneMask, width), \
shfl_xor(val.y, laneMask, width), \
shfl_xor(val.z, laneMask, width) \
); \
} \
__device__ __forceinline__ input_type ## 4 shfl_xor(const input_type ## 4 & val, int laneMask, int width = warpSize) \
{ \
return VecTraits<input_type ## 4>::make( \
shfl_xor(val.x, laneMask, width), \
shfl_xor(val.y, laneMask, width), \
shfl_xor(val.z, laneMask, width), \
shfl_xor(val.w, laneMask, width) \
); \
}
CV_CUDEV_SHFL_XOR_VEC_INST(uchar)
CV_CUDEV_SHFL_XOR_VEC_INST(char)
CV_CUDEV_SHFL_XOR_VEC_INST(ushort)
CV_CUDEV_SHFL_XOR_VEC_INST(short)
CV_CUDEV_SHFL_XOR_VEC_INST(uint)
CV_CUDEV_SHFL_XOR_VEC_INST(int)
CV_CUDEV_SHFL_XOR_VEC_INST(float)
CV_CUDEV_SHFL_XOR_VEC_INST(double)
#undef CV_CUDEV_SHFL_XOR_VEC_INST
#endif // CV_CUDEV_ARCH >= 300
}}
#endif

View File

@ -0,0 +1,122 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_WARP_WARP_HPP__
#define __OPENCV_CUDEV_WARP_WARP_HPP__
#include "../common.hpp"
namespace cv { namespace cudev {
// Warp geometry constants used by the helpers in this header.
enum
{
// Base-2 logarithm of WARP_SIZE.
LOG_WARP_SIZE = 5,
// Number of lanes per warp: 1 << 5 == 32.
WARP_SIZE = 1 << LOG_WARP_SIZE
};
// Static helpers that identify the calling thread's position in terms of warps.
struct Warp
{
    // Returns the calling thread's lane index, read from the PTX special
    // register %laneid.
    __device__ __forceinline__ static uint laneId()
    {
        uint ret;
        // In an inline-asm template '%' introduces an operand reference (%0),
        // so the literal '%' of the special-register name is written as "%%".
        asm("mov.u32 %0, %%laneid;" : "=r"(ret));
        return ret;
    }

    // Returns the index of the calling thread's warp within its block: the
    // thread index linearised over (x, y, z) with x varying fastest, divided
    // by WARP_SIZE.
    __device__ __forceinline__ static uint warpId()
    {
        const uint tid = (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
        return tid / WARP_SIZE;
    }
};
// Assigns `value` to elements of [beg, end) in a lane-strided fashion: the
// calling lane starts at beg + laneId and advances by WARP_SIZE, so when every
// lane of a warp calls it the whole range is covered.
template <class It, typename T>
__device__ __forceinline__ void warpFill(It beg, It end, const T& value)
{
    It cur = beg + Warp::laneId();

    while (cur < end)
    {
        *cur = value;
        cur += WARP_SIZE;
    }
}
// Copies [beg, end) to the range starting at `out` in a lane-strided fashion:
// the calling lane handles elements laneId, laneId + WARP_SIZE, ... so that a
// full warp calling it copies the whole range.
//
// The destination is addressed with the same lane offset as the source, so
// element i of the input lands in element i of the output. (Previously only
// the input iterator was offset by the lane id, which made every lane write to
// out[0], out[WARP_SIZE], ... and left the other slots untouched.)
//
// Returns `out` advanced by WARP_SIZE for every element this lane copied; the
// returned value is the same as before the fix.
template <class InIt, class OutIt>
__device__ __forceinline__ OutIt warpCopy(InIt beg, InIt end, OutIt out)
{
    const uint lane = Warp::laneId();

    for (InIt t = beg + lane; t < end; t += WARP_SIZE, out += WARP_SIZE)
        *(out + lane) = *t;

    return out;
}
// Applies the unary functor `op` to [beg, end) and stores the results in the
// range starting at `out`, lane-strided: the calling lane handles elements
// laneId, laneId + WARP_SIZE, ...
//
// The destination is addressed with the same lane offset as the source, so
// op(beg[i]) lands in out[i]. (Previously only the input iterator was offset
// by the lane id, which made every lane write to out[0], out[WARP_SIZE], ...)
//
// Returns `out` advanced by WARP_SIZE for every element this lane processed;
// the returned value is the same as before the fix.
template <class InIt, class OutIt, class UnOp>
__device__ __forceinline__ OutIt warpTransform(InIt beg, InIt end, OutIt out, const UnOp& op)
{
    const uint lane = Warp::laneId();

    for (InIt t = beg + lane; t < end; t += WARP_SIZE, out += WARP_SIZE)
        *(out + lane) = op(*t);

    return out;
}
// Applies the binary functor `op` element-wise to [beg1, end1) and the range
// starting at beg2, storing the results in the range starting at `out`,
// lane-strided: the calling lane handles elements laneId, laneId + WARP_SIZE, ...
//
// The destination is addressed with the same lane offset as both sources, so
// op(beg1[i], beg2[i]) lands in out[i]. (Previously the two input iterators
// were offset by the lane id but the output was not, which made every lane
// write to out[0], out[WARP_SIZE], ...)
//
// Returns `out` advanced by WARP_SIZE for every element this lane processed;
// the returned value is the same as before the fix.
template <class InIt1, class InIt2, class OutIt, class BinOp>
__device__ __forceinline__ OutIt warpTransform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, const BinOp& op)
{
    const uint lane = Warp::laneId();

    InIt1 t1 = beg1 + lane;
    InIt2 t2 = beg2 + lane;

    for (; t1 < end1; t1 += WARP_SIZE, t2 += WARP_SIZE, out += WARP_SIZE)
        *(out + lane) = op(*t1, *t2);

    return out;
}
// Writes an increasing sequence into [beg, end): element i receives value + i.
// Work is lane-strided - the calling lane writes elements laneId,
// laneId + WARP_SIZE, ... and bumps its running value by WARP_SIZE each step.
template<typename OutIt, typename T>
__device__ __forceinline__ void warpYota(OutIt beg, OutIt end, T value)
{
    const uint lane = Warp::laneId();

    value += lane;
    OutIt cur = beg + lane;

    while (cur < end)
    {
        *cur = value;
        cur += WARP_SIZE;
        value += WARP_SIZE;
    }
}
}}
#endif

View File

@ -0,0 +1,33 @@
# Builds and registers the accuracy-test executable for the enclosing module.
# NOTE(review): `name` and `the_module` are not set in this file; they are
# presumably inherited from the parent module's CMakeLists.txt - confirm.
set(test_deps opencv_core opencv_imgproc opencv_highgui opencv_ts ${OPENCV_MODULE_opencv_ts_DEPS})
ocv_check_dependencies(${test_deps})

# Nothing is created when any of the required modules is unavailable.
if(OCV_DEPENDENCIES_FOUND)
  set(the_target "opencv_test_${name}")

  ocv_module_include_directories("${test_deps}" "${the_module}")

  # Host (.cpp) and device (.cu) test sources plus local headers.
  file(GLOB test_srcs "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.cu")
  file(GLOB test_hdrs "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
  source_group("Src" FILES ${test_srcs})
  source_group("Include" FILES ${test_hdrs})
  set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs})

  cuda_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES})
  target_link_libraries(${the_target} ${test_deps} ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES})
  add_dependencies(opencv_tests ${the_target})

  # Additional target properties
  set_target_properties(${the_target} PROPERTIES
    DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
    RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
  )

  if(ENABLE_SOLUTION_FOLDERS)
    set_target_properties(${the_target} PROPERTIES FOLDER "tests accuracy")
  endif()

  enable_testing()
  # Register the test through the target name so CMake resolves the executable
  # path itself at generate time (per configuration). Reading the LOCATION
  # target property at configure time is rejected by policy CMP0026 and yields
  # a single-configuration path under multi-config generators.
  add_test(NAME ${the_target} COMMAND ${the_target})
endif()

View File

@ -0,0 +1,168 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
////////////////////////////////////////////////////////////////////////////////
// SqrtTest
// Typed fixture checking the device sqrt_ expression against cv::sqrt.
template <typename T>
class SqrtTest : public ::testing::Test
{
public:
    // sqrt_ applied directly to a GpuMat_ must match cv::sqrt exactly.
    void test_gpumat()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src = randomMat(sz, mat_type);

        GpuMat_<T> d_src(src);

        GpuMat_<T> dst = sqrt_(d_src);

        Mat dst_gold;
        cv::sqrt(src, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // sqrt_ applied to a product expression must match cv::multiply followed
    // by cv::sqrt exactly.
    void test_expr()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src1 = randomMat(sz, mat_type);
        Mat src2 = randomMat(sz, mat_type);

        GpuMat_<T> d_src1(src1);
        GpuMat_<T> d_src2(src2);

        GpuMat_<T> dst = sqrt_(d_src1 * d_src2);

        Mat dst_gold;
        cv::multiply(src1, src2, dst_gold);
        cv::sqrt(dst_gold, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// SqrtTest is instantiated for float only.
TYPED_TEST_CASE(SqrtTest, float);

TYPED_TEST(SqrtTest, GpuMat)
{
    this->test_gpumat();
}

TYPED_TEST(SqrtTest, Expr)
{
    this->test_expr();
}
////////////////////////////////////////////////////////////////////////////////
// MagnitudeTest
// Typed fixture cross-checking three device-side ways of computing a magnitude.
template <typename T>
class MagnitudeTest : public ::testing::Test
{
public:
    // hypot_ and magnitude_ must agree within 1e-4; magnitude_ and the explicit
    // sqrt_(sqr_(a) + sqr_(b)) expression must agree exactly.
    void test_accuracy()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src1 = randomMat(sz, mat_type);
        Mat src2 = randomMat(sz, mat_type);

        GpuMat_<T> d_src1(src1);
        GpuMat_<T> d_src2(src2);

        GpuMat_<T> dst1 = hypot_(d_src1, d_src2);
        GpuMat_<T> dst2 = magnitude_(d_src1, d_src2);
        GpuMat_<T> dst3 = sqrt_(sqr_(d_src1) + sqr_(d_src2));

        EXPECT_MAT_NEAR(dst1, dst2, 1e-4);
        EXPECT_MAT_NEAR(dst2, dst3, 0.0);
    }
};
// MagnitudeTest is instantiated for float only.
TYPED_TEST_CASE(MagnitudeTest, float);

TYPED_TEST(MagnitudeTest, Accuracy)
{
    this->test_accuracy();
}
////////////////////////////////////////////////////////////////////////////////
// PowTest
// Typed fixture checking pow_ against sqrt_ on the device.
template <typename T>
class PowTest : public ::testing::Test
{
public:
    // pow_(x, 0.5) must agree with sqrt_(x) within 1e-5.
    void test_accuracy()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src = randomMat(sz, mat_type);

        GpuMat_<T> d_src(src);

        GpuMat_<T> dst1 = pow_(d_src, 0.5);
        GpuMat_<T> dst2 = sqrt_(d_src);

        EXPECT_MAT_NEAR(dst1, dst2, 1e-5);
    }
};
// PowTest is instantiated for float only.
TYPED_TEST_CASE(PowTest, float);

TYPED_TEST(PowTest, Accuracy)
{
    this->test_accuracy();
}

View File

@ -0,0 +1,395 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Element types used to instantiate the typed tests in this file.
typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
// The signed subset of AllTypes (no uchar / ushort).
typedef ::testing::Types<short, int, float> SignedTypes;
////////////////////////////////////////////////////////////////////////////////
// UnaryMinusTest
// Typed fixture checking unary minus on the different device-side operand
// kinds. The host reference is always Mat::convertTo with a scale of -1, and
// results must match exactly.
template <typename T>
class UnaryMinusTest : public ::testing::Test
{
public:
    // Operand: GpuMat_.
    void test_gpumat()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src = randomMat(sz, mat_type);

        GpuMat_<T> d_src(src);

        GpuMat_<T> dst = -d_src;

        Mat dst_gold;
        src.convertTo(dst_gold, src.depth(), -1);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // Operand: GlobPtrSz obtained from a GpuMat_.
    void test_globptr()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src = randomMat(sz, mat_type);

        GpuMat_<T> d_src(src);
        GlobPtrSz<T> d_src_ptr = d_src;

        GpuMat_<T> dst = -d_src_ptr;

        Mat dst_gold;
        src.convertTo(dst_gold, src.depth(), -1);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // Operand: Texture constructed from a GpuMat_.
    void test_texptr()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src = randomMat(sz, mat_type);

        GpuMat_<T> d_src(src);
        Texture<T> tex_src(d_src);

        GpuMat_<T> dst = -tex_src;

        Mat dst_gold;
        src.convertTo(dst_gold, src.depth(), -1);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // Operand: the expression (a + b); the reference is cv::add followed by
    // the -1 scaling.
    void test_expr()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src1 = randomMat(sz, mat_type);
        Mat src2 = randomMat(sz, mat_type);

        GpuMat_<T> d_src1(src1);
        GpuMat_<T> d_src2(src2);

        GpuMat_<T> dst = -(d_src1 + d_src2);

        Mat dst_gold;
        cv::add(src1, src2, dst_gold);
        dst_gold.convertTo(dst_gold, dst_gold.depth(), -1);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// UnaryMinusTest runs over the signed element types only.
TYPED_TEST_CASE(UnaryMinusTest, SignedTypes);

TYPED_TEST(UnaryMinusTest, GpuMat)
{
    this->test_gpumat();
}

TYPED_TEST(UnaryMinusTest, GlobPtrSz)
{
    this->test_globptr();
}

TYPED_TEST(UnaryMinusTest, TexturePtr)
{
    this->test_texptr();
}

TYPED_TEST(UnaryMinusTest, Expr)
{
    this->test_expr();
}
////////////////////////////////////////////////////////////////////////////////
// PlusTest
// Typed fixture checking operator+ for several device-side operand
// combinations. The host reference is cv::add and results must match exactly.
template <typename T>
class PlusTest : public ::testing::Test
{
public:
    // GpuMat_ + GpuMat_.
    void test_gpumat_gpumat()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src1 = randomMat(sz, mat_type);
        Mat src2 = randomMat(sz, mat_type);

        GpuMat_<T> d_src1(src1);
        GpuMat_<T> d_src2(src2);

        GpuMat_<T> dst = d_src1 + d_src2;

        Mat dst_gold;
        cv::add(src1, src2, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // Texture + scalar constant 5.
    void test_texptr_scalar()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src = randomMat(sz, mat_type);

        GpuMat_<T> d_src(src);
        Texture<T> tex_src(d_src);

        GpuMat_<T> dst = tex_src + static_cast<T>(5);

        Mat dst_gold;
        cv::add(src, 5, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // (GpuMat_ + GpuMat_) + GpuMat_, i.e. an expression as the left operand.
    void test_expr_gpumat()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src1 = randomMat(sz, mat_type);
        Mat src2 = randomMat(sz, mat_type);
        Mat src3 = randomMat(sz, mat_type);

        GpuMat_<T> d_src1(src1);
        GpuMat_<T> d_src2(src2);
        GpuMat_<T> d_src3(src3);

        GpuMat_<T> dst = d_src1 + d_src2 + d_src3;

        Mat dst_gold;
        cv::add(src1, src2, dst_gold);
        cv::add(dst_gold, src3, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // scalar constant 5 + (GpuMat_ + GpuMat_), i.e. an expression as the right
    // operand.
    void test_scalar_expr()
    {
        const Size sz = randomSize(100, 400);
        const int mat_type = DataType<T>::type;

        Mat src1 = randomMat(sz, mat_type);
        Mat src2 = randomMat(sz, mat_type);

        GpuMat_<T> d_src1(src1);
        GpuMat_<T> d_src2(src2);

        GpuMat_<T> dst = static_cast<T>(5) + (d_src1 + d_src2);

        Mat dst_gold;
        cv::add(src1, src2, dst_gold);
        cv::add(dst_gold, 5, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the PlusTest fixture for every type listed in AllTypes.
TYPED_TEST_CASE(PlusTest, AllTypes);

// Each typed test forwards to the fixture method of the matching name.
TYPED_TEST(PlusTest, GpuMat_GpuMat)
{
    this->test_gpumat_gpumat();
}

TYPED_TEST(PlusTest, TexturePtr_Scalar)
{
    this->test_texptr_scalar();
}

TYPED_TEST(PlusTest, Expr_GpuMat)
{
    this->test_expr_gpumat();
}

TYPED_TEST(PlusTest, Scalar_Expr)
{
    this->test_scalar_expr();
}
////////////////////////////////////////////////////////////////////////////////
// MinusTest
// Typed fixture exercising operator- of the device layer with different
// operand kinds; references are built with cv::subtract() (and cv::add()
// for the nested sum) on the host.
template <typename T>
class MinusTest : public ::testing::Test
{
public:
    // matrix - matrix
    void test_gpumat_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat lhs = randomMat(size, elemType);
        Mat rhs = randomMat(size, elemType);

        GpuMat_<T> d_lhs(lhs);
        GpuMat_<T> d_rhs(rhs);

        GpuMat_<T> dst = d_lhs - d_rhs;

        Mat dst_gold;
        cv::subtract(lhs, rhs, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // texture - scalar (the scalar is 5, cast to the element type)
    void test_texptr_scalar()
    {
        const Size size = randomSize(100, 400);
        Mat host_src = randomMat(size, DataType<T>::type);

        GpuMat_<T> dev_src(host_src);
        Texture<T> tex(dev_src);

        GpuMat_<T> dst = tex - static_cast<T>(5);

        Mat dst_gold;
        cv::subtract(host_src, 5, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // (matrix + matrix) - matrix
    void test_expr_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat first = randomMat(size, elemType);
        Mat second = randomMat(size, elemType);
        Mat third = randomMat(size, elemType);

        GpuMat_<T> d_first(first);
        GpuMat_<T> d_second(second);
        GpuMat_<T> d_third(third);

        GpuMat_<T> dst = (d_first + d_second) - d_third;

        Mat dst_gold;
        cv::add(first, second, dst_gold);
        cv::subtract(dst_gold, third, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // scalar - (matrix + matrix)
    void test_scalar_expr()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat lhs = randomMat(size, elemType);
        Mat rhs = randomMat(size, elemType);

        GpuMat_<T> d_lhs(lhs);
        GpuMat_<T> d_rhs(rhs);

        GpuMat_<T> dst = static_cast<T>(5) - (d_lhs + d_rhs);

        // Host reference: the scalar is the left operand of the subtraction.
        Mat dst_gold;
        cv::add(lhs, rhs, dst_gold);
        cv::subtract(5, dst_gold, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the MinusTest fixture for every type listed in SignedTypes.
TYPED_TEST_CASE(MinusTest, SignedTypes);

// Each typed test forwards to the fixture method of the matching name.
TYPED_TEST(MinusTest, GpuMat_GpuMat)
{
    this->test_gpumat_gpumat();
}

TYPED_TEST(MinusTest, TexturePtr_Scalar)
{
    this->test_texptr_scalar();
}

TYPED_TEST(MinusTest, Expr_GpuMat)
{
    this->test_expr_gpumat();
}

TYPED_TEST(MinusTest, Scalar_Expr)
{
    this->test_scalar_expr();
}
////////////////////////////////////////////////////////////////////////////////
// AbsDiffTest
// Typed fixture checking that absdiff_(a, b) yields the same device result
// as abs_(a - b). Both sides are computed on the device; there is no host
// reference in this test.
template <typename T>
class AbsDiffTest : public ::testing::Test
{
public:
    void test_accuracy()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat lhs = randomMat(size, elemType);
        Mat rhs = randomMat(size, elemType);

        GpuMat_<T> d_lhs(lhs);
        GpuMat_<T> d_rhs(rhs);

        GpuMat_<T> dst1 = absdiff_(d_lhs, d_rhs);
        GpuMat_<T> dst2 = abs_(d_lhs - d_rhs);

        EXPECT_MAT_NEAR(dst1, dst2, 0.0);
    }
};
// Instantiate the AbsDiffTest fixture for every type listed in SignedTypes.
TYPED_TEST_CASE(AbsDiffTest, SignedTypes);

TYPED_TEST(AbsDiffTest, Accuracy)
{
    this->test_accuracy();
}

View File

@ -0,0 +1,146 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Integer element types used to instantiate the typed bitwise tests
// (BitNotTest and BitAndTest) in this file.
typedef ::testing::Types<uchar, ushort, short, int> IntTypes;
////////////////////////////////////////////////////////////////////////////////
// BitNotTest
// Typed fixture checking operator~ on a device matrix against
// cv::bitwise_not() on the host input.
template <typename T>
class BitNotTest : public ::testing::Test
{
public:
    void test_gpumat()
    {
        const Size size = randomSize(100, 400);
        Mat host_src = randomMat(size, DataType<T>::type);

        GpuMat_<T> dev_src(host_src);

        GpuMat_<T> dst = ~dev_src;

        Mat dst_gold;
        cv::bitwise_not(host_src, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the BitNotTest fixture for every type listed in IntTypes.
TYPED_TEST_CASE(BitNotTest, IntTypes);

TYPED_TEST(BitNotTest, GpuMat)
{
    this->test_gpumat();
}
////////////////////////////////////////////////////////////////////////////////
// BitAndTest
// Typed fixture checking operator& on two device matrices against
// cv::bitwise_and() on the host inputs.
template <typename T>
class BitAndTest : public ::testing::Test
{
public:
    void test_gpumat_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat lhs = randomMat(size, elemType);
        Mat rhs = randomMat(size, elemType);

        GpuMat_<T> d_lhs(lhs);
        GpuMat_<T> d_rhs(rhs);

        GpuMat_<T> dst = d_lhs & d_rhs;

        Mat dst_gold;
        cv::bitwise_and(lhs, rhs, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the BitAndTest fixture for every type listed in IntTypes.
TYPED_TEST_CASE(BitAndTest, IntTypes);

TYPED_TEST(BitAndTest, GpuMat_GpuMat)
{
    this->test_gpumat_gpumat();
}
////////////////////////////////////////////////////////////////////////////////
// LShiftTest
// Typed fixture checking that shifting a device matrix left by 2 gives the
// same device result as multiplying it by 4. Both sides are computed on the
// device; there is no host reference in this test.
template <typename T>
class LShiftTest : public ::testing::Test
{
public:
    void test_accuracy()
    {
        const Size size = randomSize(100, 400);
        Mat host_src = randomMat(size, DataType<T>::type);

        GpuMat_<T> dev_src(host_src);

        GpuMat_<T> dst1 = dev_src << 2;
        GpuMat_<T> dst2 = dev_src * 4;

        EXPECT_MAT_NEAR(dst1, dst2, 0.0);
    }
};
// LShiftTest is instantiated for a single element type, int.
TYPED_TEST_CASE(LShiftTest, int);

TYPED_TEST(LShiftTest, Accuracy)
{
    this->test_accuracy();
}

View File

@ -0,0 +1,151 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Element types used to instantiate the typed comparison tests
// (LessTest and MinTest) in this file.
typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
////////////////////////////////////////////////////////////////////////////////
// LessTest
// Typed fixture checking operator< on two device matrices. The device
// comparison result is multiplied by 255 and stored as uchar, then compared
// with the mask produced by cv::compare(..., CMP_LT) on the host.
template <typename T>
class LessTest : public ::testing::Test
{
public:
    void test_gpumat_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat lhs = randomMat(size, elemType);
        Mat rhs = randomMat(size, elemType);

        GpuMat_<T> d_lhs(lhs);
        GpuMat_<T> d_rhs(rhs);

        GpuMat_<uchar> dst = (d_lhs < d_rhs) * 255;

        Mat dst_gold;
        cv::compare(lhs, rhs, dst_gold, CMP_LT);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the LessTest fixture for every type listed in AllTypes.
TYPED_TEST_CASE(LessTest, AllTypes);

TYPED_TEST(LessTest, GpuMat_GpuMat)
{
    this->test_gpumat_gpumat();
}
////////////////////////////////////////////////////////////////////////////////
// MinTest
// Typed fixture checking min_() on two device matrices against cv::min()
// on the host inputs.
template <typename T>
class MinTest : public ::testing::Test
{
public:
    void test_gpumat_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        Mat lhs = randomMat(size, elemType);
        Mat rhs = randomMat(size, elemType);

        GpuMat_<T> d_lhs(lhs);
        GpuMat_<T> d_rhs(rhs);

        GpuMat_<T> dst = min_(d_lhs, d_rhs);

        Mat dst_gold;
        cv::min(lhs, rhs, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the MinTest fixture for every type listed in AllTypes.
TYPED_TEST_CASE(MinTest, AllTypes);

TYPED_TEST(MinTest, GpuMat_GpuMat)
{
    this->test_gpumat_gpumat();
}
////////////////////////////////////////////////////////////////////////////////
// ThreshBinaryTest
// Element types the binary-threshold test is instantiated for.
typedef ::testing::Types<uchar, short, float> ThreshTypes;

// Typed fixture checking threshBinary_() on a device matrix against
// cv::threshold(..., THRESH_BINARY) on the host input.
template <typename T>
class ThreshBinaryTest : public ::testing::Test
{
public:
    void test_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int type = DataType<T>::type;

        Mat src = randomMat(size, type);

        GpuMat_<T> d_src(src);

        // maxVal must be non-zero: THRESH_BINARY writes either maxVal or 0,
        // so with maxVal == 0 both the device result and the reference are
        // all zeros and the test would pass even if the threshold comparison
        // were wrong. 255 is representable in uchar, short and float.
        GpuMat_<T> dst = threshBinary_(d_src, 128, 255);

        Mat dst_gold;
        cv::threshold(src, dst_gold, 128, 255, THRESH_BINARY);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the ThreshBinaryTest fixture for every type in ThreshTypes.
TYPED_TEST_CASE(ThreshBinaryTest, ThreshTypes);

TYPED_TEST(ThreshBinaryTest, GpuMat)
{
    this->test_gpumat();
}

View File

@ -0,0 +1,180 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Extra conversion codes added to namespace cv for this test file only.
// CVT_COLOR_TEST builds the code used to prepare its source image by pasting
// COLOR_BGR ## 2 ## src_space, so a constant must exist for every source space
// the macro is invoked with, including BGR, LRGB and LBGR. All three are
// defined here as aliases of COLOR_BGR2RGB.
// NOTE(review): this means the "BGR" source image is produced with the
// COLOR_BGR2RGB conversion rather than left untouched; the input is random
// data, so this presumably does not matter -- confirm.
namespace cv {
enum {
COLOR_BGR2BGR = COLOR_BGR2RGB,
COLOR_BGR2LRGB = COLOR_BGR2RGB,
COLOR_BGR2LBGR = COLOR_BGR2RGB
};
}
// CVT_COLOR_TEST(src_space, dst_space, src_cn, dst_cn)
//
// Defines TEST(CvtColor, <src_space>_to_<dst_space>), which checks the device
// expression <src_space>_to_<dst_space>_() against cv::cvtColor() with the
// code COLOR_<src_space>2<dst_space>.
//
//   src_space / dst_space - colour space names, pasted into both the device
//                           function name and the cv::COLOR_* constants
//   src_cn / dst_cn       - channel counts; 1 selects the scalar element type
//                           (uchar / float), otherwise the pasted vector type
//                           (uchar3, float4, ...) is used via SelectIf
//
// The test body runs twice: once on a random CV_8UC3 image and once on a
// random CV_32FC3 image created with the extra arguments 0 and 1 (presumably
// the value range -- confirm against randomMat). In both passes the random
// BGR image is first converted to src_space on the host with
// COLOR_BGR2<src_space>, and the comparison tolerance is 1.0.
//
// Keep comments outside the macro body: a // comment on a continued line
// would swallow the rest of the definition.
#define CVT_COLOR_TEST(src_space, dst_space, src_cn, dst_cn) \
TEST(CvtColor, src_space ## _to_ ## dst_space) \
{ \
const Size size = randomSize(100, 400); \
Mat bgrb = randomMat(size, CV_8UC3); \
Mat srcb; \
cv::cvtColor(bgrb, srcb, COLOR_BGR ## 2 ## src_space, src_cn); \
GpuMat_<SelectIf<src_cn == 1, uchar, uchar ## src_cn>::type> d_srcb(srcb); \
GpuMat_<SelectIf<dst_cn == 1, uchar, uchar ## dst_cn>::type> dstb = src_space ## _to_ ## dst_space ## _(d_srcb); \
Mat dstb_gold; \
cv::cvtColor(srcb, dstb_gold, COLOR_ ## src_space ## 2 ## dst_space); \
EXPECT_MAT_NEAR(dstb_gold, dstb, 1.0); \
Mat bgrf = randomMat(size, CV_32FC3, 0, 1); \
Mat srcf; \
cv::cvtColor(bgrf, srcf, COLOR_BGR ## 2 ## src_space, src_cn); \
GpuMat_<SelectIf<src_cn == 1, float, float ## src_cn>::type> d_srcf(srcf); \
GpuMat_<SelectIf<dst_cn == 1, float, float ## dst_cn>::type> dstf = src_space ## _to_ ## dst_space ## _(d_srcf); \
Mat dstf_gold; \
cv::cvtColor(srcf, dstf_gold, COLOR_ ## src_space ## 2 ## dst_space); \
EXPECT_MAT_NEAR(dstf_gold, dstf, 1.0); \
}
// One CvtColor test is generated per line below.
// Arguments: source space, destination space, source channels, destination channels.

// RGB <-> BGR
CVT_COLOR_TEST(BGR, RGB, 3, 3)
CVT_COLOR_TEST(BGR, BGRA, 3, 4)
CVT_COLOR_TEST(BGR, RGBA, 3, 4)
CVT_COLOR_TEST(BGRA, BGR, 4, 3)
CVT_COLOR_TEST(BGRA, RGB, 4, 3)
CVT_COLOR_TEST(BGRA, RGBA, 4, 4)
// RGB <-> Gray
CVT_COLOR_TEST(BGR, GRAY, 3, 1)
CVT_COLOR_TEST(RGB, GRAY, 3, 1)
CVT_COLOR_TEST(BGRA, GRAY, 4, 1)
CVT_COLOR_TEST(RGBA, GRAY, 4, 1)
CVT_COLOR_TEST(GRAY, BGR, 1, 3)
CVT_COLOR_TEST(GRAY, BGRA, 1, 4)
// RGB <-> YUV
CVT_COLOR_TEST(RGB, YUV, 3, 3)
CVT_COLOR_TEST(BGR, YUV, 3, 3)
CVT_COLOR_TEST(YUV, RGB, 3, 3)
CVT_COLOR_TEST(YUV, BGR, 3, 3)
// RGB <-> YCrCb
CVT_COLOR_TEST(RGB, YCrCb, 3, 3)
CVT_COLOR_TEST(BGR, YCrCb, 3, 3)
CVT_COLOR_TEST(YCrCb, RGB, 3, 3)
CVT_COLOR_TEST(YCrCb, BGR, 3, 3)
// RGB <-> XYZ
CVT_COLOR_TEST(RGB, XYZ, 3, 3)
CVT_COLOR_TEST(BGR, XYZ, 3, 3)
CVT_COLOR_TEST(XYZ, RGB, 3, 3)
CVT_COLOR_TEST(XYZ, BGR, 3, 3)
// RGB <-> HSV (the *_FULL variants use the COLOR_*_FULL conversion codes)
CVT_COLOR_TEST(RGB, HSV, 3, 3)
CVT_COLOR_TEST(BGR, HSV, 3, 3)
CVT_COLOR_TEST(HSV, RGB, 3, 3)
CVT_COLOR_TEST(HSV, BGR, 3, 3)
CVT_COLOR_TEST(RGB, HSV_FULL, 3, 3)
CVT_COLOR_TEST(BGR, HSV_FULL, 3, 3)
CVT_COLOR_TEST(HSV, RGB_FULL, 3, 3)
CVT_COLOR_TEST(HSV, BGR_FULL, 3, 3)
// RGB <-> HLS (the *_FULL variants use the COLOR_*_FULL conversion codes)
CVT_COLOR_TEST(RGB, HLS, 3, 3)
CVT_COLOR_TEST(BGR, HLS, 3, 3)
CVT_COLOR_TEST(HLS, RGB, 3, 3)
CVT_COLOR_TEST(HLS, BGR, 3, 3)
CVT_COLOR_TEST(RGB, HLS_FULL, 3, 3)
CVT_COLOR_TEST(BGR, HLS_FULL, 3, 3)
CVT_COLOR_TEST(HLS, RGB_FULL, 3, 3)
CVT_COLOR_TEST(HLS, BGR_FULL, 3, 3)
// RGB <-> Lab (LRGB / LBGR sources rely on the COLOR_BGR2L* aliases in this file)
CVT_COLOR_TEST(RGB, Lab, 3, 3)
CVT_COLOR_TEST(BGR, Lab, 3, 3)
CVT_COLOR_TEST(Lab, RGB, 3, 3)
CVT_COLOR_TEST(Lab, BGR, 3, 3)
CVT_COLOR_TEST(LRGB, Lab, 3, 3)
CVT_COLOR_TEST(LBGR, Lab, 3, 3)
CVT_COLOR_TEST(Lab, LRGB, 3, 3)
CVT_COLOR_TEST(Lab, LBGR, 3, 3)
// RGB <-> Luv (LRGB / LBGR sources rely on the COLOR_BGR2L* aliases in this file)
CVT_COLOR_TEST(RGB, Luv, 3, 3)
CVT_COLOR_TEST(BGR, Luv, 3, 3)
CVT_COLOR_TEST(Luv, RGB, 3, 3)
CVT_COLOR_TEST(Luv, BGR, 3, 3)
CVT_COLOR_TEST(LRGB, Luv, 3, 3)
CVT_COLOR_TEST(LBGR, Luv, 3, 3)
CVT_COLOR_TEST(Luv, LRGB, 3, 3)
CVT_COLOR_TEST(Luv, LBGR, 3, 3)

View File

@ -0,0 +1,83 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Element types used to instantiate CvtTest in this file.
typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
////////////////////////////////////////////////////////////////////////////////
// CvtTest
// Typed fixture checking a conversion round trip on the device: the input is
// converted to float, scaled by 2 and shifted by -10, then converted back to
// T. The host reference is convertTo() with alpha = 2 and beta = -10.
template <typename T>
class CvtTest : public ::testing::Test
{
public:
    void test_gpumat()
    {
        const Size size = randomSize(100, 400);
        Mat host_src = randomMat(size, DataType<T>::type);

        GpuMat_<T> dev_src(host_src);

        GpuMat_<T> dst = cvt_<T>(cvt_<float>(dev_src) * 2.0f - 10.0f);

        Mat dst_gold;
        host_src.convertTo(dst_gold, host_src.depth(), 2, -10);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the CvtTest fixture for every type listed in AllTypes.
TYPED_TEST_CASE(CvtTest, AllTypes);

TYPED_TEST(CvtTest, GpuMat)
{
    this->test_gpumat();
}

View File

@ -0,0 +1,109 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Checks sobelX_() / sobelY_() on a texture-wrapped 8-bit image against
// cv::Sobel() with a 3x3 kernel, 16-bit signed output and BORDER_REPLICATE.
TEST(Sobel, Accuracy)
{
    const Size size = randomSize(100, 400);
    Mat image = randomMat(size, CV_8UC1);

    GpuMat_<uchar> d_image(image);
    Texture<uchar> tex(d_image);

    // The source is widened to int before the derivative is taken; the
    // result is stored as short.
    GpuMat_<short> dx = sobelX_(cvt_<int>(tex));
    GpuMat_<short> dy = sobelY_(cvt_<int>(tex));

    Mat dx_gold;
    Mat dy_gold;
    cv::Sobel(image, dx_gold, CV_16S, 1, 0, 3, 1, 0, BORDER_REPLICATE);
    cv::Sobel(image, dy_gold, CV_16S, 0, 1, 3, 1, 0, BORDER_REPLICATE);

    EXPECT_MAT_NEAR(dx_gold, dx, 0.0);
    EXPECT_MAT_NEAR(dy_gold, dy, 0.0);
}
// Checks scharrX_() / scharrY_() on a texture-wrapped 8-bit image against
// cv::Scharr() with 16-bit signed output and BORDER_REPLICATE.
TEST(Scharr, Accuracy)
{
    const Size size = randomSize(100, 400);
    Mat image = randomMat(size, CV_8UC1);

    GpuMat_<uchar> d_image(image);
    Texture<uchar> tex(d_image);

    // The source is widened to int before the derivative is taken; the
    // result is stored as short.
    GpuMat_<short> dx = scharrX_(cvt_<int>(tex));
    GpuMat_<short> dy = scharrY_(cvt_<int>(tex));

    Mat dx_gold;
    Mat dy_gold;
    cv::Scharr(image, dx_gold, CV_16S, 1, 0, 1, 0, BORDER_REPLICATE);
    cv::Scharr(image, dy_gold, CV_16S, 0, 1, 1, 0, BORDER_REPLICATE);

    EXPECT_MAT_NEAR(dx_gold, dx, 0.0);
    EXPECT_MAT_NEAR(dy_gold, dy, 0.0);
}
// Checks laplacian_<1>() and laplacian_<3>() on a texture-wrapped 8-bit image
// against cv::Laplacian() called with 1 and 3 as its fourth argument,
// 16-bit signed output and BORDER_REPLICATE.
TEST(Laplacian, Accuracy)
{
    const Size size = randomSize(100, 400);
    Mat image = randomMat(size, CV_8UC1);

    GpuMat_<uchar> d_image(image);
    Texture<uchar> tex(d_image);

    GpuMat_<short> dst1 = laplacian_<1>(cvt_<int>(tex));
    GpuMat_<short> dst3 = laplacian_<3>(cvt_<int>(tex));

    Mat dst1_gold;
    Mat dst3_gold;
    cv::Laplacian(image, dst1_gold, CV_16S, 1, 1, 0, BORDER_REPLICATE);
    cv::Laplacian(image, dst3_gold, CV_16S, 3, 1, 0, BORDER_REPLICATE);

    EXPECT_MAT_NEAR(dst1_gold, dst1, 0.0);
    EXPECT_MAT_NEAR(dst3_gold, dst3, 0.0);
}

View File

@ -0,0 +1,103 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Checks integral_() on a random-sized 8-bit image against cv::integral().
TEST(Integral, _8u)
{
    const Size size = randomSize(100, 400);
    Mat image = randomMat(size, CV_8UC1);

    GpuMat_<uchar> d_image(image);

    GpuMat_<uint> dst = integral_(d_image);

    Mat dst_gold;
    cv::integral(image, dst_gold);

    // Keep only the source-sized region starting at (1, 1) of the host
    // result before comparing it with the device output.
    const Rect inner(1, 1, size.width, size.height);
    dst_gold = dst_gold(inner);

    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
}
// Checks integral_() on a random-sized float image against cv::integral()
// with CV_32F output, using cvtest::MatComparator(1e-5, 0) for the comparison.
TEST(Integral, _32f)
{
    const Size size = randomSize(100, 400);
    Mat image = randomMat(size, CV_32FC1, 0, 1);

    GpuMat_<float> d_image(image);

    GpuMat_<float> dst = integral_(d_image);

    Mat dst_gold;
    cv::integral(image, dst_gold, CV_32F);

    // Keep only the source-sized region starting at (1, 1) of the host
    // result before comparing it with the device output.
    const Rect inner(1, 1, size.width, size.height);
    dst_gold = dst_gold(inner);

    ASSERT_PRED_FORMAT2(cvtest::MatComparator(1e-5, 0), dst_gold, Mat(dst));
}
// Same check as the random-sized 8-bit integral test, but on a fixed 640x480
// image.
// NOTE(review): the "_opt" suffix suggests this size is meant to reach an
// optimised implementation path -- confirm against integral_().
TEST(Integral, _8u_opt)
{
    const Size size = Size(640, 480);
    Mat image = randomMat(size, CV_8UC1);

    GpuMat_<uchar> d_image(image);

    GpuMat_<uint> dst = integral_(d_image);

    Mat dst_gold;
    cv::integral(image, dst_gold);

    // Keep only the source-sized region starting at (1, 1) of the host
    // result before comparing it with the device output.
    const Rect inner(1, 1, size.width, size.height);
    dst_gold = dst_gold(inner);

    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
}

View File

@ -0,0 +1,82 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
////////////////////////////////////////////////////////////////////////////////
// LutTest
// Typed fixture checking lut_() with a 256-entry table on the device against
// cv::LUT() on the host inputs.
template <typename T>
class LutTest : public ::testing::Test
{
public:
    void test_gpumat()
    {
        const Size size = randomSize(100, 400);
        const int elemType = DataType<T>::type;

        // The image is generated before the table, a single row of 256 entries.
        Mat image = randomMat(size, elemType);
        Mat table = randomMat(Size(256, 1), elemType);

        GpuMat_<T> d_image(image);
        GpuMat_<T> d_table(table);

        GpuMat_<T> dst = lut_(d_image, d_table);

        Mat dst_gold;
        cv::LUT(image, table, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// LutTest is instantiated for a single element type, uchar.
TYPED_TEST_CASE(LutTest, uchar);

TYPED_TEST(LutTest, GpuMat)
{
    this->test_gpumat();
}

View File

@ -0,0 +1,46 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Entry point of the test executable: the whole translation unit is just the
// CV_TEST_MAIN macro from the OpenCV test-support header.
#include "opencv2/ts.hpp"
// NOTE(review): "cv" is presumably the test-data sub-directory handed to the
// ts framework - confirm against the CV_TEST_MAIN definition in ts.hpp.
CV_TEST_MAIN("cv")

View File

@ -0,0 +1,55 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Common header included first by every test source of this module.
// It pulls in the host-side OpenCV modules, the cudev device layer under test
// and the test-support headers.
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
// Host-side OpenCV modules.
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
// Device layer under test.
#include "opencv2/cudev.hpp"
// Test-support headers.
#include "opencv2/ts.hpp"
#include "opencv2/ts/gpu_test.hpp"
#endif // __OPENCV_TEST_PRECOMP_HPP__

View File

@ -0,0 +1,81 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Compares the device pyrDown_ expression with cv::pyrDown on a random
// single-channel 8-bit image; elements may differ by at most 1.
TEST(PyrDown, _8uc1)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);

    // Device side.
    GpuMat_<uchar> d_img(h_img);
    GpuMat_<uchar> dst = pyrDown_(d_img);

    // Host reference.
    Mat dst_gold;
    cv::pyrDown(h_img, dst_gold);

    ASSERT_MAT_NEAR(dst_gold, dst, 1.0);
}
// Compares the device pyrUp_ expression with cv::pyrUp on a random
// four-channel float image; elements may differ by at most 1e-4.
TEST(PyrUp, _32fc4)
{
    const Size size = randomSize(100, 400);
    Mat src = randomMat(size, CV_32FC4);

    GpuMat_<float4> d_src(src);

    // This test is the PyrUp case, so it has to drive the up-sampling
    // expression (it previously repeated the pyrDown_ check by mistake).
    GpuMat_<float4> dst = pyrUp_(d_src);

    // Host reference computed with the matching up-sampling routine.
    Mat dst_gold;
    cv::pyrUp(src, dst_gold);

    ASSERT_MAT_NEAR(dst_gold, dst, 1e-4);
}

View File

@ -0,0 +1,312 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// sum_ over a random 8-bit image must match cv::sum (compared as float).
TEST(Sum, GpuMat)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<float> dst = sum_(d_img);

    // Bring the single-element device result back into a host float.
    float res;
    dst.download(_OutputArray(&res, 1));

    Scalar dst_gold = cv::sum(h_img);

    ASSERT_FLOAT_EQ(static_cast<float>(dst_gold[0]), res);
}
// sum_(abs_(a - b)) on the device must match the host L1 norm of (a, b).
TEST(Sum, Expr)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_a = randomMat(imgSize, CV_32FC1, 0, 1);
    Mat h_b = randomMat(imgSize, CV_32FC1, 0, 1);

    GpuMat_<float> d_a(h_a), d_b(h_b);

    GpuMat_<float> dst = sum_(abs_(d_a - d_b));

    // Bring the single-element device result back into a host float.
    float res;
    dst.download(_OutputArray(&res, 1));

    Scalar dst_gold = cv::norm(h_a, h_b, NORM_L1);

    ASSERT_FLOAT_EQ(static_cast<float>(dst_gold[0]), res);
}
// minVal_ over a random 8-bit image must match the minimum reported by
// cv::minMaxLoc.
TEST(MinVal, GpuMat)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<float> dst = minVal_(d_img);

    float res;
    dst.download(_OutputArray(&res, 1));

    // Only the minimum is requested; the maximum output pointer is null.
    double res_gold;
    cv::minMaxLoc(h_img, &res_gold, 0);

    ASSERT_FLOAT_EQ(static_cast<float>(res_gold), res);
}
// maxVal_(abs_(a - b)) on the device must match the host infinity norm of
// (a, b) for 32-bit integer inputs.
TEST(MaxVal, Expr)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_a = randomMat(imgSize, CV_32SC1);
    Mat h_b = randomMat(imgSize, CV_32SC1);

    GpuMat_<int> d_a(h_a), d_b(h_b);

    GpuMat_<float> dst = maxVal_(abs_(d_a - d_b));

    float res;
    dst.download(_OutputArray(&res, 1));

    double res_gold = cv::norm(h_a, h_b, NORM_INF);

    ASSERT_FLOAT_EQ(static_cast<float>(res_gold), res);
}
// minMaxVal_ yields two values (min, max) that must match cv::minMaxLoc.
TEST(MinMaxVal, GpuMat)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<float> dst = minMaxVal_(d_img);

    // Download straight into a two-element host array wrapped as a 1x2 Mat.
    float res[2];
    dst.download(Mat(1, 2, CV_32FC1, res));

    double res_gold[2];
    cv::minMaxLoc(h_img, &res_gold[0], &res_gold[1]);

    ASSERT_FLOAT_EQ(static_cast<float>(res_gold[0]), res[0]);
    ASSERT_FLOAT_EQ(static_cast<float>(res_gold[1]), res[1]);
}
// Cross-checks two device paths against each other: countNonZero_ versus
// summing the result of the (value != 0) expression.
TEST(NonZeroCount, Accuracy)
{
    const Size imgSize = randomSize(100, 400);
    // Values are drawn with bounds 0 and 5.
    Mat h_img = randomMat(imgSize, CV_8UC1, 0, 5);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<int> dst1 = countNonZero_(d_img);
    GpuMat_<int> dst2 = sum_(cvt_<int>(d_img) != 0);

    EXPECT_MAT_NEAR(dst1, dst2, 0.0);
}
// reduceToRow_ with Sum<int> must equal cv::reduce(dim = 0, REDUCE_SUM) with
// 32-bit integer output.
TEST(ReduceToRow, Sum)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<int> dst = reduceToRow_<Sum<int> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 0, REDUCE_SUM, CV_32S);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// reduceToRow_ with Avg<float> must equal cv::reduce(dim = 0, REDUCE_AVG)
// with float output, within 1e-4.
TEST(ReduceToRow, Avg)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<float> dst = reduceToRow_<Avg<float> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 0, REDUCE_AVG, CV_32F);

    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
}
// reduceToRow_ with Min<uchar> must equal cv::reduce(dim = 0, REDUCE_MIN).
TEST(ReduceToRow, Min)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<uchar> dst = reduceToRow_<Min<uchar> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 0, REDUCE_MIN);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// reduceToRow_ with Max<uchar> must equal cv::reduce(dim = 0, REDUCE_MAX).
TEST(ReduceToRow, Max)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<uchar> dst = reduceToRow_<Max<uchar> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 0, REDUCE_MAX);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// reduceToColumn_ with Sum<int> must equal cv::reduce(dim = 1, REDUCE_SUM)
// with 32-bit integer output.
TEST(ReduceToColumn, Sum)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<int> dst = reduceToColumn_<Sum<int> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 1, REDUCE_SUM, CV_32S);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// reduceToColumn_ with Avg<float> must equal cv::reduce(dim = 1, REDUCE_AVG)
// with float output, within 1e-4.
TEST(ReduceToColumn, Avg)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<float> dst = reduceToColumn_<Avg<float> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 1, REDUCE_AVG, CV_32F);

    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
}
// reduceToColumn_ with Min<uchar> must equal cv::reduce(dim = 1, REDUCE_MIN).
TEST(ReduceToColumn, Min)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<uchar> dst = reduceToColumn_<Min<uchar> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 1, REDUCE_MIN);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// reduceToColumn_ with Max<uchar> must equal cv::reduce(dim = 1, REDUCE_MAX).
TEST(ReduceToColumn, Max)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<uchar> dst = reduceToColumn_<Max<uchar> >(d_img);

    Mat dst_gold;
    cv::reduce(h_img, dst_gold, 1, REDUCE_MAX);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// Host reference histogram: (re)allocates `hist` as a zeroed 1x256 CV_32SC1
// row and increments, for every byte of every row of `src`, the bin indexed
// by that byte value.
static void calcHistGold(const cv::Mat& src, cv::Mat& hist)
{
    hist.create(1, 256, CV_32SC1);
    hist.setTo(cv::Scalar::all(0));

    int* bins = hist.ptr<int>();

    for (int row = 0; row < src.rows; ++row)
    {
        // Walk the row with a moving pointer instead of an index.
        const uchar* pixel = src.ptr(row);
        const uchar* const rowEnd = pixel + src.cols;

        for (; pixel != rowEnd; ++pixel)
            ++bins[*pixel];
    }
}
// The 256-bin device histogram_ of a random 8-bit image must match the host
// reference produced by calcHistGold exactly.
TEST(Histogram, GpuMat)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<int> dst = histogram_<256>(d_img);

    Mat dst_gold;
    calcHistGold(h_img, dst_gold);

    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}

View File

@ -0,0 +1,181 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// Element types used to instantiate the typed fixtures in this file
// (passed to TYPED_TEST_CASE).
typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
////////////////////////////////////////////////////////////////////////////////
// MergeTest
// Typed fixture: merges single-channel device planes of element type T with
// gridMerge and compares the interleaved result with cv::merge on the host.
template <typename T>
class MergeTest : public ::testing::Test
{
public:
    // Two planes -> one 2-channel image.
    void test_c2()
    {
        const Size planeSize = randomSize(100, 400);
        const int planeType = DataType<T>::type;

        Mat h_plane1 = randomMat(planeSize, planeType);
        Mat h_plane2 = randomMat(planeSize, planeType);

        GpuMat_<T> d_plane1(h_plane1);
        GpuMat_<T> d_plane2(h_plane2);

        GpuMat_<typename MakeVec<T, 2>::type> dst;
        gridMerge(zipPtr(d_plane1, d_plane2), dst);

        // Host reference.
        Mat dst_gold;
        Mat h_planes[] = {h_plane1, h_plane2};
        cv::merge(h_planes, 2, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }

    // Three planes -> one 3-channel image.
    void test_c3()
    {
        const Size planeSize = randomSize(100, 400);
        const int planeType = DataType<T>::type;

        Mat h_plane1 = randomMat(planeSize, planeType);
        Mat h_plane2 = randomMat(planeSize, planeType);
        Mat h_plane3 = randomMat(planeSize, planeType);

        GpuMat_<T> d_plane1(h_plane1);
        GpuMat_<T> d_plane2(h_plane2);
        GpuMat_<T> d_plane3(h_plane3);

        GpuMat_<typename MakeVec<T, 3>::type> dst;
        gridMerge(zipPtr(d_plane1, d_plane2, d_plane3), dst);

        // Host reference.
        Mat dst_gold;
        Mat h_planes[] = {h_plane1, h_plane2, h_plane3};
        cv::merge(h_planes, 3, dst_gold);

        ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
    }
};
// Instantiate the merge fixture for every type listed in AllTypes.
TYPED_TEST_CASE(MergeTest, AllTypes);

// Two-channel merge.
TYPED_TEST(MergeTest, C2)
{
    this->test_c2();
}

// Three-channel merge.
TYPED_TEST(MergeTest, C3)
{
    this->test_c3();
}
////////////////////////////////////////////////////////////////////////////////
// SplitTest
// Typed fixture: splits a multi-channel device image of element type T with
// gridSplit and compares every plane with the output of cv::split.
template <typename T>
class SplitTest : public ::testing::Test
{
public:
    // 3-channel image -> three planes.
    void test_c3()
    {
        const Size imgSize = randomSize(100, 400);
        const int imgType = CV_MAKE_TYPE(DataType<T>::depth, 3);

        Mat h_img = randomMat(imgSize, imgType);
        GpuMat_<typename MakeVec<T, 3>::type> d_img(h_img);

        GpuMat_<T> dst1, dst2, dst3;
        gridSplit(d_img, tie(dst1, dst2, dst3));

        // Host reference planes.
        std::vector<Mat> dst;
        cv::split(h_img, dst);

        ASSERT_MAT_NEAR(dst[0], dst1, 0.0);
        ASSERT_MAT_NEAR(dst[1], dst2, 0.0);
        ASSERT_MAT_NEAR(dst[2], dst3, 0.0);
    }

    // 4-channel image -> four planes.
    void test_c4()
    {
        const Size imgSize = randomSize(100, 400);
        const int imgType = CV_MAKE_TYPE(DataType<T>::depth, 4);

        Mat h_img = randomMat(imgSize, imgType);
        GpuMat_<typename MakeVec<T, 4>::type> d_img(h_img);

        GpuMat_<T> dst1, dst2, dst3, dst4;
        gridSplit(d_img, tie(dst1, dst2, dst3, dst4));

        // Host reference planes.
        std::vector<Mat> dst;
        cv::split(h_img, dst);

        ASSERT_MAT_NEAR(dst[0], dst1, 0.0);
        ASSERT_MAT_NEAR(dst[1], dst2, 0.0);
        ASSERT_MAT_NEAR(dst[2], dst3, 0.0);
        ASSERT_MAT_NEAR(dst[3], dst4, 0.0);
    }
};
// Instantiate the split fixture for every type listed in AllTypes.
TYPED_TEST_CASE(SplitTest, AllTypes);

// Three-channel split.
TYPED_TEST(SplitTest, C3)
{
    this->test_c3();
}

// Four-channel split.
TYPED_TEST(SplitTest, C4)
{
    this->test_c4();
}

View File

@ -0,0 +1,256 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// remap
// Coordinate-map layouts produced by generateMap():
//   HALF_SIZE       - pixels strictly inside the central half of the map get
//                     2 * (offset from the quarter mark) + 0.5; all others map to (0, 0)
//   UPSIDE_DOWN     - x is kept, y becomes (rows - j)
//   REFLECTION_X    - x becomes (cols - i), y is kept
//   REFLECTION_BOTH - x becomes (cols - i) and y becomes (rows - j)
enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH };
// Fills the float maps `mapx` / `mapy` (iterated over mapx's rows and cols)
// with source coordinates for the requested remapMode. A mode that is not one
// of the four known values leaves both maps untouched.
static void generateMap(Mat& mapx, Mat& mapy, int remapMode)
{
    for (int row = 0; row < mapx.rows; ++row)
    {
        for (int col = 0; col < mapx.cols; ++col)
        {
            if (remapMode == HALF_SIZE)
            {
                // Strictly inside the central half of the map in both directions?
                const bool insideCentre =
                    col > mapx.cols*0.25 && col < mapx.cols*0.75 &&
                    row > mapx.rows*0.25 && row < mapx.rows*0.75;

                if (insideCentre)
                {
                    mapx.at<float>(row, col) = 2.f * (col - mapx.cols * 0.25f) + 0.5f;
                    mapy.at<float>(row, col) = 2.f * (row - mapx.rows * 0.25f) + 0.5f;
                }
                else
                {
                    mapx.at<float>(row, col) = 0.f;
                    mapy.at<float>(row, col) = 0.f;
                }
            }
            else if (remapMode == UPSIDE_DOWN)
            {
                mapx.at<float>(row, col) = static_cast<float>(col);
                mapy.at<float>(row, col) = static_cast<float>(mapx.rows - row);
            }
            else if (remapMode == REFLECTION_X)
            {
                mapx.at<float>(row, col) = static_cast<float>(mapx.cols - col);
                mapy.at<float>(row, col) = static_cast<float>(row);
            }
            else if (remapMode == REFLECTION_BOTH)
            {
                mapx.at<float>(row, col) = static_cast<float>(mapx.cols - col);
                mapy.at<float>(row, col) = static_cast<float>(mapx.rows - row);
            }
        }
    }
}
static void test_remap(int remapMode)
{
const Size size = randomSize(100, 400);
Mat src = randomMat(size, CV_32FC1, 0, 1);
Mat mapx(size, CV_32FC1);
Mat mapy(size, CV_32FC1);
generateMap(mapx, mapy, remapMode);
GpuMat_<float> d_src(src);
GpuMat_<float> d_mapx(mapx);
GpuMat_<float> d_mapy(mapy);
GpuMat_<float> dst = remap_(interNearest(brdReplicate(d_src)), d_mapx, d_mapy);
Mat dst_gold;
cv::remap(src, dst_gold, mapx, mapy, INTER_NEAREST, BORDER_REPLICATE);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
// One test per map layout; each simply forwards its mode to test_remap().
TEST(Remap, HALF_SIZE)
{
test_remap(HALF_SIZE);
}
// Map with the vertical coordinate mirrored.
TEST(Remap, UPSIDE_DOWN)
{
test_remap(UPSIDE_DOWN);
}
// Map with the horizontal coordinate mirrored.
TEST(Remap, REFLECTION_X)
{
test_remap(REFLECTION_X);
}
// Map with both coordinates mirrored.
TEST(Remap, REFLECTION_BOTH)
{
test_remap(REFLECTION_BOTH);
}
// resize
// Cross-checks two device paths: a 2x cubic resize_ through a texture must
// equal a cubic remap_ (replicated border) driven by explicit coordinate maps.
TEST(Resize, Upscale)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_32FC1, 0, 1);

    GpuMat_<float> d_img(h_img);
    Texture<float> tex_img(d_img);

    GpuMat_<float> dst1 = resize_(interCubic(tex_img), 2, 2);

    // Maps are twice the source size in both directions.
    const int mapRows = imgSize.height * 2;
    const int mapCols = imgSize.width * 2;
    Mat h_mapx(mapRows, mapCols, CV_32FC1);
    Mat h_mapy(mapRows, mapCols, CV_32FC1);

    for (int row = 0; row < h_mapx.rows; ++row)
    {
        for (int col = 0; col < h_mapx.cols; ++col)
        {
            // Integer division: each source pixel covers a 2x2 output block.
            h_mapx.at<float>(row, col) = static_cast<float>(col / 2);
            h_mapy.at<float>(row, col) = static_cast<float>(row / 2);
        }
    }

    GpuMat_<float> d_mapx(h_mapx);
    GpuMat_<float> d_mapy(h_mapy);

    GpuMat_<float> dst2 = remap_(interCubic(brdReplicate(d_img)), d_mapx, d_mapy);

    EXPECT_MAT_NEAR(dst1, dst2, 0.0);
}
// Cross-checks two device paths: a 1/3 area resize_ through a texture must
// equal an area remap_ (replicated border, 3x3 window) driven by explicit maps.
TEST(Resize, Downscale)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_32FC1, 0, 1);

    const float fx = 1.0f / 3.0f;
    const float fy = 1.0f / 3.0f;

    GpuMat_<float> d_img(h_img);
    Texture<float> tex_img(d_img);

    GpuMat_<float> dst1 = resize_(interArea(tex_img, Size(3, 3)), fx, fy);

    // Both maps share the scaled-down dimensions.
    const int mapRows = cv::saturate_cast<int>(imgSize.height * fy);
    const int mapCols = cv::saturate_cast<int>(imgSize.width * fx);
    Mat h_mapx(mapRows, mapCols, CV_32FC1);
    Mat h_mapy(mapRows, mapCols, CV_32FC1);

    for (int row = 0; row < h_mapx.rows; ++row)
    {
        for (int col = 0; col < h_mapx.cols; ++col)
        {
            h_mapx.at<float>(row, col) = col / fx;
            h_mapy.at<float>(row, col) = row / fy;
        }
    }

    GpuMat_<float> d_mapx(h_mapx);
    GpuMat_<float> d_mapy(h_mapy);

    GpuMat_<float> dst2 = remap_(interArea(brdReplicate(d_img), Size(3, 3)), d_mapx, d_mapy);

    EXPECT_MAT_NEAR(dst1, dst2, 0.0);
}
// warpAffine & warpPerspective
// Builds a CV_32FC1 transform matrix: a rotation by `angle` (radians, as fed
// to std::cos / std::sin) with an x translation of srcSize.width / 2 (integer
// division) and no y translation. Returns 2x3, or 3x3 with a final row of
// (0, 0, 1) when `perspective` is true.
Mat createAffineTransfomMatrix(Size srcSize, float angle, bool perspective)
{
    const int rowCount = perspective ? 3 : 2;
    cv::Mat M(rowCount, 3, CV_32FC1);

    const float cosA = std::cos(angle);
    const float sinA = std::sin(angle);

    // First row: cos, -sin, x shift.
    M.at<float>(0, 0) = cosA;
    M.at<float>(0, 1) = -sinA;
    M.at<float>(0, 2) = static_cast<float>(srcSize.width / 2);

    // Second row: sin, cos, no y shift.
    M.at<float>(1, 0) = sinA;
    M.at<float>(1, 1) = cosA;
    M.at<float>(1, 2) = 0.0f;

    if (perspective)
    {
        // Third row of the 3x3 form.
        M.at<float>(2, 0) = 0.0f;
        M.at<float>(2, 1) = 0.0f;
        M.at<float>(2, 2) = 1.0f;
    }

    return M;
}
// Device warpAffine_ (nearest neighbour, constant border) with a pi/4 rotation
// matrix must be similar (1e-3) to cv::warpAffine called with
// INTER_NEAREST | WARP_INVERSE_MAP.
TEST(WarpAffine, Rotation)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_32FC1, 0, 1);
    Mat h_M = createAffineTransfomMatrix(imgSize, static_cast<float>(CV_PI / 4), false);

    GpuMat_<float> d_img(h_img);

    // The matrix is uploaded into a continuous device buffer.
    GpuMat_<float> d_M;
    createContinuous(h_M.size(), h_M.type(), d_M);
    d_M.upload(h_M);

    GpuMat_<float> dst = warpAffine_(interNearest(brdConstant(d_img)), imgSize, d_M);

    Mat dst_gold;
    cv::warpAffine(h_img, dst_gold, h_M, imgSize, INTER_NEAREST | WARP_INVERSE_MAP);

    EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-3);
}
// Device warpPerspective_ (nearest neighbour, constant border) with the 3x3
// form of the pi/4 rotation matrix must be similar (1e-3) to
// cv::warpPerspective called with INTER_NEAREST | WARP_INVERSE_MAP.
TEST(WarpPerspective, Rotation)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_32FC1, 0, 1);
    Mat h_M = createAffineTransfomMatrix(imgSize, static_cast<float>(CV_PI / 4), true);

    GpuMat_<float> d_img(h_img);

    // The matrix is uploaded into a continuous device buffer.
    GpuMat_<float> d_M;
    createContinuous(h_M.size(), h_M.type(), d_M);
    d_M.upload(h_M);

    GpuMat_<float> dst = warpPerspective_(interNearest(brdConstant(d_img)), imgSize, d_M);

    Mat dst_gold;
    cv::warpPerspective(h_img, dst_gold, h_M, imgSize, INTER_NEAREST | WARP_INVERSE_MAP);

    EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-3);
}

View File

@ -0,0 +1,81 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
using namespace cvtest;
// transpose_ of a random single-channel 8-bit image must match cv::transpose
// exactly.
TEST(Transpose, _8uc1)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_8UC1);
    GpuMat_<uchar> d_img(h_img);

    GpuMat_<uchar> dst = transpose_(d_img);

    Mat dst_gold;
    cv::transpose(h_img, dst_gold);

    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
}
// transpose_ of a random three-channel float image must match cv::transpose
// exactly.
TEST(Transpose, _32fc3)
{
    const Size imgSize = randomSize(100, 400);
    Mat h_img = randomMat(imgSize, CV_32FC3);
    GpuMat_<float3> d_img(h_img);

    GpuMat_<float3> dst = transpose_(d_img);

    Mat dst_gold;
    cv::transpose(h_img, dst_gold);

    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
}