2010-12-20 17:07:19 +08:00
|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
//
|
|
|
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
|
|
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
|
|
// derived from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
#include "opencv2/gpu/device/vecmath.hpp"
|
2011-01-24 18:11:02 +08:00
|
|
|
#include "opencv2/gpu/device/transform.hpp"
|
2011-07-21 16:47:44 +08:00
|
|
|
#include "opencv2/gpu/device/limits_gpu.hpp"
|
2011-01-24 18:11:02 +08:00
|
|
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
2010-12-20 17:07:19 +08:00
|
|
|
#include "internal_shared.hpp"
|
|
|
|
|
|
|
|
using namespace cv::gpu;
|
|
|
|
using namespace cv::gpu::device;
|
|
|
|
|
|
|
|
namespace cv { namespace gpu { namespace mathfunc
|
|
|
|
{
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Compare
|
|
|
|
|
|
|
|
template <typename T1, typename T2>
|
|
|
|
struct NotEqual
|
|
|
|
{
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2)
|
2010-12-20 17:07:19 +08:00
|
|
|
{
|
|
|
|
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename T1, typename T2>
|
2011-05-31 16:31:10 +08:00
|
|
|
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
|
2010-12-20 17:07:19 +08:00
|
|
|
{
|
|
|
|
NotEqual<T1, T2> op;
|
2011-05-31 16:31:10 +08:00
|
|
|
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, stream);
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
|
|
|
|
2011-05-31 16:31:10 +08:00
|
|
|
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
|
2010-12-20 17:07:19 +08:00
|
|
|
{
|
2011-05-31 16:31:10 +08:00
|
|
|
compare_ne<uint, uint>(src1, src2, dst, stream);
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
2011-05-31 16:31:10 +08:00
|
|
|
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
|
2010-12-20 17:07:19 +08:00
|
|
|
{
|
2011-05-31 16:31:10 +08:00
|
|
|
compare_ne<float, float>(src1, src2, dst, stream);
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Unary bitwise logical matrix operations
|
|
|
|
|
|
|
|
enum { UN_OP_NOT };
|
|
|
|
|
|
|
|
template <typename T, int opid>
|
|
|
|
struct UnOp;
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
struct UnOp<T, UN_OP_NOT>
|
|
|
|
{
|
2011-06-14 19:27:32 +08:00
|
|
|
static __device__ __forceinline__ T call(T v) { return ~v; }
|
2010-12-20 17:07:19 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <int opid>
|
|
|
|
__global__ void bitwiseUnOpKernel(int rows, int width, const PtrStep src, PtrStep dst)
|
|
|
|
{
|
|
|
|
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
|
|
|
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
|
|
|
|
if (y < rows)
|
|
|
|
{
|
|
|
|
uchar* dst_ptr = dst.ptr(y) + x;
|
|
|
|
const uchar* src_ptr = src.ptr(y) + x;
|
|
|
|
if (x + sizeof(uint) - 1 < width)
|
|
|
|
{
|
|
|
|
*(uint*)dst_ptr = UnOp<uint, opid>::call(*(uint*)src_ptr);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const uchar* src_end = src.ptr(y) + width;
|
|
|
|
while (src_ptr < src_end)
|
|
|
|
{
|
|
|
|
*dst_ptr++ = UnOp<uchar, opid>::call(*src_ptr++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <int opid>
|
|
|
|
void bitwiseUnOp(int rows, int width, const PtrStep src, PtrStep dst,
|
|
|
|
cudaStream_t stream)
|
|
|
|
{
|
|
|
|
dim3 threads(16, 16);
|
|
|
|
dim3 grid(divUp(width, threads.x * sizeof(uint)),
|
|
|
|
divUp(rows, threads.y));
|
|
|
|
|
|
|
|
bitwiseUnOpKernel<opid><<<grid, threads>>>(rows, width, src, dst);
|
2011-02-14 23:50:17 +08:00
|
|
|
cudaSafeCall( cudaGetLastError() );
|
2010-12-20 17:07:19 +08:00
|
|
|
|
|
|
|
if (stream == 0)
|
2011-05-31 16:31:10 +08:00
|
|
|
cudaSafeCall( cudaDeviceSynchronize() );
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T, int opid>
|
|
|
|
__global__ void bitwiseUnOpKernel(int rows, int cols, int cn, const PtrStep src,
|
|
|
|
const PtrStep mask, PtrStep dst)
|
|
|
|
{
|
|
|
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
|
|
|
|
if (x < cols && y < rows && mask.ptr(y)[x / cn])
|
|
|
|
{
|
|
|
|
T* dst_row = (T*)dst.ptr(y);
|
|
|
|
const T* src_row = (const T*)src.ptr(y);
|
|
|
|
|
|
|
|
dst_row[x] = UnOp<T, opid>::call(src_row[x]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T, int opid>
|
|
|
|
void bitwiseUnOp(int rows, int cols, int cn, const PtrStep src,
|
|
|
|
const PtrStep mask, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
dim3 threads(16, 16);
|
|
|
|
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
|
|
|
|
|
|
|
bitwiseUnOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src, mask, dst);
|
2011-02-14 23:50:17 +08:00
|
|
|
cudaSafeCall( cudaGetLastError() );
|
2010-12-20 17:07:19 +08:00
|
|
|
|
|
|
|
if (stream == 0)
|
2011-05-31 16:31:10 +08:00
|
|
|
cudaSafeCall( cudaDeviceSynchronize() );
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn,
|
|
|
|
const PtrStep src, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseUnOp<UN_OP_NOT>(rows, cols * elem_size1 * cn, src, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStep src,
|
|
|
|
const PtrStep mask, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseUnOp<T, UN_OP_NOT>(rows, cols * cn, cn, src, mask, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void bitwiseMaskNotCaller<uchar>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskNotCaller<ushort>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskNotCaller<uint>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Binary bitwise logical matrix operations
|
|
|
|
|
|
|
|
enum { BIN_OP_OR, BIN_OP_AND, BIN_OP_XOR };
|
|
|
|
|
|
|
|
template <typename T, int opid>
|
|
|
|
struct BinOp;
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
struct BinOp<T, BIN_OP_OR>
|
|
|
|
{
|
2011-06-14 19:27:32 +08:00
|
|
|
static __device__ __forceinline__ T call(T a, T b) { return a | b; }
|
2010-12-20 17:07:19 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
struct BinOp<T, BIN_OP_AND>
|
|
|
|
{
|
2011-06-14 19:27:32 +08:00
|
|
|
static __device__ __forceinline__ T call(T a, T b) { return a & b; }
|
2010-12-20 17:07:19 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
struct BinOp<T, BIN_OP_XOR>
|
|
|
|
{
|
2011-06-14 19:27:32 +08:00
|
|
|
static __device__ __forceinline__ T call(T a, T b) { return a ^ b; }
|
2010-12-20 17:07:19 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <int opid>
|
|
|
|
__global__ void bitwiseBinOpKernel(int rows, int width, const PtrStep src1,
|
|
|
|
const PtrStep src2, PtrStep dst)
|
|
|
|
{
|
|
|
|
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
|
|
|
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
|
|
|
|
if (y < rows)
|
|
|
|
{
|
|
|
|
uchar* dst_ptr = dst.ptr(y) + x;
|
|
|
|
const uchar* src1_ptr = src1.ptr(y) + x;
|
|
|
|
const uchar* src2_ptr = src2.ptr(y) + x;
|
|
|
|
|
|
|
|
if (x + sizeof(uint) - 1 < width)
|
|
|
|
{
|
|
|
|
*(uint*)dst_ptr = BinOp<uint, opid>::call(*(uint*)src1_ptr, *(uint*)src2_ptr);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const uchar* src1_end = src1.ptr(y) + width;
|
|
|
|
while (src1_ptr < src1_end)
|
|
|
|
{
|
|
|
|
*dst_ptr++ = BinOp<uchar, opid>::call(*src1_ptr++, *src2_ptr++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <int opid>
|
|
|
|
void bitwiseBinOp(int rows, int width, const PtrStep src1, const PtrStep src2,
|
|
|
|
PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
dim3 threads(16, 16);
|
|
|
|
dim3 grid(divUp(width, threads.x * sizeof(uint)), divUp(rows, threads.y));
|
|
|
|
|
|
|
|
bitwiseBinOpKernel<opid><<<grid, threads>>>(rows, width, src1, src2, dst);
|
2011-02-14 23:50:17 +08:00
|
|
|
cudaSafeCall( cudaGetLastError() );
|
2010-12-20 17:07:19 +08:00
|
|
|
|
|
|
|
if (stream == 0)
|
2011-05-31 16:31:10 +08:00
|
|
|
cudaSafeCall( cudaDeviceSynchronize() );
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T, int opid>
|
|
|
|
__global__ void bitwiseBinOpKernel(
|
|
|
|
int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
|
|
|
|
const PtrStep mask, PtrStep dst)
|
|
|
|
{
|
|
|
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
|
|
|
|
if (x < cols && y < rows && mask.ptr(y)[x / cn])
|
|
|
|
{
|
|
|
|
T* dst_row = (T*)dst.ptr(y);
|
|
|
|
const T* src1_row = (const T*)src1.ptr(y);
|
|
|
|
const T* src2_row = (const T*)src2.ptr(y);
|
|
|
|
|
|
|
|
dst_row[x] = BinOp<T, opid>::call(src1_row[x], src2_row[x]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T, int opid>
|
|
|
|
void bitwiseBinOp(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
|
|
|
|
const PtrStep mask, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
dim3 threads(16, 16);
|
|
|
|
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
bitwiseBinOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src1, src2, mask, dst);
|
|
|
|
cudaSafeCall( cudaGetLastError() );
|
2010-12-20 17:07:19 +08:00
|
|
|
|
|
|
|
if (stream == 0)
|
2011-05-31 16:31:10 +08:00
|
|
|
cudaSafeCall( cudaDeviceSynchronize() );
|
2010-12-20 17:07:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
|
|
|
|
const PtrStep src2, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseBinOp<BIN_OP_OR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
|
|
|
|
const PtrStep mask, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseBinOp<T, BIN_OP_OR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void bitwiseMaskOrCaller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskOrCaller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskOrCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
|
|
|
|
|
|
|
|
void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
|
|
|
|
const PtrStep src2, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseBinOp<BIN_OP_AND>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
|
|
|
|
const PtrStep mask, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseBinOp<T, BIN_OP_AND>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void bitwiseMaskAndCaller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskAndCaller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskAndCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
|
|
|
|
|
|
|
|
void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
|
|
|
|
const PtrStep src2, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseBinOp<BIN_OP_XOR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
|
|
|
|
const PtrStep mask, PtrStep dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
bitwiseBinOp<T, BIN_OP_XOR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void bitwiseMaskXorCaller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskXorCaller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
template void bitwiseMaskXorCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
|
|
|
|
|
2010-12-20 17:51:25 +08:00
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// min/max
|
|
|
|
|
|
|
|
struct MinOp
|
|
|
|
{
|
|
|
|
template <typename T>
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(T a, T b)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
return min(a, b);
|
|
|
|
}
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ float operator()(float a, float b)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
return fmin(a, b);
|
|
|
|
}
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ double operator()(double a, double b)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
return fmin(a, b);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct MaxOp
|
|
|
|
{
|
|
|
|
template <typename T>
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(T a, T b)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
return max(a, b);
|
|
|
|
}
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ float operator()(float a, float b)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
return fmax(a, b);
|
|
|
|
}
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ double operator()(double a, double b)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
return fmax(a, b);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ScalarMinOp
|
|
|
|
{
|
|
|
|
T s;
|
|
|
|
|
|
|
|
explicit ScalarMinOp(T s_) : s(s_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(T a)
|
2011-02-14 23:50:17 +08:00
|
|
|
{
|
|
|
|
return min(a, s);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
template <> struct ScalarMinOp<float>
|
|
|
|
{
|
|
|
|
float s;
|
|
|
|
|
|
|
|
explicit ScalarMinOp(float s_) : s(s_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ float operator()(float a)
|
2011-02-14 23:50:17 +08:00
|
|
|
{
|
|
|
|
return fmin(a, s);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
template <> struct ScalarMinOp<double>
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
double s;
|
|
|
|
|
|
|
|
explicit ScalarMinOp(double s_) : s(s_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ double operator()(double a)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return fmin(a, s);
|
2010-12-20 17:51:25 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ScalarMaxOp
|
|
|
|
{
|
|
|
|
T s;
|
|
|
|
|
|
|
|
explicit ScalarMaxOp(T s_) : s(s_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(T a)
|
2011-02-14 23:50:17 +08:00
|
|
|
{
|
|
|
|
return max(a, s);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
template <> struct ScalarMaxOp<float>
|
|
|
|
{
|
|
|
|
float s;
|
|
|
|
|
|
|
|
explicit ScalarMaxOp(float s_) : s(s_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ float operator()(float a)
|
2011-02-14 23:50:17 +08:00
|
|
|
{
|
|
|
|
return fmax(a, s);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
template <> struct ScalarMaxOp<double>
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
|
|
|
double s;
|
|
|
|
|
|
|
|
explicit ScalarMaxOp(double s_) : s(s_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ double operator()(double a)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return fmax(a, s);
|
2010-12-20 17:51:25 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
MinOp op;
|
|
|
|
transform(src1, src2, dst, op, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void min_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
|
2011-02-14 23:50:17 +08:00
|
|
|
template void min_gpu<schar >(const DevMem2D_<schar>& src1, const DevMem2D_<schar>& src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
|
2010-12-20 17:51:25 +08:00
|
|
|
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
MaxOp op;
|
|
|
|
transform(src1, src2, dst, op, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void max_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
|
2011-02-14 23:50:17 +08:00
|
|
|
template void max_gpu<schar >(const DevMem2D_<schar>& src1, const DevMem2D_<schar>& src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
|
2010-12-20 17:51:25 +08:00
|
|
|
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
|
|
|
|
|
|
|
template <typename T>
|
2011-02-14 23:50:17 +08:00
|
|
|
void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
ScalarMinOp<T> op(src2);
|
2010-12-20 17:51:25 +08:00
|
|
|
transform(src1, dst, op, stream);
|
|
|
|
}
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template void min_gpu<uchar >(const DevMem2D& src1, uchar src2, const DevMem2D& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<schar >(const DevMem2D_<schar>& src1, schar src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, ushort src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<short >(const DevMem2D_<short>& src1, short src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<int >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
|
|
|
template void min_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
2010-12-20 17:51:25 +08:00
|
|
|
template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
|
|
|
|
|
|
|
template <typename T>
|
2011-02-14 23:50:17 +08:00
|
|
|
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
2010-12-20 17:51:25 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
ScalarMaxOp<T> op(src2);
|
2010-12-20 17:51:25 +08:00
|
|
|
transform(src1, dst, op, stream);
|
|
|
|
}
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template void max_gpu<uchar >(const DevMem2D& src1, uchar src2, const DevMem2D& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<schar >(const DevMem2D_<schar>& src1, schar src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, ushort src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<short >(const DevMem2D_<short>& src1, short src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<int >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
|
|
|
template void max_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
2010-12-20 17:51:25 +08:00
|
|
|
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
2011-01-24 18:11:02 +08:00
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// threshold
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ThreshBinary
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
ThreshBinary(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(const T& src) const
|
2011-02-14 23:50:17 +08:00
|
|
|
{
|
|
|
|
return src > thresh ? maxVal : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
T thresh;
|
|
|
|
T maxVal;
|
2011-01-24 18:11:02 +08:00
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ThreshBinaryInv
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
ThreshBinaryInv(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(const T& src) const
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return src > thresh ? 0 : maxVal;
|
2011-01-24 18:11:02 +08:00
|
|
|
}
|
2011-02-14 23:50:17 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
T thresh;
|
|
|
|
T maxVal;
|
2011-01-24 18:11:02 +08:00
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ThreshTrunc
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
ThreshTrunc(T thresh_, T) : thresh(thresh_) {}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(const T& src) const
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return min(src, thresh);
|
2011-01-24 18:11:02 +08:00
|
|
|
}
|
2011-02-14 23:50:17 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
T thresh;
|
2011-01-24 18:11:02 +08:00
|
|
|
};
|
2011-02-14 23:50:17 +08:00
|
|
|
template <> struct ThreshTrunc<float>
|
|
|
|
{
|
|
|
|
ThreshTrunc(float thresh_, float) : thresh(thresh_) {}
|
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ float operator()(const float& src) const
|
2011-02-14 23:50:17 +08:00
|
|
|
{
|
|
|
|
return fmin(src, thresh);
|
|
|
|
}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
private:
|
|
|
|
float thresh;
|
|
|
|
};
|
|
|
|
template <> struct ThreshTrunc<double>
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
ThreshTrunc(double thresh_, double) : thresh(thresh_) {}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ double operator()(const double& src) const
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return fmin(src, thresh);
|
2011-01-24 18:11:02 +08:00
|
|
|
}
|
2011-02-14 23:50:17 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
double thresh;
|
2011-01-24 18:11:02 +08:00
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ThreshToZero
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
|
|
|
public:
|
2011-02-14 23:50:17 +08:00
|
|
|
ThreshToZero(T thresh_, T) : thresh(thresh_) {}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(const T& src) const
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return src > thresh ? src : 0;
|
2011-01-24 18:11:02 +08:00
|
|
|
}
|
2011-02-14 23:50:17 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
T thresh;
|
2011-01-24 18:11:02 +08:00
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <typename T> struct ThreshToZeroInv
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
|
|
|
public:
|
2011-02-14 23:50:17 +08:00
|
|
|
ThreshToZeroInv(T thresh_, T) : thresh(thresh_) {}
|
2011-01-24 18:11:02 +08:00
|
|
|
|
2011-06-14 19:27:32 +08:00
|
|
|
__device__ __forceinline__ T operator()(const T& src) const
|
2011-01-24 18:11:02 +08:00
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
return src > thresh ? 0 : src;
|
2011-01-24 18:11:02 +08:00
|
|
|
}
|
2011-02-14 23:50:17 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
T thresh;
|
2011-01-24 18:11:02 +08:00
|
|
|
};
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template <template <typename> class Op, typename T>
|
|
|
|
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
|
2011-01-24 18:11:02 +08:00
|
|
|
cudaStream_t stream)
|
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
Op<T> op(thresh, maxVal);
|
2011-01-24 18:11:02 +08:00
|
|
|
transform(src, dst, op, stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
2011-02-14 23:50:17 +08:00
|
|
|
void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, T thresh, T maxVal, int type,
|
2011-01-24 18:11:02 +08:00
|
|
|
cudaStream_t stream)
|
|
|
|
{
|
2011-02-14 23:50:17 +08:00
|
|
|
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
|
2011-01-24 18:11:02 +08:00
|
|
|
cudaStream_t stream);
|
|
|
|
|
|
|
|
static const caller_t callers[] =
|
|
|
|
{
|
|
|
|
threshold_caller<ThreshBinary, T>,
|
|
|
|
threshold_caller<ThreshBinaryInv, T>,
|
|
|
|
threshold_caller<ThreshTrunc, T>,
|
|
|
|
threshold_caller<ThreshToZero, T>,
|
|
|
|
threshold_caller<ThreshToZeroInv, T>
|
|
|
|
};
|
|
|
|
|
|
|
|
callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
|
|
|
|
}
|
|
|
|
|
2011-02-14 23:50:17 +08:00
|
|
|
template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, uchar thresh, uchar maxVal, int type, cudaStream_t stream);
|
|
|
|
template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, schar thresh, schar maxVal, int type, cudaStream_t stream);
|
|
|
|
template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, ushort thresh, ushort maxVal, int type, cudaStream_t stream);
|
|
|
|
template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, short thresh, short maxVal, int type, cudaStream_t stream);
|
|
|
|
template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, int thresh, int maxVal, int type, cudaStream_t stream);
|
2011-01-24 18:11:02 +08:00
|
|
|
template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
|
2011-02-14 23:50:17 +08:00
|
|
|
template void threshold_gpu<double>(const DevMem2D& src, const DevMem2D& dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
2011-06-30 22:39:48 +08:00
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// subtract
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
class SubtractOp
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
__device__ __forceinline__ T operator()(const T& l, const T& r) const
|
|
|
|
{
|
|
|
|
return l - r;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, SubtractOp<T>(), stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void subtractCaller<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
|
2011-07-21 16:47:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// pow
|
|
|
|
|
|
|
|
template<typename T, bool Signed = device::numeric_limits_gpu<T>::is_signed>
|
|
|
|
struct PowOp
|
|
|
|
{
|
|
|
|
float power;
|
|
|
|
PowOp(float power_) : power(power_) {}
|
2011-07-22 21:24:27 +08:00
|
|
|
|
2011-07-21 16:47:44 +08:00
|
|
|
__device__ __forceinline__ T operator()(const T& e) const
|
|
|
|
{
|
2011-07-22 21:26:31 +08:00
|
|
|
return saturate_cast<T>(__powf((float)e, power));
|
2011-07-21 16:47:44 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
struct PowOp<T, true>
|
|
|
|
{
|
|
|
|
float power;
|
|
|
|
PowOp(float power_) : power(power_) {}
|
|
|
|
|
|
|
|
__device__ __forceinline__ float operator()(const T& e)
|
|
|
|
{
|
|
|
|
T res = saturate_cast<T>(__powf((float)e, power));
|
|
|
|
|
|
|
|
if ( (e < 0) && (1 & (int)power) )
|
|
|
|
res *= -1;
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<>
|
|
|
|
struct PowOp<float>
|
|
|
|
{
|
|
|
|
float power;
|
|
|
|
PowOp(float power_) : power(power_) {}
|
|
|
|
|
|
|
|
__device__ __forceinline__ float operator()(const float& e)
|
|
|
|
{
|
|
|
|
return __powf(fabs(e), power);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
template void pow_caller<uchar>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
|
|
|
template void pow_caller<schar>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
|
|
|
template void pow_caller<short>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
|
|
|
template void pow_caller<ushort>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
|
|
|
template void pow_caller<int>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
|
|
|
template void pow_caller<uint>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
|
|
|
template void pow_caller<float>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
2010-12-20 17:07:19 +08:00
|
|
|
}}}
|