mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Merge pull request #8406 from khnaba:dft-as-algorithm
This commit is contained in:
commit
e5dbd2c3a5
@ -239,6 +239,10 @@ enum DftFlags {
|
|||||||
into a real array and inverse transformation is executed, the function treats the input as a
|
into a real array and inverse transformation is executed, the function treats the input as a
|
||||||
packed complex-conjugate symmetrical array, and the output will also be a real array). */
|
packed complex-conjugate symmetrical array, and the output will also be a real array). */
|
||||||
DFT_REAL_OUTPUT = 32,
|
DFT_REAL_OUTPUT = 32,
|
||||||
|
/** specifies that input is complex input. If this flag is set, the input must have 2 channels.
|
||||||
|
On the other hand, for backwards compatibility reasons, if input has 2 channels, input is
|
||||||
|
already considered complex. */
|
||||||
|
DFT_COMPLEX_INPUT = 64,
|
||||||
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
|
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
|
||||||
DCT_INVERSE = DFT_INVERSE,
|
DCT_INVERSE = DFT_INVERSE,
|
||||||
/** performs a forward or inverse transform of every individual row of the input
|
/** performs a forward or inverse transform of every individual row of the input
|
||||||
|
@ -3342,6 +3342,9 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
|||||||
|
|
||||||
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
|
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
|
||||||
|
|
||||||
|
// Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
|
||||||
|
CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != 2) );
|
||||||
|
|
||||||
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
|
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
|
||||||
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
|
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
|
||||||
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
|
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
|
||||||
|
@ -788,6 +788,7 @@ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArr
|
|||||||
(obtained from dft_size ).
|
(obtained from dft_size ).
|
||||||
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
|
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
|
||||||
cases are always forward and inverse, respectively).
|
cases are always forward and inverse, respectively).
|
||||||
|
- **DFT_COMPLEX_INPUT** Specifies that input is complex input with 2 channels.
|
||||||
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
|
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
|
||||||
real-complex transform, so the destination matrix must be real.
|
real-complex transform, so the destination matrix must be real.
|
||||||
@param stream Stream for the asynchronous version.
|
@param stream Stream for the asynchronous version.
|
||||||
@ -813,6 +814,35 @@ instead of the width.
|
|||||||
*/
|
*/
|
||||||
CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
|
CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
|
/** @brief Base class for DFT operator as a cv::Algorithm. :
|
||||||
|
*/
|
||||||
|
class CV_EXPORTS DFT : public Algorithm
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/** @brief Computes an FFT of a given image.
|
||||||
|
|
||||||
|
@param image Source image. Only CV_32FC1 and CV_32FC2 images are supported for now.
|
||||||
|
@param result Result image.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
|
*/
|
||||||
|
virtual void compute(InputArray image, OutputArray result, Stream& stream = Stream::Null()) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @brief Creates implementation for cuda::DFT.
|
||||||
|
|
||||||
|
@param dft_size The image size.
|
||||||
|
@param flags Optional flags:
|
||||||
|
- **DFT_ROWS** transforms each individual row of the source matrix.
|
||||||
|
- **DFT_SCALE** scales the result: divide it by the number of elements in the transform
|
||||||
|
(obtained from dft_size ).
|
||||||
|
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
|
||||||
|
cases are always forward and inverse, respectively).
|
||||||
|
- **DFT_COMPLEX_INPUT** Specifies that inputs will be complex with 2 channels.
|
||||||
|
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
|
||||||
|
real-complex transform, so the destination matrix must be real.
|
||||||
|
*/
|
||||||
|
CV_EXPORTS Ptr<DFT> createDFT(Size dft_size, int flags);
|
||||||
|
|
||||||
/** @brief Base class for convolution (or cross-correlation) operator. :
|
/** @brief Base class for convolution (or cross-correlation) operator. :
|
||||||
*/
|
*/
|
||||||
class CV_EXPORTS Convolution : public Algorithm
|
class CV_EXPORTS Convolution : public Algorithm
|
||||||
|
@ -286,111 +286,146 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
|
|||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// dft
|
// DFT function
|
||||||
|
|
||||||
void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
|
void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
|
||||||
|
{
|
||||||
|
if (getInputMat(_src, stream).channels() == 2)
|
||||||
|
flags |= DFT_COMPLEX_INPUT;
|
||||||
|
|
||||||
|
Ptr<DFT> dft = createDFT(dft_size, flags);
|
||||||
|
dft->compute(_src, _dst, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// DFT algorithm
|
||||||
|
|
||||||
|
#ifdef HAVE_CUFFT
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
class DFTImpl : public DFT
|
||||||
|
{
|
||||||
|
Size dft_size, dft_size_opt;
|
||||||
|
bool is_1d_input, is_row_dft, is_scaled_dft, is_inverse, is_complex_input, is_complex_output;
|
||||||
|
|
||||||
|
cufftType dft_type;
|
||||||
|
cufftHandle plan;
|
||||||
|
|
||||||
|
public:
|
||||||
|
DFTImpl(Size dft_size, int flags)
|
||||||
|
: dft_size(dft_size),
|
||||||
|
dft_size_opt(dft_size),
|
||||||
|
is_1d_input((dft_size.height == 1) || (dft_size.width == 1)),
|
||||||
|
is_row_dft((flags & DFT_ROWS) != 0),
|
||||||
|
is_scaled_dft((flags & DFT_SCALE) != 0),
|
||||||
|
is_inverse((flags & DFT_INVERSE) != 0),
|
||||||
|
is_complex_input((flags & DFT_COMPLEX_INPUT) != 0),
|
||||||
|
is_complex_output(!(flags & DFT_REAL_OUTPUT)),
|
||||||
|
dft_type(!is_complex_input ? CUFFT_R2C : (is_complex_output ? CUFFT_C2C : CUFFT_C2R))
|
||||||
|
{
|
||||||
|
// We don't support unpacked output (in the case of real input)
|
||||||
|
CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) );
|
||||||
|
|
||||||
|
// We don't support real-to-real transform
|
||||||
|
CV_Assert( is_complex_input || is_complex_output );
|
||||||
|
|
||||||
|
if (is_1d_input && !is_row_dft)
|
||||||
|
{
|
||||||
|
// If the source matrix is single column handle it as single row
|
||||||
|
dft_size_opt.width = std::max(dft_size.width, dft_size.height);
|
||||||
|
dft_size_opt.height = std::min(dft_size.width, dft_size.height);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_Assert( dft_size_opt.width > 1 );
|
||||||
|
|
||||||
|
if (is_1d_input || is_row_dft)
|
||||||
|
cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
|
||||||
|
else
|
||||||
|
cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );
|
||||||
|
}
|
||||||
|
|
||||||
|
~DFTImpl()
|
||||||
|
{
|
||||||
|
cufftSafeCall( cufftDestroy(plan) );
|
||||||
|
}
|
||||||
|
|
||||||
|
void compute(InputArray _src, OutputArray _dst, Stream& stream)
|
||||||
|
{
|
||||||
|
GpuMat src = getInputMat(_src, stream);
|
||||||
|
|
||||||
|
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
|
||||||
|
CV_Assert( is_complex_input == (src.channels() == 2) );
|
||||||
|
|
||||||
|
// Make sure here we work with the continuous input,
|
||||||
|
// as CUFFT can't handle gaps
|
||||||
|
GpuMat src_cont;
|
||||||
|
if (src.isContinuous())
|
||||||
|
{
|
||||||
|
src_cont = src;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
BufferPool pool(stream);
|
||||||
|
src_cont.allocator = pool.getAllocator();
|
||||||
|
createContinuous(src.rows, src.cols, src.type(), src_cont);
|
||||||
|
src.copyTo(src_cont, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
|
||||||
|
|
||||||
|
if (is_complex_input)
|
||||||
|
{
|
||||||
|
if (is_complex_output)
|
||||||
|
{
|
||||||
|
createContinuous(dft_size, CV_32FC2, _dst);
|
||||||
|
GpuMat dst = _dst.getGpuMat();
|
||||||
|
|
||||||
|
cufftSafeCall(cufftExecC2C(
|
||||||
|
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
|
||||||
|
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
createContinuous(dft_size, CV_32F, _dst);
|
||||||
|
GpuMat dst = _dst.getGpuMat();
|
||||||
|
|
||||||
|
cufftSafeCall(cufftExecC2R(
|
||||||
|
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// dft_size may have been swapped into dft_size_opt for efficiency; the output size must reflect that
|
||||||
|
if (dft_size == dft_size_opt)
|
||||||
|
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst);
|
||||||
|
else
|
||||||
|
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst);
|
||||||
|
|
||||||
|
GpuMat dst = _dst.getGpuMat();
|
||||||
|
|
||||||
|
cufftSafeCall(cufftExecR2C(
|
||||||
|
plan, src_cont.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_scaled_dft)
|
||||||
|
cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Ptr<DFT> cv::cuda::createDFT(Size dft_size, int flags)
|
||||||
{
|
{
|
||||||
#ifndef HAVE_CUFFT
|
#ifndef HAVE_CUFFT
|
||||||
(void) _src;
|
|
||||||
(void) _dst;
|
|
||||||
(void) dft_size;
|
(void) dft_size;
|
||||||
(void) flags;
|
(void) flags;
|
||||||
(void) stream;
|
CV_Error(Error::StsNotImplemented, "The library was build without CUFFT");
|
||||||
throw_no_cuda();
|
return Ptr<DFT>();
|
||||||
#else
|
#else
|
||||||
GpuMat src = getInputMat(_src, stream);
|
return makePtr<DFTImpl>(dft_size, flags);
|
||||||
|
|
||||||
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
|
|
||||||
|
|
||||||
// We don't support unpacked output (in the case of real input)
|
|
||||||
CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) );
|
|
||||||
|
|
||||||
const bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);
|
|
||||||
const bool is_row_dft = (flags & DFT_ROWS) != 0;
|
|
||||||
const bool is_scaled_dft = (flags & DFT_SCALE) != 0;
|
|
||||||
const bool is_inverse = (flags & DFT_INVERSE) != 0;
|
|
||||||
const bool is_complex_input = src.channels() == 2;
|
|
||||||
const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
|
||||||
|
|
||||||
// We don't support real-to-real transform
|
|
||||||
CV_Assert( is_complex_input || is_complex_output );
|
|
||||||
|
|
||||||
// Make sure here we work with the continuous input,
|
|
||||||
// as CUFFT can't handle gaps
|
|
||||||
GpuMat src_cont;
|
|
||||||
if (src.isContinuous())
|
|
||||||
{
|
|
||||||
src_cont = src;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
BufferPool pool(stream);
|
|
||||||
src_cont.allocator = pool.getAllocator();
|
|
||||||
createContinuous(src.rows, src.cols, src.type(), src_cont);
|
|
||||||
src.copyTo(src_cont, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
Size dft_size_opt = dft_size;
|
|
||||||
if (is_1d_input && !is_row_dft)
|
|
||||||
{
|
|
||||||
// If the source matrix is single column handle it as single row
|
|
||||||
dft_size_opt.width = std::max(dft_size.width, dft_size.height);
|
|
||||||
dft_size_opt.height = std::min(dft_size.width, dft_size.height);
|
|
||||||
}
|
|
||||||
|
|
||||||
CV_Assert( dft_size_opt.width > 1 );
|
|
||||||
|
|
||||||
cufftType dft_type = CUFFT_R2C;
|
|
||||||
if (is_complex_input)
|
|
||||||
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
|
|
||||||
|
|
||||||
cufftHandle plan;
|
|
||||||
if (is_1d_input || is_row_dft)
|
|
||||||
cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
|
|
||||||
else
|
|
||||||
cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );
|
|
||||||
|
|
||||||
cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
|
|
||||||
|
|
||||||
if (is_complex_input)
|
|
||||||
{
|
|
||||||
if (is_complex_output)
|
|
||||||
{
|
|
||||||
createContinuous(dft_size, CV_32FC2, _dst);
|
|
||||||
GpuMat dst = _dst.getGpuMat();
|
|
||||||
|
|
||||||
cufftSafeCall(cufftExecC2C(
|
|
||||||
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
|
|
||||||
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
createContinuous(dft_size, CV_32F, _dst);
|
|
||||||
GpuMat dst = _dst.getGpuMat();
|
|
||||||
|
|
||||||
cufftSafeCall(cufftExecC2R(
|
|
||||||
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// We could swap dft_size for efficiency. Here we must reflect it
|
|
||||||
if (dft_size == dft_size_opt)
|
|
||||||
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst);
|
|
||||||
else
|
|
||||||
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst);
|
|
||||||
|
|
||||||
GpuMat dst = _dst.getGpuMat();
|
|
||||||
|
|
||||||
cufftSafeCall(cufftExecR2C(
|
|
||||||
plan, src_cont.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
|
|
||||||
}
|
|
||||||
|
|
||||||
cufftSafeCall( cufftDestroy(plan) );
|
|
||||||
|
|
||||||
if (is_scaled_dft)
|
|
||||||
cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -250,6 +250,33 @@ CUDA_TEST_P(Dft, C2C)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CUDA_TEST_P(Dft, Algorithm)
|
||||||
|
{
|
||||||
|
int cols = randomInt(2, 100);
|
||||||
|
int rows = randomInt(2, 100);
|
||||||
|
|
||||||
|
int flags = 0;
|
||||||
|
cv::Ptr<cv::cuda::DFT> dft = cv::cuda::createDFT(cv::Size(cols, rows), flags);
|
||||||
|
|
||||||
|
for (int i = 0; i < 5; ++i)
|
||||||
|
{
|
||||||
|
SCOPED_TRACE("dft algorithm");
|
||||||
|
|
||||||
|
cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
|
||||||
|
|
||||||
|
cv::cuda::GpuMat d_b;
|
||||||
|
cv::cuda::GpuMat d_b_data;
|
||||||
|
dft->compute(loadMat(a), d_b);
|
||||||
|
|
||||||
|
cv::Mat b_gold;
|
||||||
|
cv::dft(a, b_gold, flags);
|
||||||
|
|
||||||
|
ASSERT_EQ(CV_32F, d_b.depth());
|
||||||
|
ASSERT_EQ(2, d_b.channels());
|
||||||
|
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
|
void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
|
||||||
|
Loading…
Reference in New Issue
Block a user