mirror of https://github.com/opencv/opencv.git (synced 2024-11-24 11:10:21 +08:00)

[+] Added Brox optical flow (implementation courtesy of Michael Smirnov)

parent f838db92c7
commit 42c7aece36

1136  modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu   new file (diff suppressed because it is too large)
103   modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp  new file
@ -0,0 +1,103 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

////////////////////////////////////////////////////////////////////////////////
//
// NVIDIA CUDA implementation of the Brox et al. optical flow algorithm
//
// The algorithm is explained in the original paper:
// T. Brox, A. Bruhn, N. Papenberg, J. Weickert:
// High accuracy optical flow estimation based on a theory for warping.
// ECCV 2004.
//
// Implementation by Mikhail Smirnov
// email: msmirnov@nvidia.com, devsupport@nvidia.com
//
// Credits for help with the code to:
// Alexey Mendelenko, Anton Obukhov, and Alexander Kharlamov.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef _ncv_optical_flow_h_
#define _ncv_optical_flow_h_

#include "NCV.hpp"

/// \brief Model and solver parameters
struct NCVBroxOpticalFlowDescriptor
{
    /// flow smoothness
    Ncv32f alpha;
    /// gradient constancy importance
    Ncv32f gamma;
    /// pyramid scale factor
    Ncv32f scale_factor;
    /// number of lagged non-linearity iterations (inner loop)
    Ncv32u number_of_inner_iterations;
    /// number of warping iterations (number of pyramid levels)
    Ncv32u number_of_outer_iterations;
    /// number of linear system solver iterations
    Ncv32u number_of_solver_iterations;
};

/////////////////////////////////////////////////////////////////////////////////////////
/// \brief Compute optical flow
///
/// Based on the method by Brox et al. [2004]
/// \param [in] desc model and solver parameters
/// \param [in] gpu_mem_allocator GPU memory allocator
/// \param [in] frame0 source frame
/// \param [in] frame1 frame to track
/// \param [out] u flow horizontal component (along \b x axis)
/// \param [out] v flow vertical component (along \b y axis)
/// \return computation status
/////////////////////////////////////////////////////////////////////////////////////////

NCV_EXPORTS
NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
                             INCVMemAllocator &gpu_mem_allocator,
                             const NCVMatrix<Ncv32f> &frame0,
                             const NCVMatrix<Ncv32f> &frame1,
                             NCVMatrix<Ncv32f> &u,
                             NCVMatrix<Ncv32f> &v,
                             cudaStream_t stream);

#endif
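Note (illustrative, not part of the committed header): a minimal calling sketch for this API, assuming GPU-resident NCVMatrix<Ncv32f> objects frame0, frame1, u, v and an INCVMemAllocator instance gpuAllocator have already been set up (the sample program added later in this commit shows the full setup); the parameter values mirror the defaults used in that sample.

// Hedged sketch only; the object names are placeholders, not part of the commit.
NCVBroxOpticalFlowDescriptor desc;
desc.alpha        = 0.197f; // flow smoothness
desc.gamma        = 50.0f;  // gradient constancy importance
desc.scale_factor = 0.8f;   // pyramid scale factor
desc.number_of_inner_iterations  = 10;
desc.number_of_outer_iterations  = 77;
desc.number_of_solver_iterations = 10;

// frame0/frame1 are device-resident grayscale frames; u/v receive the flow components.
NCVStatus stat = NCVBroxOpticalFlow(desc, gpuAllocator, frame0, frame1, u, v, 0);
if (stat != NCV_SUCCESS)
{
    // handle failure
}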
|
@ -1610,3 +1610,952 @@ NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
|
||||
{
|
||||
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);
|
||||
}
|
||||
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// Filter.cu
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
|
||||
texture <float, 1, cudaReadModeElementType> texSrc;
|
||||
texture <float, 1, cudaReadModeElementType> texKernel;
|
||||
|
||||
|
||||
__forceinline__ __device__ float getValueMirrorRow(const int rowOffset,
|
||||
int i,
|
||||
int w)
|
||||
{
|
||||
if (i < 0) i = 1 - i;
|
||||
if (i >= w) i = w + w - i - 1;
|
||||
return tex1Dfetch (texSrc, rowOffset + i);
|
||||
}
|
||||
|
||||
|
||||
__forceinline__ __device__ float getValueMirrorColumn(const int offset,
|
||||
const int rowStep,
|
||||
int j,
|
||||
int h)
|
||||
{
|
||||
if (j < 0) j = 1 - j;
|
||||
if (j >= h) j = h + h - j - 1;
|
||||
return tex1Dfetch (texSrc, offset + j * rowStep);
|
||||
}
|
||||
|
||||
|
||||
__global__ void FilterRowBorderMirror_32f_C1R(Ncv32u srcStep,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u dstStep,
|
||||
NcvRect32u roi,
|
||||
Ncv32s nKernelSize,
|
||||
Ncv32s nAnchor,
|
||||
Ncv32f multiplier)
|
||||
{
|
||||
// position within ROI
|
||||
const int ix = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int iy = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (ix >= roi.width || iy >= roi.height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const int p = nKernelSize - nAnchor - 1;
|
||||
|
||||
const int j = roi.y + iy;
|
||||
|
||||
const int rowOffset = j * srcStep + roi.x;
|
||||
|
||||
float sum = 0.0f;
|
||||
for (int m = 0; m < nKernelSize; ++m)
|
||||
{
|
||||
sum += getValueMirrorRow (rowOffset, ix + m - p, roi.width)
|
||||
* tex1Dfetch (texKernel, m);
|
||||
}
|
||||
|
||||
pDst[iy * dstStep + ix] = sum * multiplier;
|
||||
}
|
||||
|
||||
|
||||
__global__ void FilterColumnBorderMirror_32f_C1R(Ncv32u srcStep,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u dstStep,
|
||||
NcvRect32u roi,
|
||||
Ncv32s nKernelSize,
|
||||
Ncv32s nAnchor,
|
||||
Ncv32f multiplier)
|
||||
{
|
||||
const int ix = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int iy = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (ix >= roi.width || iy >= roi.height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const int p = nKernelSize - nAnchor - 1;
|
||||
const int i = roi.x + ix;
|
||||
const int offset = i + roi.y * srcStep;
|
||||
|
||||
float sum = 0.0f;
|
||||
for (int m = 0; m < nKernelSize; ++m)
|
||||
{
|
||||
sum += getValueMirrorColumn (offset, srcStep, iy + m - p, roi.height)
|
||||
* tex1Dfetch (texKernel, m);
|
||||
}
|
||||
|
||||
pDst[ix + iy * dstStep] = sum * multiplier;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u nDstStep,
|
||||
NcvRect32u oROI,
|
||||
NppStBorderType borderType,
|
||||
const Ncv32f *pKernel,
|
||||
Ncv32s nKernelSize,
|
||||
Ncv32s nAnchor,
|
||||
Ncv32f multiplier)
|
||||
{
|
||||
ncvAssertReturn (pSrc != NULL &&
|
||||
pDst != NULL &&
|
||||
pKernel != NULL, NCV_NULL_PTR);
|
||||
|
||||
ncvAssertReturn (oROI.width > 0 && oROI.height > 0, NPPST_INVALID_ROI);
|
||||
|
||||
ncvAssertReturn (srcSize.width * sizeof (Ncv32f) <= nSrcStep &&
|
||||
dstSize.width * sizeof (Ncv32f) <= nDstStep &&
|
||||
oROI.width * sizeof (Ncv32f) <= nSrcStep &&
|
||||
oROI.width * sizeof (Ncv32f) <= nDstStep &&
|
||||
nSrcStep % sizeof (Ncv32f) == 0 &&
|
||||
nDstStep % sizeof (Ncv32f) == 0, NPPST_INVALID_STEP);
|
||||
|
||||
Ncv32u srcStep = nSrcStep / sizeof (Ncv32f);
|
||||
Ncv32u dstStep = nDstStep / sizeof (Ncv32f);
|
||||
|
||||
// adjust ROI size to be within source image
|
||||
if (oROI.x + oROI.width > srcSize.width)
|
||||
{
|
||||
oROI.width = srcSize.width - oROI.x;
|
||||
}
|
||||
|
||||
if (oROI.y + oROI.height > srcSize.height)
|
||||
{
|
||||
oROI.height = srcSize.height - oROI.y;
|
||||
}
|
||||
|
||||
cudaChannelFormatDesc floatChannel = cudaCreateChannelDesc <float> ();
|
||||
texSrc.normalized = false;
|
||||
texKernel.normalized = false;
|
||||
|
||||
cudaBindTexture (0, texSrc, pSrc, floatChannel, srcSize.height * nSrcStep);
|
||||
cudaBindTexture (0, texKernel, pKernel, floatChannel, nKernelSize * sizeof (Ncv32f));
|
||||
|
||||
dim3 ctaSize (32, 6);
|
||||
dim3 gridSize ((oROI.width + ctaSize.x - 1) / ctaSize.x,
|
||||
(oROI.height + ctaSize.y - 1) / ctaSize.y);
|
||||
|
||||
switch (borderType)
|
||||
{
|
||||
case nppStBorderNone:
|
||||
return NPPST_ERROR;
|
||||
case nppStBorderClamp:
|
||||
return NPPST_ERROR;
|
||||
case nppStBorderWrap:
|
||||
return NPPST_ERROR;
|
||||
case nppStBorderMirror:
|
||||
FilterRowBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
||||
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
|
||||
break;
|
||||
default:
|
||||
return NPPST_ERROR;
|
||||
}
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u nDstStep,
|
||||
NcvRect32u oROI,
|
||||
NppStBorderType borderType,
|
||||
const Ncv32f *pKernel,
|
||||
Ncv32s nKernelSize,
|
||||
Ncv32s nAnchor,
|
||||
Ncv32f multiplier)
|
||||
{
|
||||
ncvAssertReturn (pSrc != NULL &&
|
||||
pDst != NULL &&
|
||||
pKernel != NULL, NCV_NULL_PTR);
|
||||
|
||||
ncvAssertReturn (oROI.width > 0 && oROI.height > 0, NPPST_INVALID_ROI);
|
||||
|
||||
ncvAssertReturn (srcSize.width * sizeof (Ncv32f) <= nSrcStep &&
|
||||
dstSize.width * sizeof (Ncv32f) <= nDstStep &&
|
||||
oROI.width * sizeof (Ncv32f) <= nSrcStep &&
|
||||
oROI.width * sizeof (Ncv32f) <= nDstStep &&
|
||||
nSrcStep % sizeof (Ncv32f) == 0 &&
|
||||
nDstStep % sizeof (Ncv32f) == 0, NPPST_INVALID_STEP);
|
||||
|
||||
Ncv32u srcStep = nSrcStep / sizeof (Ncv32f);
|
||||
Ncv32u dstStep = nDstStep / sizeof (Ncv32f);
|
||||
|
||||
// adjust ROI size to be within source image
|
||||
if (oROI.x + oROI.width > srcSize.width)
|
||||
{
|
||||
oROI.width = srcSize.width - oROI.x;
|
||||
}
|
||||
|
||||
if (oROI.y + oROI.height > srcSize.height)
|
||||
{
|
||||
oROI.height = srcSize.height - oROI.y;
|
||||
}
|
||||
|
||||
cudaChannelFormatDesc floatChannel = cudaCreateChannelDesc <float> ();
|
||||
texSrc.normalized = false;
|
||||
texKernel.normalized = false;
|
||||
|
||||
cudaBindTexture (0, texSrc, pSrc, floatChannel, srcSize.height * nSrcStep);
|
||||
cudaBindTexture (0, texKernel, pKernel, floatChannel, nKernelSize * sizeof (Ncv32f));
|
||||
|
||||
dim3 ctaSize (32, 6);
|
||||
dim3 gridSize ((oROI.width + ctaSize.x - 1) / ctaSize.x,
|
||||
(oROI.height + ctaSize.y - 1) / ctaSize.y);
|
||||
|
||||
switch (borderType)
|
||||
{
|
||||
case nppStBorderClamp:
|
||||
return NPPST_ERROR;
|
||||
case nppStBorderWrap:
|
||||
return NPPST_ERROR;
|
||||
case nppStBorderMirror:
|
||||
FilterColumnBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
||||
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
|
||||
break;
|
||||
default:
|
||||
return NPPST_ERROR;
|
||||
}
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// FrameInterpolate.cu
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
|
||||
inline Ncv32u iDivUp(Ncv32u num, Ncv32u denom)
|
||||
{
|
||||
return (num + denom - 1)/denom;
|
||||
}
|
||||
|
||||
|
||||
texture<float, 2, cudaReadModeElementType> tex_src1;
|
||||
texture<float, 2, cudaReadModeElementType> tex_src0;
|
||||
|
||||
|
||||
__global__ void BlendFramesKernel(const float *u, const float *v, // forward flow
|
||||
const float *ur, const float *vr, // backward flow
|
||||
const float *o0, const float *o1, // coverage masks
|
||||
int w, int h, int s,
|
||||
float theta, float *out)
|
||||
{
|
||||
const int ix = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
const int iy = threadIdx.y + blockDim.y * blockIdx.y;
|
||||
|
||||
const int pos = ix + s * iy;
|
||||
|
||||
if (ix >= w || iy >= h) return;
|
||||
|
||||
float _u = u[pos];
|
||||
float _v = v[pos];
|
||||
|
||||
float _ur = ur[pos];
|
||||
float _vr = vr[pos];
|
||||
|
||||
float x = (float)ix + 0.5f;
|
||||
float y = (float)iy + 0.5f;
|
||||
bool b0 = o0[pos] > 1e-4f;
|
||||
bool b1 = o1[pos] > 1e-4f;
|
||||
|
||||
if (b0 && b1)
|
||||
{
|
||||
// pixel is visible on both frames
|
||||
out[pos] = tex2D(tex_src0, x - _u * theta, y - _v * theta) * (1.0f - theta) +
|
||||
tex2D(tex_src1, x + _u * (1.0f - theta), y + _v * (1.0f - theta)) * theta;
|
||||
}
|
||||
else if (b0)
|
||||
{
|
||||
// visible on the first frame only
|
||||
out[pos] = tex2D(tex_src0, x - _u * theta, y - _v * theta);
|
||||
}
|
||||
else
|
||||
{
|
||||
// visible on the second frame only
|
||||
out[pos] = tex2D(tex_src1, x - _ur * (1.0f - theta), y - _vr * (1.0f - theta));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
NCVStatus BlendFrames(const Ncv32f *src0,
|
||||
const Ncv32f *src1,
|
||||
const Ncv32f *ufi,
|
||||
const Ncv32f *vfi,
|
||||
const Ncv32f *ubi,
|
||||
const Ncv32f *vbi,
|
||||
const Ncv32f *o1,
|
||||
const Ncv32f *o2,
|
||||
Ncv32u width,
|
||||
Ncv32u height,
|
||||
Ncv32u stride,
|
||||
Ncv32f theta,
|
||||
Ncv32f *out)
|
||||
{
|
||||
tex_src1.addressMode[0] = cudaAddressModeClamp;
|
||||
tex_src1.addressMode[1] = cudaAddressModeClamp;
|
||||
tex_src1.filterMode = cudaFilterModeLinear;
|
||||
tex_src1.normalized = false;
|
||||
|
||||
tex_src0.addressMode[0] = cudaAddressModeClamp;
|
||||
tex_src0.addressMode[1] = cudaAddressModeClamp;
|
||||
tex_src0.filterMode = cudaFilterModeLinear;
|
||||
tex_src0.normalized = false;
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc <float> ();
|
||||
const Ncv32u pitch = stride * sizeof (float);
|
||||
ncvAssertCUDAReturn (cudaBindTexture2D (0, tex_src1, src1, desc, width, height, pitch), NPPST_TEXTURE_BIND_ERROR);
|
||||
ncvAssertCUDAReturn (cudaBindTexture2D (0, tex_src0, src0, desc, width, height, pitch), NPPST_TEXTURE_BIND_ERROR);
|
||||
|
||||
dim3 threads (32, 4);
|
||||
dim3 blocks (iDivUp (width, threads.x), iDivUp (height, threads.y));
|
||||
|
||||
BlendFramesKernel<<<blocks, threads, 0, nppStGetActiveCUDAstream ()>>>
|
||||
(ufi, vfi, ubi, vbi, o1, o2, width, height, stride, theta, out);
|
||||
|
||||
ncvAssertCUDAReturn (cudaGetLastError (), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStGetInterpolationBufferSize(NcvSize32u srcSize,
|
||||
Ncv32u nStep,
|
||||
Ncv32u *hpSize)
|
||||
{
|
||||
NCVStatus status = NPPST_ERROR;
|
||||
status = nppiStVectorWarpGetBufferSize(srcSize, nStep, hpSize);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState)
|
||||
{
|
||||
// check state validity
|
||||
ncvAssertReturn (pState->pSrcFrame0 != 0 &&
|
||||
pState->pSrcFrame1 != 0 &&
|
||||
pState->pFU != 0 &&
|
||||
pState->pFV != 0 &&
|
||||
pState->pBU != 0 &&
|
||||
pState->pBV != 0 &&
|
||||
pState->pNewFrame != 0 &&
|
||||
pState->ppBuffers[0] != 0 &&
|
||||
pState->ppBuffers[1] != 0 &&
|
||||
pState->ppBuffers[2] != 0 &&
|
||||
pState->ppBuffers[3] != 0 &&
|
||||
pState->ppBuffers[4] != 0 &&
|
||||
pState->ppBuffers[5] != 0, NPPST_NULL_POINTER_ERROR);
|
||||
|
||||
ncvAssertReturn (pState->size.width > 0 &&
|
||||
pState->size.height > 0, NPPST_ERROR);
|
||||
|
||||
ncvAssertReturn (pState->nStep >= pState->size.width * sizeof (Ncv32f) &&
|
||||
pState->nStep > 0 &&
|
||||
pState->nStep % sizeof (Ncv32f) == 0,
|
||||
NPPST_INVALID_STEP);
|
||||
|
||||
// change notation
|
||||
Ncv32f *cov0 = pState->ppBuffers[0];
|
||||
Ncv32f *cov1 = pState->ppBuffers[1];
|
||||
Ncv32f *fwdU = pState->ppBuffers[2]; // forward u
|
||||
Ncv32f *fwdV = pState->ppBuffers[3]; // forward v
|
||||
Ncv32f *bwdU = pState->ppBuffers[4]; // backward u
|
||||
Ncv32f *bwdV = pState->ppBuffers[5]; // backward v
|
||||
// warp flow
|
||||
ncvAssertReturnNcvStat (
|
||||
nppiStVectorWarp_PSF2x2_32f_C1 (pState->pFU,
|
||||
pState->size,
|
||||
pState->nStep,
|
||||
pState->pFU,
|
||||
pState->pFV,
|
||||
pState->nStep,
|
||||
cov0,
|
||||
pState->pos,
|
||||
fwdU) );
|
||||
ncvAssertReturnNcvStat (
|
||||
nppiStVectorWarp_PSF2x2_32f_C1 (pState->pFV,
|
||||
pState->size,
|
||||
pState->nStep,
|
||||
pState->pFU,
|
||||
pState->pFV,
|
||||
pState->nStep,
|
||||
cov0,
|
||||
pState->pos,
|
||||
fwdV) );
|
||||
// warp backward flow
|
||||
ncvAssertReturnNcvStat (
|
||||
nppiStVectorWarp_PSF2x2_32f_C1 (pState->pBU,
|
||||
pState->size,
|
||||
pState->nStep,
|
||||
pState->pBU,
|
||||
pState->pBV,
|
||||
pState->nStep,
|
||||
cov1,
|
||||
1.0f - pState->pos,
|
||||
bwdU) );
|
||||
ncvAssertReturnNcvStat (
|
||||
nppiStVectorWarp_PSF2x2_32f_C1 (pState->pBV,
|
||||
pState->size,
|
||||
pState->nStep,
|
||||
pState->pBU,
|
||||
pState->pBV,
|
||||
pState->nStep,
|
||||
cov1,
|
||||
1.0f - pState->pos,
|
||||
bwdV) );
|
||||
// interpolate frame
|
||||
ncvAssertReturnNcvStat (
|
||||
BlendFrames (pState->pSrcFrame0,
|
||||
pState->pSrcFrame1,
|
||||
fwdU,
|
||||
fwdV,
|
||||
bwdU,
|
||||
bwdV,
|
||||
cov0,
|
||||
cov1,
|
||||
pState->size.width,
|
||||
pState->size.height,
|
||||
pState->nStep / sizeof (Ncv32f),
|
||||
pState->pos,
|
||||
pState->pNewFrame) );
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// VectorWarpFrame.cu
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
|
||||
#if __CUDA_ARCH__ < 200
|
||||
|
||||
// FP32 atomic add
|
||||
static __forceinline__ __device__ float _atomicAdd(float *addr, float val)
|
||||
{
|
||||
float old = *addr, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
old = int_as_float(__iAtomicCAS((int*)addr,
|
||||
float_as_int(assumed),
|
||||
float_as_int(val+assumed)));
|
||||
} while( assumed!=old );
|
||||
|
||||
return old;
|
||||
}
|
||||
#else
|
||||
#define _atomicAdd atomicAdd
|
||||
#endif
|
||||
|
||||
|
||||
__global__ void ForwardWarpKernel_PSF2x2(const float *u,
|
||||
const float *v,
|
||||
const float *src,
|
||||
const int w,
|
||||
const int h,
|
||||
const int flow_stride,
|
||||
const int image_stride,
|
||||
const float time_scale,
|
||||
float *normalization_factor,
|
||||
float *dst)
|
||||
{
|
||||
int j = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
int i = threadIdx.y + blockDim.y * blockIdx.y;
|
||||
|
||||
if (i >= h || j >= w) return;
|
||||
|
||||
int flow_row_offset = i * flow_stride;
|
||||
int image_row_offset = i * image_stride;
|
||||
|
||||
//bottom left corner of a target pixel
|
||||
float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
|
||||
float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;
|
||||
// pixel containing bottom left corner
|
||||
float px;
|
||||
float py;
|
||||
float dx = modff (cx, &px);
|
||||
float dy = modff (cy, &py);
|
||||
// target pixel integer coords
|
||||
int tx;
|
||||
int ty;
|
||||
tx = (int) px;
|
||||
ty = (int) py;
|
||||
float value = src[image_row_offset + j];
|
||||
float weight;
|
||||
// fill pixel containing bottom right corner
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = dx * dy;
|
||||
_atomicAdd (dst + ty * image_stride + tx, value * weight);
|
||||
_atomicAdd (normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
|
||||
// fill pixel containing bottom left corner
|
||||
tx -= 1;
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = (1.0f - dx) * dy;
|
||||
_atomicAdd (dst + ty * image_stride + tx, value * weight);
|
||||
_atomicAdd (normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
|
||||
// fill pixel containing upper left corner
|
||||
ty -= 1;
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = (1.0f - dx) * (1.0f - dy);
|
||||
_atomicAdd (dst + ty * image_stride + tx, value * weight);
|
||||
_atomicAdd (normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
|
||||
// fill pixel containing upper right corner
|
||||
tx += 1;
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
weight = dx * (1.0f - dy);
|
||||
_atomicAdd (dst + ty * image_stride + tx, value * weight);
|
||||
_atomicAdd (normalization_factor + ty * image_stride + tx, weight);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void ForwardWarpKernel_PSF1x1(const float *u,
|
||||
const float *v,
|
||||
const float *src,
|
||||
const int w,
|
||||
const int h,
|
||||
const int flow_stride,
|
||||
const int image_stride,
|
||||
const float time_scale,
|
||||
float *dst)
|
||||
{
|
||||
int j = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
int i = threadIdx.y + blockDim.y * blockIdx.y;
|
||||
|
||||
if (i >= h || j >= w) return;
|
||||
|
||||
int flow_row_offset = i * flow_stride;
|
||||
int image_row_offset = i * image_stride;
|
||||
|
||||
float u_ = u[flow_row_offset + j];
|
||||
float v_ = v[flow_row_offset + j];
|
||||
|
||||
//bottom left corner of target pixel
|
||||
float cx = u_ * time_scale + (float)j + 1.0f;
|
||||
float cy = v_ * time_scale + (float)i + 1.0f;
|
||||
// pixel containing bottom left corner
|
||||
int tx = __float2int_rn (cx);
|
||||
int ty = __float2int_rn (cy);
|
||||
|
||||
float value = src[image_row_offset + j];
|
||||
// fill pixel
|
||||
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
||||
{
|
||||
_atomicAdd (dst + ty * image_stride + tx, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void NormalizeKernel(const float *normalization_factor, int w, int h, int s, float *image)
|
||||
{
|
||||
int i = threadIdx.y + blockDim.y * blockIdx.y;
|
||||
int j = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
|
||||
if (i >= h || j >= w) return;
|
||||
|
||||
const int pos = i * s + j;
|
||||
|
||||
float scale = normalization_factor[pos];
|
||||
|
||||
float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);
|
||||
|
||||
image[pos] *= invScale;
|
||||
}
|
||||
|
||||
|
||||
__global__ void MemsetKernel(const float value, int w, int h, float *image)
|
||||
{
|
||||
int i = threadIdx.y + blockDim.y * blockIdx.y;
|
||||
int j = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
|
||||
if (i >= h || j >= w) return;
|
||||
|
||||
const int pos = i * w + j;
|
||||
|
||||
image[pos] = value;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStVectorWarpGetBufferSize (NcvSize32u srcSize, Ncv32u nSrcStep, Ncv32u *hpSize)
|
||||
{
|
||||
ncvAssertReturn (hpSize != NULL, NPPST_NULL_POINTER_ERROR);
|
||||
ncvAssertReturn (srcSize.width * sizeof (Ncv32f) <= nSrcStep,
|
||||
NPPST_INVALID_STEP);
|
||||
|
||||
*hpSize = nSrcStep * srcSize.height;
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
// does not require normalization
|
||||
NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
const Ncv32f *pU,
|
||||
const Ncv32f *pV,
|
||||
Ncv32u nVFStep,
|
||||
Ncv32f timeScale,
|
||||
Ncv32f *pDst)
|
||||
{
|
||||
ncvAssertReturn (pSrc != NULL &&
|
||||
pU != NULL &&
|
||||
pV != NULL &&
|
||||
pDst != NULL, NPPST_NULL_POINTER_ERROR);
|
||||
|
||||
ncvAssertReturn (srcSize.width * sizeof (Ncv32f) <= nSrcStep &&
|
||||
srcSize.width * sizeof (Ncv32f) <= nVFStep,
|
||||
NPPST_INVALID_STEP);
|
||||
|
||||
Ncv32u srcStep = nSrcStep / sizeof (Ncv32f);
|
||||
Ncv32u vfStep = nVFStep / sizeof (Ncv32f);
|
||||
|
||||
dim3 ctaSize (32, 6);
|
||||
dim3 gridSize (iDivUp (srcSize.width, ctaSize.x), iDivUp (srcSize.height, ctaSize.y));
|
||||
|
||||
ForwardWarpKernel_PSF1x1 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pDst);
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
const Ncv32f *pU,
|
||||
const Ncv32f *pV,
|
||||
Ncv32u nVFStep,
|
||||
Ncv32f *pBuffer,
|
||||
Ncv32f timeScale,
|
||||
Ncv32f *pDst)
|
||||
{
|
||||
ncvAssertReturn (pSrc != NULL &&
|
||||
pU != NULL &&
|
||||
pV != NULL &&
|
||||
pDst != NULL &&
|
||||
pBuffer != NULL, NPPST_NULL_POINTER_ERROR);
|
||||
|
||||
ncvAssertReturn (srcSize.width * sizeof (Ncv32f) <= nSrcStep &&
|
||||
srcSize.width * sizeof (Ncv32f) <= nVFStep, NPPST_INVALID_STEP);
|
||||
|
||||
Ncv32u srcStep = nSrcStep / sizeof (Ncv32f);
|
||||
Ncv32u vfStep = nVFStep / sizeof(Ncv32f);
|
||||
|
||||
dim3 ctaSize(32, 6);
|
||||
dim3 gridSize (iDivUp (srcSize.width, ctaSize.x), iDivUp (srcSize.height, ctaSize.y));
|
||||
|
||||
MemsetKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||
(0, srcSize.width, srcSize.height, pBuffer);
|
||||
|
||||
ForwardWarpKernel_PSF2x2 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pBuffer, pDst);
|
||||
|
||||
NormalizeKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||
(pBuffer, srcSize.width, srcSize.height, srcStep, pDst);
|
||||
|
||||
return NPPST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// Resize.cu
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
|
||||
texture <float, 2, cudaReadModeElementType> texSrc2D;
|
||||
|
||||
|
||||
__forceinline__
|
||||
__device__ float processLine(int spos,
|
||||
float xmin,
|
||||
float xmax,
|
||||
int ixmin,
|
||||
int ixmax,
|
||||
float fxmin,
|
||||
float cxmax)
|
||||
{
|
||||
// first element
|
||||
float wsum = 1.0f - xmin + fxmin;
|
||||
float sum = tex1Dfetch(texSrc, spos) * (1.0f - xmin + fxmin);
|
||||
spos++;
|
||||
for (int ix = ixmin + 1; ix < ixmax; ++ix)
|
||||
{
|
||||
sum += tex1Dfetch(texSrc, spos);
|
||||
spos++;
|
||||
wsum += 1.0f;
|
||||
}
|
||||
sum += tex1Dfetch(texSrc, spos) * (cxmax - xmax);
|
||||
wsum += cxmax - xmax;
|
||||
return sum / wsum;
|
||||
}
|
||||
|
||||
|
||||
__global__ void resizeSuperSample_32f(NcvSize32u srcSize,
|
||||
Ncv32u srcStep,
|
||||
NcvRect32u srcROI,
|
||||
Ncv32f *dst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u dstStep,
|
||||
NcvRect32u dstROI,
|
||||
Ncv32f scaleX,
|
||||
Ncv32f scaleY)
|
||||
{
|
||||
// position within dst ROI
|
||||
const int ix = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int iy = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (ix >= dstROI.width || iy >= dstROI.height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
float rw = (float) srcROI.width;
|
||||
float rh = (float) srcROI.height;
|
||||
|
||||
// source position
|
||||
float x = scaleX * (float) ix;
|
||||
float y = scaleY * (float) iy;
|
||||
|
||||
// x sampling range
|
||||
float xBegin = fmax (x - scaleX, 0.0f);
|
||||
float xEnd = fmin (x + scaleX, rw - 1.0f);
|
||||
// y sampling range
|
||||
float yBegin = fmax (y - scaleY, 0.0f);
|
||||
float yEnd = fmin (y + scaleY, rh - 1.0f);
|
||||
// x range of source samples
|
||||
float floorXBegin = floorf (xBegin);
|
||||
float ceilXEnd = ceilf (xEnd);
|
||||
int iXBegin = srcROI.x + (int) floorXBegin;
|
||||
int iXEnd = srcROI.x + (int) ceilXEnd;
|
||||
// y range of source samples
|
||||
float floorYBegin = floorf (yBegin);
|
||||
float ceilYEnd = ceilf (yEnd);
|
||||
int iYBegin = srcROI.y + (int) floorYBegin;
|
||||
int iYEnd = srcROI.y + (int) ceilYEnd;
|
||||
|
||||
// first row
|
||||
int pos = iYBegin * srcStep + iXBegin;
|
||||
|
||||
float wsum = 1.0f - yBegin + floorYBegin;
|
||||
|
||||
float sum = processLine (pos, xBegin, xEnd, iXBegin, iXEnd, floorXBegin,
|
||||
ceilXEnd) * (1.0f - yBegin + floorYBegin);
|
||||
pos += srcStep;
|
||||
for (int iy = iYBegin + 1; iy < iYEnd; ++iy)
|
||||
{
|
||||
sum += processLine (pos, xBegin, xEnd, iXBegin, iXEnd, floorXBegin,
|
||||
ceilXEnd);
|
||||
pos += srcStep;
|
||||
wsum += 1.0f;
|
||||
}
|
||||
|
||||
sum += processLine (pos, xBegin, xEnd, iXBegin, iXEnd, floorXBegin,
|
||||
ceilXEnd) * (ceilYEnd - yEnd);
|
||||
wsum += ceilYEnd - yEnd;
|
||||
sum /= wsum;
|
||||
|
||||
dst[(ix + dstROI.x) + (iy + dstROI.y) * dstStep] = sum;
|
||||
}
|
||||
|
||||
|
||||
// bicubic interpolation
|
||||
__forceinline__
|
||||
__device__ float bicubicCoeff(float x_)
|
||||
{
|
||||
float x = fabsf(x_);
|
||||
if (x <= 1.0f)
|
||||
{
|
||||
return x * x * (1.5f * x - 2.5f) + 1.0f;
|
||||
}
|
||||
else if (x < 2.0f)
|
||||
{
|
||||
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void resizeBicubic(NcvSize32u srcSize,
|
||||
NcvRect32u srcROI,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u dstStep,
|
||||
Ncv32f *dst,
|
||||
NcvRect32u dstROI,
|
||||
Ncv32f scaleX,
|
||||
Ncv32f scaleY)
|
||||
{
|
||||
const int ix = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int iy = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (ix >= dstROI.width || iy >= dstROI.height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const float dx = 1.0f / srcROI.width;
|
||||
const float dy = 1.0f / srcROI.height;
|
||||
|
||||
float rx = (float) srcROI.x;
|
||||
float ry = (float) srcROI.y;
|
||||
|
||||
float rw = (float) srcROI.width;
|
||||
float rh = (float) srcROI.height;
|
||||
|
||||
float x = scaleX * (float) ix;
|
||||
float y = scaleY * (float) iy;
|
||||
|
||||
// sampling range
|
||||
// border mode is clamp
|
||||
float xmin = fmax (ceilf (x - 2.0f), 0.0f);
|
||||
float xmax = fmin (floorf (x + 2.0f), rw - 1.0f);
|
||||
|
||||
float ymin = fmax (ceilf (y - 2.0f), 0.0f);
|
||||
float ymax = fmin (floorf (y + 2.0f), rh - 1.0f);
|
||||
|
||||
// shift data window to match ROI
|
||||
rx += 0.5f;
|
||||
ry += 0.5f;
|
||||
|
||||
x += rx;
|
||||
y += ry;
|
||||
|
||||
xmin += rx;
|
||||
xmax += rx;
|
||||
ymin += ry;
|
||||
ymax += ry;
|
||||
|
||||
float sum = 0.0f;
|
||||
float wsum = 0.0f;
|
||||
|
||||
for (float cy = ymin; cy <= ymax; cy += 1.0f)
|
||||
{
|
||||
for (float cx = xmin; cx <= xmax; cx += 1.0f)
|
||||
{
|
||||
float xDist = x - cx;
|
||||
float yDist = y - cy;
|
||||
float wx = bicubicCoeff (xDist);
|
||||
float wy = bicubicCoeff (yDist);
|
||||
wx *= wy;
|
||||
sum += wx * tex2D (texSrc2D, cx * dx, cy * dy);
|
||||
wsum += wx;
|
||||
}
|
||||
}
|
||||
dst[(ix + dstROI.x)+ (iy + dstROI.y) * dstStep] = sum / wsum;
|
||||
}
|
||||
|
||||
|
||||
NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
NcvRect32u srcROI,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u nDstStep,
|
||||
NcvRect32u dstROI,
|
||||
Ncv32f xFactor,
|
||||
Ncv32f yFactor,
|
||||
NppStInterpMode interpolation)
|
||||
{
|
||||
NCVStatus status = NPPST_SUCCESS;
|
||||
|
||||
ncvAssertReturn (pSrc != NULL && pDst != NULL, NPPST_NULL_POINTER_ERROR);
|
||||
ncvAssertReturn (xFactor != 0.0 && yFactor != 0.0, NPPST_INVALID_SCALE);
|
||||
|
||||
ncvAssertReturn (nSrcStep >= sizeof (Ncv32f) * (Ncv32u) srcSize.width &&
|
||||
nDstStep >= sizeof (Ncv32f) * (Ncv32u) dstSize.width,
|
||||
NPPST_INVALID_STEP);
|
||||
|
||||
Ncv32u srcStep = nSrcStep / sizeof (Ncv32f);
|
||||
Ncv32u dstStep = nDstStep / sizeof (Ncv32f);
|
||||
|
||||
// TODO: preprocess ROI to prevent out of bounds access
|
||||
|
||||
if (interpolation == nppStSupersample)
|
||||
{
|
||||
// bind texture
|
||||
cudaBindTexture (0, texSrc, pSrc, srcSize.height * nSrcStep);
|
||||
// invoke kernel
|
||||
dim3 ctaSize (32, 6);
|
||||
dim3 gridSize ((dstROI.width + ctaSize.x - 1) / ctaSize.x,
|
||||
(dstROI.height + ctaSize.y - 1) / ctaSize.y);
|
||||
|
||||
resizeSuperSample_32f <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
||||
(srcSize, srcStep, srcROI, pDst, dstSize, dstStep, dstROI, 1.0f / xFactor, 1.0f / yFactor);
|
||||
}
|
||||
else if (interpolation == nppStBicubic)
|
||||
{
|
||||
texSrc2D.addressMode[0] = cudaAddressModeMirror;
|
||||
texSrc2D.addressMode[1] = cudaAddressModeMirror;
|
||||
texSrc2D.normalized = true;
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc <float> ();
|
||||
|
||||
cudaBindTexture2D (0, texSrc2D, pSrc, desc, srcSize.width, srcSize.height,
|
||||
nSrcStep);
|
||||
|
||||
dim3 ctaSize (32, 6);
|
||||
dim3 gridSize ((dstSize.width + ctaSize.x - 1) / ctaSize.x,
|
||||
(dstSize.height + ctaSize.y - 1) / ctaSize.y);
|
||||
|
||||
resizeBicubic <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
||||
(srcSize, srcROI, dstSize, dstStep, pDst, dstROI, 1.0f / xFactor, 1.0f / yFactor);
|
||||
}
|
||||
else
|
||||
{
|
||||
status = NPPST_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -84,6 +84,255 @@ cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
|
||||
*/
|
||||
|
||||
|
||||
/** Border type
|
||||
*
|
||||
* Filtering operations assume that each pixel has a neighborhood of pixels.
|
||||
* The following enumeration describes possible ways to define non-existent pixels.
|
||||
*/
|
||||
enum NppStBorderType
|
||||
{
|
||||
nppStBorderNone   = 0, ///< No additional pixels need to be defined; the image is already extended
nppStBorderClamp  = 1, ///< Clamp out-of-range positions to the borders
nppStBorderWrap   = 2, ///< Wrap out-of-range positions; the image becomes periodic
nppStBorderMirror = 3  ///< Reflect out-of-range positions across the borders
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Filter types for image resizing
|
||||
*/
|
||||
enum NppStInterpMode
|
||||
{
|
||||
nppStSupersample, ///< Supersampling. For downscaling only
|
||||
nppStBicubic ///< Bicubic convolution filter, a = -0.5 (cubic Hermite spline)
|
||||
};
|
||||
|
||||
|
||||
/** Frame interpolation state
|
||||
*
|
||||
* This structure holds parameters required for frame interpolation.
|
||||
* Forward displacement field is a per-pixel mapping from frame 0 to frame 1.
|
||||
* Backward displacement field is a per-pixel mapping from frame 1 to frame 0.
|
||||
*/
|
||||
|
||||
struct NppStInterpolationState
|
||||
{
|
||||
NcvSize32u size; ///< frame size
|
||||
Ncv32u nStep; ///< pitch
|
||||
Ncv32f pos; ///< new frame position
|
||||
Ncv32f *pSrcFrame0; ///< frame 0
|
||||
Ncv32f *pSrcFrame1; ///< frame 1
|
||||
Ncv32f *pFU; ///< forward horizontal displacement
|
||||
Ncv32f *pFV; ///< forward vertical displacement
|
||||
Ncv32f *pBU; ///< backward horizontal displacement
|
||||
Ncv32f *pBV; ///< backward vertical displacement
|
||||
Ncv32f *pNewFrame; ///< new frame
|
||||
Ncv32f *ppBuffers[6]; ///< temporary buffers
|
||||
};
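Note (illustrative, not part of the commit): one way the state above might be filled before calling nppiStInterpolateFrames, assuming the two frames and both flow fields already live in CUDA device memory under the placeholder names d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, d_result, with common size `size` and pitch `nStep`; the buffer-size helper used here is declared further below.

NppStInterpolationState state;
state.size  = size;
state.nStep = nStep;
state.pos   = 0.5f;                    // render the frame halfway between frame 0 and frame 1
state.pSrcFrame0 = d_frame0;
state.pSrcFrame1 = d_frame1;
state.pFU = d_fu;  state.pFV = d_fv;   // forward displacement field
state.pBU = d_bu;  state.pBV = d_bv;   // backward displacement field
state.pNewFrame  = d_result;

// All six temporary buffers use the size reported by the helper.
Ncv32u bufSize = 0;
nppiStGetInterpolationBufferSize(size, nStep, &bufSize);
for (int i = 0; i < 6; ++i)
{
    cudaMalloc((void **)&state.ppBuffers[i], bufSize);
}

NCVStatus stat = nppiStInterpolateFrames(&state);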
|
||||
|
||||
|
||||
/** Size of a buffer required for interpolation.
|
||||
*
|
||||
* Several such buffers are required (\see NppStInterpolationState).
|
||||
*
|
||||
* \param srcSize [IN] Frame size (both frames must be of the same size)
|
||||
* \param nStep [IN] Frame line step
|
||||
* \param hpSize [OUT] Where to store computed size (host memory)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStGetInterpolationBufferSize(NcvSize32u srcSize,
|
||||
Ncv32u nStep,
|
||||
Ncv32u *hpSize);
|
||||
|
||||
|
||||
/** Interpolate frames (images) using provided optical flow (displacement field).
|
||||
* 32-bit floating point images, single channel
|
||||
*
|
||||
* \param pState [IN] structure containing all required parameters (host memory)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState);
|
||||
|
||||
|
||||
/** Row linear filter. 32-bit floating point image, single channel
|
||||
*
|
||||
* Apply horizontal linear filter
|
||||
*
|
||||
* \param pSrc [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcSize [IN] Source image size
|
||||
* \param nSrcStep [IN] Source image line step
|
||||
* \param pDst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstSize [IN] Destination image size
* \param nDstStep [IN] Destination image line step
|
||||
* \param oROI [IN] Region of interest in the source image
|
||||
* \param borderType [IN] Type of border
|
||||
* \param pKernel [IN] Pointer to row kernel values (CUDA device memory)
|
||||
* \param nKernelSize [IN] Size of the kernel in pixels
|
||||
* \param nAnchor [IN] The kernel row alignment with respect to the position of the input pixel
|
||||
* \param multiplier [IN] Value by which the computed result is multiplied
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u nDstStep,
|
||||
NcvRect32u oROI,
|
||||
NppStBorderType borderType,
|
||||
const Ncv32f *pKernel,
|
||||
Ncv32s nKernelSize,
|
||||
Ncv32s nAnchor,
|
||||
Ncv32f multiplier);
|
||||
|
||||
|
||||
/** Column linear filter. 32-bit floating point image, single channel
|
||||
*
|
||||
* Apply vertical linear filter
|
||||
*
|
||||
* \param pSrc [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcSize [IN] Source image size
|
||||
* \param nSrcStep [IN] Source image line step
|
||||
* \param pDst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstSize [IN] Destination image size
* \param nDstStep [IN] Destination image line step
|
||||
* \param oROI [IN] Region of interest in the source image
|
||||
* \param borderType [IN] Type of border
|
||||
* \param pKernel [IN] Pointer to column kernel values (CUDA device memory)
|
||||
* \param nKernelSize [IN] Size of the kernel in pixels
|
||||
* \param nAnchor [IN] The kernel column alignment with respect to the position of the input pixel
|
||||
* \param multiplier [IN] Value by which the computed result is multiplied
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u nDstStep,
|
||||
NcvRect32u oROI,
|
||||
NppStBorderType borderType,
|
||||
const Ncv32f *pKernel,
|
||||
Ncv32s nKernelSize,
|
||||
Ncv32s nAnchor,
|
||||
Ncv32f multiplier);
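Note (illustrative, not part of the commit): in the Filter.cu kernels added above only the nppStBorderMirror case is implemented; the other border modes currently return NPPST_ERROR. A hedged sketch of a separable smoothing pass built from the two filters, with d_src, d_tmp, d_dst and d_kernel as placeholder device pointers and nStep as a shared pitch:

// Apply a 1-D kernel along rows, then along columns (mirror border handling).
NcvRect32u roi;
roi.x = 0;
roi.y = 0;
roi.width  = srcSize.width;
roi.height = srcSize.height;

Ncv32s kernelSize = 5;              // length of the 1-D kernel stored in d_kernel
Ncv32s anchor     = kernelSize / 2; // centered kernel

nppiStFilterRowBorder_32f_C1R(d_src, srcSize, nStep,
                              d_tmp, srcSize, nStep,
                              roi, nppStBorderMirror,
                              d_kernel, kernelSize, anchor, 1.0f);

nppiStFilterColumnBorder_32f_C1R(d_tmp, srcSize, nStep,
                                 d_dst, srcSize, nStep,
                                 roi, nppStBorderMirror,
                                 d_kernel, kernelSize, anchor, 1.0f);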
|
||||
|
||||
|
||||
/** Size of buffer required for vector image warping.
|
||||
*
|
||||
* \param srcSize [IN] Source image size
|
||||
* \param nStep [IN] Source image line step
|
||||
* \param hpSize [OUT] Where to store computed size (host memory)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStVectorWarpGetBufferSize(NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
Ncv32u *hpSize);
|
||||
|
||||
|
||||
/** Warp image using provided 2D vector field and 1x1 point spread function.
|
||||
* 32-bit floating point image, single channel
|
||||
*
|
||||
* During warping pixels from the source image may fall between pixels of the destination image.
|
||||
* PSF (point spread function) describes how the source image pixel affects pixels of the destination.
|
||||
* For the 1x1 PSF only the single pixel with the largest intersection is affected (similar to nearest-neighbor interpolation).
|
||||
*
|
||||
* Destination image size and line step must be the same as the source image size and line step
|
||||
*
|
||||
* \param pSrc [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcSize [IN] Source image size
|
||||
* \param nSrcStep [IN] Source image line step
|
||||
* \param pU [IN] Pointer to horizontal displacement field (CUDA device memory)
|
||||
* \param pV [IN] Pointer to vertical displacement field (CUDA device memory)
|
||||
* \param nVFStep [IN] Displacement field line step
|
||||
* \param timeScale [IN] Value by which displacement field will be scaled for warping
|
||||
* \param pDst [OUT] Destination image pointer (CUDA device memory)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
const Ncv32f *pU,
|
||||
const Ncv32f *pV,
|
||||
Ncv32u nVFStep,
|
||||
Ncv32f timeScale,
|
||||
Ncv32f *pDst);
|
||||
|
||||
|
||||
/** Warp image using provided 2D vector field and 2x2 point spread function.
|
||||
* 32-bit floating point image, single channel
|
||||
*
|
||||
* During warping pixels from the source image may fall between pixels of the destination image.
|
||||
* PSF (point spread function) describes how the source image pixel affects pixels of the destination.
|
||||
* For the 2x2 PSF all four intersected pixels are affected.
|
||||
*
|
||||
* Destination image size and line step must be the same as the source image size and line step
|
||||
*
|
||||
* \param pSrc [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcSize [IN] Source image size
|
||||
* \param nSrcStep [IN] Source image line step
|
||||
* \param pU [IN] Pointer to horizontal displacement field (CUDA device memory)
|
||||
* \param pV [IN] Pointer to vertical displacement field (CUDA device memory)
|
||||
* \param nVFStep [IN] Displacement field line step
|
||||
* \param timeScale [IN] Value by which displacement field will be scaled for warping
|
||||
* \param pDst [OUT] Destination image pointer (CUDA device memory)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
const Ncv32f *pU,
|
||||
const Ncv32f *pV,
|
||||
Ncv32u nVFStep,
|
||||
Ncv32f *pBuffer,
|
||||
Ncv32f timeScale,
|
||||
Ncv32f *pDst);
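Note (illustrative, not part of the commit): unlike the 1x1 variant, the 2x2-PSF warp needs an auxiliary buffer in which the kernels above accumulate per-pixel splat weights before normalization; its size comes from nppiStVectorWarpGetBufferSize. A hedged sketch with placeholder device pointers d_src, d_u, d_v, d_dst:

Ncv32u bufSize = 0;
nppiStVectorWarpGetBufferSize(srcSize, nSrcStep, &bufSize);

Ncv32f *d_weights = 0;
cudaMalloc((void **)&d_weights, bufSize);   // normalization (weight) buffer

// Warp d_src forward along (d_u, d_v), scaled by timeScale, into d_dst.
nppiStVectorWarp_PSF2x2_32f_C1(d_src, srcSize, nSrcStep,
                               d_u, d_v, nVFStep,
                               d_weights, timeScale, d_dst);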
|
||||
|
||||
|
||||
/** Resize. 32-bit floating point image, single channel
|
||||
*
|
||||
* Resizes image using specified filter (interpolation type)
|
||||
*
|
||||
* \param pSrc [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcSize [IN] Source image size
|
||||
* \param nSrcStep [IN] Source image line step
|
||||
* \param srcROI [IN] Source image region of interest
|
||||
* \param pDst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstSize [IN] Destination image size
|
||||
* \param nDstStep [IN] Destination image line step
|
||||
* \param dstROI [IN] Destination image region of interest
|
||||
* \param xFactor [IN] Row scale factor
|
||||
* \param yFactor [IN] Column scale factor
|
||||
* \param interpolation [IN] Interpolation type
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
|
||||
NcvSize32u srcSize,
|
||||
Ncv32u nSrcStep,
|
||||
NcvRect32u srcROI,
|
||||
Ncv32f *pDst,
|
||||
NcvSize32u dstSize,
|
||||
Ncv32u nDstStep,
|
||||
NcvRect32u dstROI,
|
||||
Ncv32f xFactor,
|
||||
Ncv32f yFactor,
|
||||
NppStInterpMode interpolation);
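Note (illustrative, not part of the commit): a hedged example of a 2x downscale with the supersampling filter; the scale factors are taken here as the destination/source ratio, which matches the 1/xFactor sampling step passed to the kernels above. d_src and d_dst are placeholder device pointers.

NcvRect32u srcROI;
srcROI.x = 0;  srcROI.y = 0;
srcROI.width  = srcSize.width;
srcROI.height = srcSize.height;

NcvSize32u dstSize;
dstSize.width  = srcSize.width  / 2;
dstSize.height = srcSize.height / 2;

NcvRect32u dstROI;
dstROI.x = 0;  dstROI.y = 0;
dstROI.width  = dstSize.width;
dstROI.height = dstSize.height;

nppiStResize_32f_C1R(d_src, srcSize, nSrcStep, srcROI,
                     d_dst, dstSize, nDstStep, dstROI,
                     0.5f, 0.5f, nppStSupersample);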
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
|
||||
*
|
||||
|
639  samples/gpu/opticalflow_nvidia_api.cpp  new file
@ -0,0 +1,639 @@
|
||||
#if _MSC_VER >= 1400
|
||||
#pragma warning( disable : 4201 4408 4127 4100)
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <exception>
|
||||
#include <ctime>
|
||||
|
||||
#include "cvconfig.h"
|
||||
#include "opencv2/opencv.hpp"
|
||||
#include "opencv2/gpu/gpu.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "NPP_staging/NPP_staging.hpp"
|
||||
#include "NCVBroxOpticalFlow.hpp"
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CUDA)
|
||||
int main( int argc, const char** argv )
|
||||
{
|
||||
cout << "Please compile the library with CUDA support" << endl;
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
|
||||
using std::tr1::shared_ptr;
|
||||
|
||||
#define PARAM_INPUT "--input"
|
||||
#define PARAM_SCALE "--scale"
|
||||
#define PARAM_ALPHA "--alpha"
|
||||
#define PARAM_GAMMA "--gamma"
|
||||
#define PARAM_INNER "--inner"
|
||||
#define PARAM_OUTER "--outer"
|
||||
#define PARAM_SOLVER "--solver"
|
||||
#define PARAM_TIME_STEP "--time-step"
|
||||
#define PARAM_HELP "--help"
|
||||
|
||||
shared_ptr<INCVMemAllocator> g_pGPUMemAllocator;
|
||||
shared_ptr<INCVMemAllocator> g_pHostMemAllocator;
|
||||
|
||||
class RgbToMonochrome
|
||||
{
|
||||
public:
|
||||
float operator ()(unsigned char b, unsigned char g, unsigned char r)
|
||||
{
|
||||
float _r = static_cast<float>(r)/255.0f;
|
||||
float _g = static_cast<float>(g)/255.0f;
|
||||
float _b = static_cast<float>(b)/255.0f;
|
||||
return (_r + _g + _b)/3.0f;
|
||||
}
|
||||
};
|
||||
|
||||
class RgbToR
|
||||
{
|
||||
public:
|
||||
float operator ()(unsigned char b, unsigned char g, unsigned char r)
|
||||
{
|
||||
return static_cast<float>(r)/255.0f;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class RgbToG
|
||||
{
|
||||
public:
|
||||
float operator ()(unsigned char b, unsigned char g, unsigned char r)
|
||||
{
|
||||
return static_cast<float>(g)/255.0f;
|
||||
}
|
||||
};
|
||||
|
||||
class RgbToB
|
||||
{
|
||||
public:
|
||||
float operator ()(unsigned char b, unsigned char g, unsigned char r)
|
||||
{
|
||||
return static_cast<float>(b)/255.0f;
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
NCVStatus CopyData(IplImage *image, shared_ptr<NCVMatrixAlloc<Ncv32f>> &dst)
|
||||
{
|
||||
dst = shared_ptr<NCVMatrixAlloc<Ncv32f>> (new NCVMatrixAlloc<Ncv32f> (*g_pHostMemAllocator, image->width, image->height));
|
||||
ncvAssertReturn (dst->isMemAllocated (), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
unsigned char *row = reinterpret_cast<unsigned char*> (image->imageData);
|
||||
T convert;
|
||||
for (int i = 0; i < image->height; ++i)
|
||||
{
|
||||
for (int j = 0; j < image->width; ++j)
|
||||
{
|
||||
if (image->nChannels < 3)
|
||||
{
|
||||
dst->ptr ()[j + i*dst->stride ()] = static_cast<float> (*(row + j*image->nChannels))/255.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned char *color = row + j * image->nChannels;
|
||||
dst->ptr ()[j +i*dst->stride ()] = convert (color[0], color[1], color[2]);
|
||||
}
|
||||
}
|
||||
row += image->widthStep;
|
||||
}
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
NCVStatus CopyData(const IplImage *image, const NCVMatrixAlloc<Ncv32f> &dst)
|
||||
{
|
||||
unsigned char *row = reinterpret_cast<unsigned char*> (image->imageData);
|
||||
T convert;
|
||||
for (int i = 0; i < image->height; ++i)
|
||||
{
|
||||
for (int j = 0; j < image->width; ++j)
|
||||
{
|
||||
if (image->nChannels < 3)
|
||||
{
|
||||
dst.ptr ()[j + i*dst.stride ()] = static_cast<float>(*(row + j*image->nChannels))/255.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned char *color = row + j * image->nChannels;
|
||||
dst.ptr ()[j +i*dst.stride()] = convert (color[0], color[1], color[2]);
|
||||
}
|
||||
}
|
||||
row += image->widthStep;
|
||||
}
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
NCVStatus LoadImages (const char *frame0Name,
|
||||
const char *frame1Name,
|
||||
int &width,
|
||||
int &height,
|
||||
shared_ptr<NCVMatrixAlloc<Ncv32f>> &src,
|
||||
shared_ptr<NCVMatrixAlloc<Ncv32f>> &dst,
|
||||
IplImage *&firstFrame,
|
||||
IplImage *&lastFrame)
|
||||
{
|
||||
IplImage *image;
|
||||
image = cvLoadImage (frame0Name);
|
||||
if (image == 0)
|
||||
{
|
||||
std::cout << "Could not open '" << frame0Name << "'\n";
|
||||
return NCV_FILE_ERROR;
|
||||
}
|
||||
|
||||
firstFrame = image;
|
||||
// copy data to src
|
||||
ncvAssertReturnNcvStat (CopyData<RgbToMonochrome> (image, src));
|
||||
|
||||
IplImage *image2;
|
||||
image2 = cvLoadImage (frame1Name);
|
||||
if (image2 == 0)
|
||||
{
|
||||
std::cout << "Could not open '" << frame1Name << "'\n";
|
||||
return NCV_FILE_ERROR;
|
||||
}
|
||||
lastFrame = image2;
|
||||
|
||||
ncvAssertReturnNcvStat (CopyData<RgbToMonochrome> (image2, dst));
|
||||
|
||||
width = image->width;
|
||||
height = image->height;
|
||||
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T Clamp (T x, T a, T b)
|
||||
{
|
||||
return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T MapValue (T x, T a, T b, T c, T d)
|
||||
{
|
||||
x = Clamp (x, a, b);
|
||||
return c + (d - c) * (x - a) / (b - a);
|
||||
}
|
||||
|
||||
NCVStatus ShowFlow (NCVMatrixAlloc<Ncv32f> &u, NCVMatrixAlloc<Ncv32f> &v, const char *name)
|
||||
{
|
||||
IplImage *flowField;
|
||||
|
||||
NCVMatrixAlloc<Ncv32f> host_u(*g_pHostMemAllocator, u.width(), u.height());
|
||||
ncvAssertReturn(host_u.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
NCVMatrixAlloc<Ncv32f> host_v (*g_pHostMemAllocator, u.width (), u.height ());
|
||||
ncvAssertReturn (host_v.isMemAllocated (), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
ncvAssertReturnNcvStat (u.copySolid (host_u, 0));
|
||||
ncvAssertReturnNcvStat (v.copySolid (host_v, 0));
|
||||
|
||||
float *ptr_u = host_u.ptr ();
|
||||
float *ptr_v = host_v.ptr ();
|
||||
|
||||
float maxDisplacement = 1.0f;
|
||||
|
||||
for (Ncv32u i = 0; i < u.height (); ++i)
|
||||
{
|
||||
for (Ncv32u j = 0; j < u.width (); ++j)
|
||||
{
|
||||
float d = std::max ( fabsf(*ptr_u), fabsf(*ptr_v) );
|
||||
if (d > maxDisplacement) maxDisplacement = d;
|
||||
++ptr_u;
|
||||
++ptr_v;
|
||||
}
|
||||
ptr_u += u.stride () - u.width ();
|
||||
ptr_v += v.stride () - v.width ();
|
||||
}
|
||||
|
||||
CvSize image_size = cvSize (u.width (), u.height ());
|
||||
flowField = cvCreateImage (image_size, IPL_DEPTH_8U, 4);
|
||||
if (flowField == 0) return NCV_NULL_PTR;
|
||||
|
||||
unsigned char *row = reinterpret_cast<unsigned char *> (flowField->imageData);
|
||||
|
||||
ptr_u = host_u.ptr();
|
||||
ptr_v = host_v.ptr();
|
||||
for (int i = 0; i < flowField->height; ++i)
|
||||
{
|
||||
for (int j = 0; j < flowField->width; ++j)
|
||||
{
|
||||
(row + j * flowField->nChannels)[0] = 0;
|
||||
(row + j * flowField->nChannels)[1] = static_cast<unsigned char> (MapValue (-(*ptr_v), -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
|
||||
(row + j * flowField->nChannels)[2] = static_cast<unsigned char> (MapValue (*ptr_u , -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
|
||||
(row + j * flowField->nChannels)[3] = 255;
|
||||
++ptr_u;
|
||||
++ptr_v;
|
||||
}
|
||||
row += flowField->widthStep;
|
||||
ptr_u += u.stride () - u.width ();
|
||||
ptr_v += v.stride () - v.width ();
|
||||
}
|
||||
|
||||
cvShowImage (name, flowField);
|
||||
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
IplImage *CreateImage (NCVMatrixAlloc<Ncv32f> &h_r, NCVMatrixAlloc<Ncv32f> &h_g, NCVMatrixAlloc<Ncv32f> &h_b)
|
||||
{
|
||||
CvSize imageSize = cvSize (h_r.width (), h_r.height ());
|
||||
IplImage *image = cvCreateImage (imageSize, IPL_DEPTH_8U, 4);
|
||||
if (image == 0) return 0;
|
||||
|
||||
unsigned char *row = reinterpret_cast<unsigned char*> (image->imageData);
|
||||
|
||||
for (int i = 0; i < image->height; ++i)
|
||||
{
|
||||
for (int j = 0; j < image->width; ++j)
|
||||
{
|
||||
int offset = j * image->nChannels;
|
||||
int pos = i * h_r.stride () + j;
|
||||
row[offset + 0] = static_cast<unsigned char> (h_b.ptr ()[pos] * 255.0f);
|
||||
row[offset + 1] = static_cast<unsigned char> (h_g.ptr ()[pos] * 255.0f);
|
||||
row[offset + 2] = static_cast<unsigned char> (h_r.ptr ()[pos] * 255.0f);
|
||||
row[offset + 3] = 255;
|
||||
}
|
||||
row += image->widthStep;
|
||||
}
|
||||
return image;
|
||||
}
|
||||
|
||||
void PrintHelp ()
|
||||
{
|
||||
std::cout << "Usage help:\n";
|
||||
std::cout << std::setiosflags(std::ios::left);
|
||||
std::cout << "\t" << std::setw(15) << PARAM_ALPHA << " - set alpha\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_GAMMA << " - set gamma\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_INNER << " - set number of inner iterations\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_INPUT << " - specify input file names (2 image files)\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_OUTER << " - set number of outer iterations\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_SCALE << " - set pyramid scale factor\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_SOLVER << " - set number of basic solver iterations\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_TIME_STEP << " - set frame interpolation time step\n";
|
||||
std::cout << "\t" << std::setw(15) << PARAM_HELP << " - display this help message\n";
|
||||
}
|
||||
|
||||
int ProcessCommandLine(int argc, char **argv,
|
||||
Ncv32f &timeStep,
|
||||
char *&frame0Name,
|
||||
char *&frame1Name,
|
||||
NCVBroxOpticalFlowDescriptor &desc)
|
||||
{
|
||||
timeStep = 0.25f;
|
||||
for (int iarg = 1; iarg < argc; ++iarg)
|
||||
{
|
||||
if (strcmp(argv[iarg], PARAM_INPUT) == 0)
|
||||
{
|
||||
if (iarg + 2 < argc)
|
||||
{
|
||||
frame0Name = argv[++iarg];
|
||||
frame1Name = argv[++iarg];
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_SCALE) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
desc.scale_factor = static_cast<Ncv32f>(atof(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_ALPHA) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
desc.alpha = static_cast<Ncv32f>(atof(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_GAMMA) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
desc.gamma = static_cast<Ncv32f>(atof(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_INNER) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
desc.number_of_inner_iterations = static_cast<Ncv32u>(atoi(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_OUTER) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
desc.number_of_outer_iterations = static_cast<Ncv32u>(atoi(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_SOLVER) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
desc.number_of_solver_iterations = static_cast<Ncv32u>(atoi(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_TIME_STEP) == 0)
|
||||
{
|
||||
if (iarg + 1 < argc)
|
||||
timeStep = static_cast<Ncv32f>(atof(argv[++iarg]));
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if(strcmp(argv[iarg], PARAM_HELP) == 0)
|
||||
{
|
||||
PrintHelp ();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char *frame0Name = 0, *frame1Name = 0;
|
||||
Ncv32f timeStep = 0.01f;
|
||||
|
||||
NCVBroxOpticalFlowDescriptor desc;
|
||||
|
||||
desc.alpha = 0.197f;
|
||||
desc.gamma = 50.0f;
|
||||
desc.number_of_inner_iterations = 10;
|
||||
desc.number_of_outer_iterations = 77;
|
||||
desc.number_of_solver_iterations = 10;
|
||||
desc.scale_factor = 0.8f;
|
||||
|
||||
    int result = ProcessCommandLine (argc, argv, timeStep, frame0Name, frame1Name, desc);
    if (argc == 1 || result)
    {
        PrintHelp();
        return result;
    }

std::cout << "OpenCV / NVIDIA Computer Vision\n";
|
||||
std::cout << "Optical Flow Demo: Frame Interpolation\n";
|
||||
std::cout << "=========================================\n";
|
||||
std::cout << "Press:\n ESC to quit\n 'a' to move to the previous frame\n 's' to move to the next frame\n";
|
||||
|
||||
int devId;
|
||||
ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
|
||||
cudaDeviceProp devProp;
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);
|
||||
std::cout << "Using GPU: " << devId << "(" << devProp.name <<
|
||||
"), arch=" << devProp.major << "." << devProp.minor << std::endl;
|
||||
|
||||
g_pGPUMemAllocator = shared_ptr<INCVMemAllocator> (new NCVMemNativeAllocator (NCVMemoryTypeDevice, devProp.textureAlignment));
|
||||
ncvAssertPrintReturn (g_pGPUMemAllocator->isInitialized (), "Device memory allocator isn't initialized", -1);
|
||||
|
||||
g_pHostMemAllocator = shared_ptr<INCVMemAllocator> (new NCVMemNativeAllocator (NCVMemoryTypeHostPageable, devProp.textureAlignment));
|
||||
ncvAssertPrintReturn (g_pHostMemAllocator->isInitialized (), "Host memory allocator isn't initialized", -1);
|
||||
|
||||
    int width, height;

    shared_ptr<NCVMatrixAlloc<Ncv32f>> src_host;
    shared_ptr<NCVMatrixAlloc<Ncv32f>> dst_host;

    IplImage *firstFrame, *lastFrame;
    if (frame0Name != 0 && frame1Name != 0)
    {
        ncvAssertReturnNcvStat (LoadImages (frame0Name, frame1Name, width, height, src_host, dst_host, firstFrame, lastFrame));
    }
    else
    {
        ncvAssertReturnNcvStat (LoadImages ("frame10.bmp", "frame11.bmp", width, height, src_host, dst_host, firstFrame, lastFrame));
    }

    shared_ptr<NCVMatrixAlloc<Ncv32f>> src (new NCVMatrixAlloc<Ncv32f> (*g_pGPUMemAllocator, src_host->width (), src_host->height ()));
    ncvAssertReturn (src->isMemAllocated (), -1);

    shared_ptr<NCVMatrixAlloc<Ncv32f>> dst (new NCVMatrixAlloc<Ncv32f> (*g_pGPUMemAllocator, src_host->width (), src_host->height ()));
    ncvAssertReturn (dst->isMemAllocated (), -1);

    ncvAssertReturnNcvStat (src_host->copySolid ( *src, 0 ));
    ncvAssertReturnNcvStat (dst_host->copySolid ( *dst, 0 ));

#if defined SAFE_MAT_DECL
#undef SAFE_MAT_DECL
#endif
#define SAFE_MAT_DECL(name, allocator, sx, sy) \
    NCVMatrixAlloc<Ncv32f> name(*allocator, sx, sy); \
    ncvAssertReturn(name.isMemAllocated(), -1);

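    // For reference, SAFE_MAT_DECL (u, g_pGPUMemAllocator, width, height) expands to
    //     NCVMatrixAlloc<Ncv32f> u(*g_pGPUMemAllocator, width, height);
    //     ncvAssertReturn(u.isMemAllocated(), -1);
    // i.e. it declares a pitched Ncv32f matrix and returns -1 from the enclosing
    // function if the allocation failed.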
    // forward flow components (GPU memory)
    SAFE_MAT_DECL (u, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (v, g_pGPUMemAllocator, width, height);

    // backward flow components (GPU memory)
    SAFE_MAT_DECL (uBck, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (vBck, g_pGPUMemAllocator, width, height);

    // host staging buffers for individual color channels
    SAFE_MAT_DECL (h_r, g_pHostMemAllocator, width, height);
    SAFE_MAT_DECL (h_g, g_pHostMemAllocator, width, height);
    SAFE_MAT_DECL (h_b, g_pHostMemAllocator, width, height);

std::cout << "Estimating optical flow\nForward...\n";
|
||||
|
||||
if (NCV_SUCCESS != NCVBroxOpticalFlow (desc, *g_pGPUMemAllocator, *src, *dst, u, v, 0))
|
||||
{
|
||||
std::cout << "Failed\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::cout << "Backward...\n";
|
||||
if (NCV_SUCCESS != NCVBroxOpticalFlow (desc, *g_pGPUMemAllocator, *dst, *src, uBck, vBck, 0))
|
||||
{
|
||||
std::cout << "Failed\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
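    // Flow is estimated in both directions because the frame interpolator below
    // needs forward (pFU/pFV) and backward (pBU/pBV) fields, plus occlusion buffers,
    // to synthesize the in-between frames.
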
    // matrix for temporary data
    SAFE_MAT_DECL (d_temp, g_pGPUMemAllocator, width, height);

    // first frame color components (GPU memory)
    SAFE_MAT_DECL (d_r, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (d_g, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (d_b, g_pGPUMemAllocator, width, height);

    // second frame color components (GPU memory)
    SAFE_MAT_DECL (d_rt, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (d_gt, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (d_bt, g_pGPUMemAllocator, width, height);

    // intermediate frame color components (GPU memory)
    SAFE_MAT_DECL (d_rNew, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (d_gNew, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (d_bNew, g_pGPUMemAllocator, width, height);

    // interpolated forward flow
    SAFE_MAT_DECL (ui, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (vi, g_pGPUMemAllocator, width, height);

    // interpolated backward flow
    SAFE_MAT_DECL (ubi, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (vbi, g_pGPUMemAllocator, width, height);

    // occlusion masks
    SAFE_MAT_DECL (occ0, g_pGPUMemAllocator, width, height);
    SAFE_MAT_DECL (occ1, g_pGPUMemAllocator, width, height);

    // prepare color components on host and copy them to device memory
    ncvAssertReturnNcvStat (CopyData<RgbToR> (firstFrame, h_r));
    ncvAssertReturnNcvStat (CopyData<RgbToG> (firstFrame, h_g));
    ncvAssertReturnNcvStat (CopyData<RgbToB> (firstFrame, h_b));

    ncvAssertReturnNcvStat (h_r.copySolid ( d_r, 0 ));
    ncvAssertReturnNcvStat (h_g.copySolid ( d_g, 0 ));
    ncvAssertReturnNcvStat (h_b.copySolid ( d_b, 0 ));

    ncvAssertReturnNcvStat (CopyData<RgbToR> (lastFrame, h_r));
    ncvAssertReturnNcvStat (CopyData<RgbToG> (lastFrame, h_g));
    ncvAssertReturnNcvStat (CopyData<RgbToB> (lastFrame, h_b));

    ncvAssertReturnNcvStat (h_r.copySolid ( d_rt, 0 ));
    ncvAssertReturnNcvStat (h_g.copySolid ( d_gt, 0 ));
    ncvAssertReturnNcvStat (h_b.copySolid ( d_bt, 0 ));

    std::cout << "Interpolating...\n";
    std::cout.precision (4);

    std::vector<IplImage*> frames;
    frames.push_back (firstFrame);

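    // With the default timeStep of 0.25f the loop below generates frames at
    // t = 0.25, 0.5 and 0.75, so the final sequence holds the two input frames
    // plus three interpolated ones.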
    // compute interpolated frames
    for (Ncv32f timePos = timeStep; timePos < 1.0f; timePos += timeStep)
    {
        ncvAssertCUDAReturn (cudaMemset (ui.ptr (), 0, ui.pitch () * ui.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (vi.ptr (), 0, vi.pitch () * vi.height ()), NCV_CUDA_ERROR);

        ncvAssertCUDAReturn (cudaMemset (ubi.ptr (), 0, ubi.pitch () * ubi.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (vbi.ptr (), 0, vbi.pitch () * vbi.height ()), NCV_CUDA_ERROR);

        ncvAssertCUDAReturn (cudaMemset (occ0.ptr (), 0, occ0.pitch () * occ0.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (occ1.ptr (), 0, occ1.pitch () * occ1.height ()), NCV_CUDA_ERROR);

        NppStInterpolationState state;
        // the interpolation state only needs to be filled once, except for pSrcFrame0,
        // pSrcFrame1 and pNewFrame; the scratch buffers just have to be reset to 0
        // before every call because the interpolator does not clear them itself
        state.size = NcvSize32u (width, height);
        state.nStep = d_r.pitch ();
        state.pSrcFrame0 = d_r.ptr ();
        state.pSrcFrame1 = d_rt.ptr ();
        state.pFU = u.ptr ();
        state.pFV = v.ptr ();
        state.pBU = uBck.ptr ();
        state.pBV = vBck.ptr ();
        state.pos = timePos;
        state.pNewFrame = d_rNew.ptr ();
        state.ppBuffers[0] = occ0.ptr ();
        state.ppBuffers[1] = occ1.ptr ();
        state.ppBuffers[2] = ui.ptr ();
        state.ppBuffers[3] = vi.ptr ();
        state.ppBuffers[4] = ubi.ptr ();
        state.ppBuffers[5] = vbi.ptr ();

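        // Each call to nppiStInterpolateFrames processes a single Ncv32f plane, so the
        // R, G and B channels are interpolated one after another, re-using this state
        // and only swapping pSrcFrame0, pSrcFrame1 and pNewFrame between calls.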
        // interpolate red channel
        nppiStInterpolateFrames (&state);

        // reset buffers
        ncvAssertCUDAReturn (cudaMemset (ui.ptr (), 0, ui.pitch () * ui.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (vi.ptr (), 0, vi.pitch () * vi.height ()), NCV_CUDA_ERROR);

        ncvAssertCUDAReturn (cudaMemset (ubi.ptr (), 0, ubi.pitch () * ubi.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (vbi.ptr (), 0, vbi.pitch () * vbi.height ()), NCV_CUDA_ERROR);

        ncvAssertCUDAReturn (cudaMemset (occ0.ptr (), 0, occ0.pitch () * occ0.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (occ1.ptr (), 0, occ1.pitch () * occ1.height ()), NCV_CUDA_ERROR);

        // interpolate green channel
        state.pSrcFrame0 = d_g.ptr ();
        state.pSrcFrame1 = d_gt.ptr ();
        state.pNewFrame = d_gNew.ptr ();

        nppiStInterpolateFrames (&state);

        // reset buffers
        ncvAssertCUDAReturn (cudaMemset (ui.ptr (), 0, ui.pitch () * ui.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (vi.ptr (), 0, vi.pitch () * vi.height ()), NCV_CUDA_ERROR);

        ncvAssertCUDAReturn (cudaMemset (ubi.ptr (), 0, ubi.pitch () * ubi.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (vbi.ptr (), 0, vbi.pitch () * vbi.height ()), NCV_CUDA_ERROR);

        ncvAssertCUDAReturn (cudaMemset (occ0.ptr (), 0, occ0.pitch () * occ0.height ()), NCV_CUDA_ERROR);
        ncvAssertCUDAReturn (cudaMemset (occ1.ptr (), 0, occ1.pitch () * occ1.height ()), NCV_CUDA_ERROR);

        // interpolate blue channel
        state.pSrcFrame0 = d_b.ptr ();
        state.pSrcFrame1 = d_bt.ptr ();
        state.pNewFrame = d_bNew.ptr ();

        nppiStInterpolateFrames (&state);

        // copy to host memory
        ncvAssertReturnNcvStat (d_rNew.copySolid (h_r, 0));
        ncvAssertReturnNcvStat (d_gNew.copySolid (h_g, 0));
        ncvAssertReturnNcvStat (d_bNew.copySolid (h_b, 0));

        // convert to IplImage
        IplImage *newFrame = CreateImage (h_r, h_g, h_b);
        if (newFrame == 0)
        {
            std::cout << "Could not create new frame in host memory\n";
            break;
        }
        frames.push_back (newFrame);
        std::cout << timePos * 100.0f << "%\r";
    }
    std::cout << std::setw (5) << "100%\n";

    frames.push_back (lastFrame);

    Ncv32u currentFrame = 0;

    ShowFlow (u, v, "Forward flow");
    ShowFlow (uBck, vBck, "Backward flow");

    cvShowImage ("Interpolated frame", frames[currentFrame]);

    bool qPressed = false;
    while ( !qPressed )
    {
        int key = toupper (cvWaitKey (10));
        switch (key)
        {
        case 27: // ESC
            qPressed = true;
            break;
        case 'A':
            if (currentFrame > 0) --currentFrame;
            cvShowImage ("Interpolated frame", frames[currentFrame]);
            break;
        case 'S':
            if (currentFrame < frames.size()-1) ++currentFrame;
            cvShowImage ("Interpolated frame", frames[currentFrame]);
            break;
        }
    }

    cvDestroyAllWindows ();

    std::vector<IplImage*>::iterator iter;
    for (iter = frames.begin (); iter != frames.end (); ++iter)
    {
        cvReleaseImage (&(*iter));
    }

    return 0;
}

#endif
BIN
samples/gpu/rubberwhale1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 352 KiB |
BIN
samples/gpu/rubberwhale2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 353 KiB |