2012-06-26 00:39:40 +08:00
|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
//
|
|
|
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
|
|
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
|
|
// derived from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
2012-06-26 00:39:50 +08:00
|
|
|
#include <opencv2/gpu/device/lbp.hpp>
|
2012-07-04 12:51:09 +08:00
|
|
|
#include <opencv2/gpu/device/vec_traits.hpp>
|
|
|
|
#include <opencv2/gpu/device/saturate_cast.hpp>
|
2012-06-26 00:39:40 +08:00
|
|
|
|
|
|
|
namespace cv { namespace gpu { namespace device
|
|
|
|
{
|
|
|
|
namespace lbp
|
|
|
|
{
|
2012-07-10 19:58:15 +08:00
|
|
|
// Evaluator for a single 3x3-cell LBP feature on top of an integral image.
struct LBP
{
    __host__ __device__ __forceinline__ LBP() {}

    // Computes the two codes of one feature.
    //
    // The feature is sampled on a 4x4 lattice of integral-image values; the
    // nine cells enclosed by that lattice are numbered row-major 0..8 and
    // cell 4 is the centre every other cell is compared against.
    //
    //   integral : base pointer of the integral image data
    //   ty       : element offset of the first lattice point
    //   fh       : element offset between consecutive lattice lines
    //   fw       : element offset between neighbouring points on one line
    //   shift    : [out] 5-bit code; a bit is set when (cell - centre) is not
    //              negative: cell 3 -> 1, cell 6 -> 2, cell 7 -> 4,
    //              cell 8 -> 8, cell 5 -> 16
    //
    // Returns the 3-bit code built the same way from the first cell row:
    // cell 0 -> 4, cell 1 -> 2, cell 2 -> 1.
    __device__ __forceinline__ int operator() (const int* integral, int ty, int fh, int fw, int& shift) const
    {
        int cells[9];     // first holds per-line differences, then the cell values
        int lastLine[3];  // differences along the fourth lattice line

        lineDiffs(integral, ty, fw, cells);
        ty += fh;
        lineDiffs(integral, ty, fw, cells + 3);
        ty += fh;
        lineDiffs(integral, ty, fw, cells + 6);
        ty += fh;
        lineDiffs(integral, ty, fw, lastLine);

        // A cell value is the difference of the line above it minus the line below it.
        for (int k = 0; k < 3; ++k)
        {
            cells[k]     -= cells[k + 3];
            cells[k + 3] -= cells[k + 6];
            cells[k + 6] -= lastLine[k];
        }

        const int centre = cells[4];

        const int response = flagIfNotNegative(cells[0] - centre, 4)
                           | flagIfNotNegative(cells[1] - centre, 2)
                           | flagIfNotNegative(cells[2] - centre, 1);

        shift = flagIfNotNegative(cells[5] - centre, 16)
              | flagIfNotNegative(cells[3] - centre, 1)
              | flagIfNotNegative(cells[6] - centre, 2)
              | flagIfNotNegative(cells[7] - centre, 4)
              | flagIfNotNegative(cells[8] - centre, 8);

        return response;
    }

private:
    // Reads four lattice points of one line and stores the three differences
    // between each point and its right-hand neighbour.
    static __device__ __forceinline__ void lineDiffs(const int* integral, int offset, int fw, int* out)
    {
        const int p0 = integral[offset];
        const int p1 = integral[offset + fw];
        const int p2 = integral[offset + fw * 2];
        const int p3 = integral[offset + fw * 3];

        out[0] = p0 - p1;
        out[1] = p1 - p2;
        out[2] = p2 - p3;
    }

    // Branch-free select: yields 'flag' when the sign bit of 'value' is clear, 0 otherwise.
    static __device__ __forceinline__ int flagIfNotNegative(int value, int flag)
    {
        return (~(value >> 31)) & flag;
    }
};
|
|
|
|
|
2012-07-04 12:51:09 +08:00
|
|
|
// Merges candidate rectangles that were put into the same class and writes one
// averaged rectangle per sufficiently large class.
//
// Indexing uses threadIdx.x only, so the kernel is meant to run as a single
// block with one thread per candidate (this is how connectedConmonents
// launches it). Dynamic shared memory must provide n ints for the labels
// followed by 4 * n ints for the per-class rectangle sums.
//
//   candidates     : n input rectangles
//   objects        : output rectangles, filled in no particular order
//   n              : number of candidates
//   groupThreshold : minimal class population for a class to be emitted
//   grouping_eps   : forwarded to the predicate Pr
//   nclasses       : [out] reset to 0 here, then incremented once per emitted rectangle
//
// NOTE(review): partition() is defined outside this file; it is assumed to
// store a class label in [0, n) for every candidate into 'labels' - confirm.
template<typename Pr>
__global__ void disjoin(int4* candidates, int4* objects, unsigned int n, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{
    extern __shared__ int sbuff[];

    const unsigned int lane = threadIdx.x;

    int* labels = sbuff;
    int* sums   = sbuff + n;
    int* mine   = sums + lane * 4;   // accumulator slot owned by this thread

    Pr predicate(grouping_eps);
    partition(candidates, n, labels, predicate);

    // Clear the accumulators before any thread starts adding into them.
    for (int k = 0; k < 4; ++k)
        mine[k] = 0;
    __syncthreads();

    // Add this candidate to the accumulator of its class.
    const int  cls    = labels[lane];
    const int4 cand   = candidates[lane];
    int*       bucket = sums + cls * 4;
    Emulation::smem::atomicAdd(bucket + 0, cand.x);
    Emulation::smem::atomicAdd(bucket + 1, cand.y);
    Emulation::smem::atomicAdd(bucket + 2, cand.z);
    Emulation::smem::atomicAdd(bucket + 3, cand.w);

    // Every label has been read into 'cls'; reuse the label array as per-class counters.
    __syncthreads();
    labels[lane] = 0;

    __syncthreads();
    Emulation::smem::atomicInc((unsigned int*)labels + cls, n);

    __syncthreads();
    *nclasses = 0;

    // A non-zero counter means this thread's index is a populated class id.
    const int population = labels[lane];
    if (population)
    {
        const float inv = 1.f / population;
        for (int k = 0; k < 4; ++k)
            mine[k] = saturate_cast<int>(mine[k] * inv);
    }
    __syncthreads();

    if (!population || population < groupThreshold)
        return;

    const int4 averaged = make_int4(mine[0], mine[1], mine[2], mine[3]);

    int slot = Emulation::smem::atomicInc(nclasses, n);
    objects[slot] = averaged;
}
|
|
|
|
|
2012-07-15 00:23:56 +08:00
|
|
|
// Host entry point: groups 'ncandidates' rectangles with the disjoin kernel.
//
// The kernel is launched as one block with one thread per candidate and
// sizeof(int) + sizeof(int4) bytes of dynamic shared memory per candidate, so
// 'ncandidates' must not exceed the per-block thread limit of the device; a
// violation is reported through cudaSafeCall.
//
//   candidates     : device buffer with the rectangles to group
//   ncandidates    : number of valid rectangles in 'candidates'
//   objects        : device buffer receiving the grouped rectangles
//   groupThreshold : minimal class population for a class to be emitted
//   grouping_eps   : similarity tolerance handed to InSameComponint
//   nclasses       : device counter receiving the number of emitted rectangles
//
// When there are no candidates nothing is launched and *nclasses is left untouched.
void connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{
    // A block of zero threads is an invalid launch configuration, and there is
    // nothing to group in that case anyway.
    if (ncandidates <= 0)
        return;

    int block = ncandidates;
    int smem  = block * ( sizeof(int) + sizeof(int4) );
    disjoin<InSameComponint><<<1, block, smem>>>(candidates, objects, ncandidates, groupThreshold, grouping_eps, nclasses);
    cudaSafeCall( cudaGetLastError() );
}
|
|
|
|
|
|
|
|
// Read-only view of a staged LBP cascade plus the code that runs it on one window.
// The object only stores pointers and sizes; it does not own the tables.
struct Cascade
{
    __host__ __device__ __forceinline__ Cascade(const Stage* _stages, int _nstages, const ClNode* _nodes, const float* _leaves,
                                                const int* _subsets, const uchar4* _features, int _subsetSize)
        : stages(_stages),
          nstages(_nstages),
          nodes(_nodes),
          leaves(_leaves),
          subsets(_subsets),
          features(_features),
          subsetSize(_subsetSize)
    {}

    // Runs every stage on the window whose origin is (x, y).
    //
    //   y, x     : window origin inside the integral image
    //   integral : integral image data
    //   pitch    : row stride of 'integral', in elements
    //
    // Returns false as soon as the summed leaf values of a stage fall below
    // that stage's threshold, true when all stages are passed.
    __device__ __forceinline__ bool operator() (int y, int x, int* integral, const int pitch) const
    {
        // Nodes and leaves are consumed sequentially across stages:
        // one node and two leaves per tree.
        int nodeIdx = 0;
        int leafIdx = 0;

        for (int s = 0; s < nstages; ++s)
        {
            const Stage stage = stages[s];
            float stageSum = 0;

            for (int left = stage.ntrees; left > 0; --left)
            {
                const ClNode node    = nodes[nodeIdx];
                const uchar4 feature = features[node.featureIdx];

                // feature.x / feature.y offset the origin, feature.z is the
                // horizontal step and feature.w rows the vertical step of the evaluator.
                const int origin = (y + feature.y) * pitch + x + feature.x;

                int shift;
                const int code = evaluator(integral, origin, feature.w * pitch, feature.z, shift);

                // 'code' picks the subset word of this node, 'shift' the bit inside it.
                const bool inSubset = (subsets[nodeIdx * subsetSize + code] & (1 << shift)) != 0;
                stageSum += leaves[inSubset ? leafIdx : leafIdx + 1];

                nodeIdx += 1;
                leafIdx += 2;
            }

            if (stageSum < stage.threshold)
                return false;
        }

        return true;
    }

    const Stage* stages;
    const int nstages;

    const ClNode* nodes;
    const float* leaves;
    const int* subsets;
    const uchar4* features;

    const int subsetSize;
    const LBP evaluator;
};
|
|
|
|
|
|
|
|
// Classifies every sliding window of every pyramid level; one thread per window.
//
// Launch layout: 1D grid of 1D blocks, flat thread id = blockIdx.x * blockDim.x + threadIdx.x;
// threads with an id >= total exit immediately. No shared memory is used.
//
// The flat id is mapped to (level, y, x) by walking the levels: while the id
// does not fit into the window count of the current level, that count is
// subtracted, 'scale' is multiplied by 'factor' and the horizontal offset
// 'wshift' grows by the rounded level width + 1, which is where the next
// level starts inside 'integral'. Within a level y = id / windowsForLine and
// x = id - y * windowsForLine; both are doubled while scale <= 2.
//
//   cascade          : cascade to evaluate
//   frameW, frameH   : size of the unscaled frame
//   windowW, windowH : size of the detection window
//   scale            : scale of the first level
//   factor           : ratio between the scales of consecutive levels
//   total            : number of windows over all levels
//   integral, pitch  : integral image data and its row stride in elements
//   objects          : single-row output buffer with objects.cols slots
//   classified       : device counter used to allocate output slots; it is
//                      advanced with atomicInc and therefore wraps back to 0
//                      once it has reached objects.cols
__global__ void lbp_cascade(const Cascade cascade, int frameW, int frameH, int windowW, int windowH, float scale, const float factor,
                            const int total, int* integral, const int pitch, DevMem2D_<int4> objects, unsigned int* classified)
{
    int ftid = blockIdx.x * blockDim.x + threadIdx.x;
    if (ftid >= total) return;

    // step == 1 halves the number of windows per line/column on small scales.
    int step = (scale <= 2.f);

    int windowsForLine = (__float2int_rn( __fdividef(frameW, scale)) - windowW) >> step;
    int stotal = windowsForLine * ( (__float2int_rn( __fdividef(frameH, scale)) - windowH) >> step);
    int wshift = 0;

    int scaleTid = ftid;

    // Find the level this thread belongs to.
    while (scaleTid >= stotal)
    {
        scaleTid -= stotal;
        wshift += __float2int_rn(__fdividef(frameW, scale)) + 1;
        scale *= factor;
        step = (scale <= 2.f);
        windowsForLine = ( ((__float2int_rn(__fdividef(frameW, scale)) - windowW) >> step));
        stotal = windowsForLine * ( (__float2int_rn(__fdividef(frameH, scale)) - windowH) >> step);
    }

    int y = __fdividef(scaleTid, windowsForLine);
    int x = scaleTid - y * windowsForLine;

    x <<= step;
    y <<= step;

    // Windows starting beyond the level are never reported, so skip them
    // before spending time on the cascade (the result would be discarded).
    if (x >= __float2int_rn(__fdividef(frameW, scale)) - windowW) return;

    if (!cascade(y, x + wshift, integral, pitch)) return;

    // Map the hit back to coordinates of the unscaled frame.
    int4 rect;
    rect.x = __float2int_rn(x * scale);
    rect.y = __float2int_rn(y * scale);
    rect.z = __float2int_rn(windowW * scale);
    rect.w = __float2int_rn(windowH * scale);

    // atomicInc returns the previous counter value, which lies in
    // [0, objects.cols] inclusive: the value objects.cols itself is returned
    // once when the buffer is full (and always when cols == 0). Writing that
    // slot would run one element past the end of the buffer.
    const unsigned int capacity = (unsigned int)objects.cols;
    unsigned int res = atomicInc(classified, capacity);
    if (res < capacity)
        objects(0, res) = rect;
}
|
|
|
|
|
|
|
|
// Host entry point: evaluates the LBP cascade on all windows of an image pyramid.
//
// Wraps the raw device tables into a Cascade and launches lbp_cascade with
// 128 threads per block and enough blocks to cover 'workAmount' windows.
//
//   frameW, frameH   : size of the unscaled frame
//   windowW, windowH : size of the detection window
//   initialScale     : scale of the first pyramid level
//   factor           : ratio between the scales of consecutive levels
//   workAmount       : total number of windows over all levels
//   mstages .. mfeatures, nstages, subsetSize : cascade tables in device memory
//   objects          : device buffer receiving the detected rectangles
//   classified       : device counter of detected rectangles
//   integral         : integral images of the pyramid levels
//
// Launch errors are reported through cudaSafeCall. Nothing is launched when
// 'workAmount' is not positive.
void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initialScale, float factor, int workAmount,
    const DevMem2Db& mstages, const int nstages, const DevMem2Di& mnodes, const DevMem2Df& mleaves, const DevMem2Di& msubsets, const DevMem2Db& mfeatures,
    const int subsetSize, DevMem2D_<int4> objects, unsigned int* classified, DevMem2Di integral)
{
    // A grid of zero blocks is an invalid launch configuration; with no
    // windows there is nothing to classify.
    if (workAmount <= 0)
        return;

    const int block = 128;
    int grid = divUp(workAmount, block);

    cudaSafeCall( cudaFuncSetCacheConfig(lbp_cascade, cudaFuncCachePreferL1) );

    Cascade cascade((Stage*)mstages.ptr(), nstages, (ClNode*)mnodes.ptr(), mleaves.ptr(), msubsets.ptr(), (uchar4*)mfeatures.ptr(), subsetSize);

    // integral.step is in bytes; the kernel indexes the image in int elements.
    lbp_cascade<<<grid, block>>>(cascade, frameW, frameH, windowW, windowH, initialScale, factor, workAmount, integral.ptr(), integral.step / sizeof(int), objects, classified);

    // Kernel launches do not return a status; pick up configuration errors here.
    cudaSafeCall( cudaGetLastError() );
}
|
2012-06-26 00:39:40 +08:00
|
|
|
}
|
|
|
|
}}}
|