mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 22:00:25 +08:00
optimize memory usage
This commit is contained in:
parent
b83d4add2e
commit
8108bd30fe
@ -42,18 +42,17 @@
|
|||||||
|
|
||||||
#include <opencv2/gpu/device/common.hpp>
|
#include <opencv2/gpu/device/common.hpp>
|
||||||
#include <icf.hpp>
|
#include <icf.hpp>
|
||||||
// #include <opencv2/gpu/device/saturate_cast.hpp>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
// #include <float.h>
|
#include <float.h>
|
||||||
|
|
||||||
// //#define LOG_CUDA_CASCADE
|
// #define LOG_CUDA_CASCADE
|
||||||
|
|
||||||
// #if defined LOG_CUDA_CASCADE
|
#if defined LOG_CUDA_CASCADE
|
||||||
// # define dprintf(format, ...) \
|
# define dprintf(format, ...) \
|
||||||
// do { printf(format, __VA_ARGS__); } while (0)
|
do { printf(format, __VA_ARGS__); } while (0)
|
||||||
// #else
|
#else
|
||||||
// # define dprintf(format, ...)
|
# define dprintf(format, ...)
|
||||||
// #endif
|
#endif
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device {
|
namespace cv { namespace gpu { namespace device {
|
||||||
namespace icf {
|
namespace icf {
|
||||||
@ -94,32 +93,128 @@ namespace icf {
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
texture<float2, cudaTextureType1D, cudaReadModeElementType> tnode;
|
texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv;
|
||||||
|
// ToDo: do it in load time
|
||||||
|
// __device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
|
||||||
|
// {
|
||||||
|
// scaledRect = node.rect;
|
||||||
|
// return (float)(node.threshold & 0x0FFFFFFFU);
|
||||||
|
// }
|
||||||
|
|
||||||
|
__device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
|
||||||
|
{
|
||||||
|
float relScale = level.relScale;
|
||||||
|
float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
||||||
|
|
||||||
|
dprintf("feature %d box %d %d %d %d\n", (node.threshold >> 28), scaledRect.x, scaledRect.y,
|
||||||
|
scaledRect.z, scaledRect.w);
|
||||||
|
dprintf("rescale: %f [%f %f] selected %f\n",level.relScale, level.scaling[0], level.scaling[1],
|
||||||
|
level.scaling[(node.threshold >> 28) > 6]);
|
||||||
|
|
||||||
|
// rescale
|
||||||
|
scaledRect.x = __float2int_rn(relScale * scaledRect.x);
|
||||||
|
scaledRect.y = __float2int_rn(relScale * scaledRect.y);
|
||||||
|
scaledRect.z = __float2int_rn(relScale * scaledRect.z);
|
||||||
|
scaledRect.w = __float2int_rn(relScale * scaledRect.w);
|
||||||
|
|
||||||
|
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
||||||
|
|
||||||
|
float approx = 1.f;
|
||||||
|
// if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
|
||||||
|
{
|
||||||
|
const float expected_new_area = farea * relScale * relScale;
|
||||||
|
approx = sarea / expected_new_area;
|
||||||
|
}
|
||||||
|
|
||||||
|
dprintf("new rect: %d box %d %d %d %d rel areas %f %f\n", (node.threshold >> 28),
|
||||||
|
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
|
||||||
|
|
||||||
|
|
||||||
|
float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx;
|
||||||
|
rootThreshold *= level.scaling[(node.threshold >> 28) > 6];
|
||||||
|
|
||||||
|
dprintf("approximation %f %d -> %f %f\n", approx, (node.threshold & 0x0FFFFFFFU), rootThreshold,
|
||||||
|
level.scaling[(node.threshold >> 28) > 6]);
|
||||||
|
|
||||||
|
return rootThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int get(const int x, int y, int channel, uchar4 area)
|
||||||
|
{
|
||||||
|
|
||||||
|
dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
|
||||||
|
dprintf("get for channel %d\n", channel);
|
||||||
|
dprintf("extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",
|
||||||
|
x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
|
||||||
|
x + area.x, y + area.w);
|
||||||
|
dprintf("at point %d %d with offset %d\n", x, y, 0);
|
||||||
|
|
||||||
|
int offset = channel * 121;
|
||||||
|
y += offset;
|
||||||
|
|
||||||
|
int a = tex2D(thogluv, x + area.x, y + area.y);
|
||||||
|
int b = tex2D(thogluv, x + area.z, y + area.y);
|
||||||
|
int c = tex2D(thogluv, x + area.z, y + area.w);
|
||||||
|
int d = tex2D(thogluv, x + area.x, y + area.w);
|
||||||
|
|
||||||
|
dprintf(" retruved integral values: %d %d %d %d\n", a, b, c, d);
|
||||||
|
|
||||||
|
return (a - b + c - d);
|
||||||
|
}
|
||||||
|
|
||||||
__global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
|
__global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
|
||||||
const Node* nodes,
|
const Node* nodes, const float* leaves, PtrStepSz<uchar4> objects)
|
||||||
PtrStepSz<uchar4> objects)
|
|
||||||
{
|
{
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
Level level = levels[blockIdx.z];
|
Level level = levels[blockIdx.z];
|
||||||
|
|
||||||
|
// if (x > 0 || y > 0 || blockIdx.z > 0) return;
|
||||||
if(x >= level.workRect.x || y >= level.workRect.y) return;
|
if(x >= level.workRect.x || y >= level.workRect.y) return;
|
||||||
|
|
||||||
Octave octave = octaves[level.octave];
|
Octave octave = octaves[level.octave];
|
||||||
|
|
||||||
int st = octave.index * octave.stages;
|
int st = octave.index * octave.stages;
|
||||||
const int stEnd = st + 1000;//octave.stages;
|
const int stEnd = st + 1000;//octave.stages;
|
||||||
|
|
||||||
float confidence = 0.f;
|
float confidence = 0.f;
|
||||||
|
|
||||||
#pragma unroll 8
|
// #pragma unroll 8
|
||||||
for(; st < stEnd; ++st)
|
for(; st < stEnd; ++st)
|
||||||
{
|
{
|
||||||
|
dprintf("\n\nstage: %d\n", st);
|
||||||
const int nId = st * 3;
|
const int nId = st * 3;
|
||||||
const Node node = nodes[nId];
|
Node node = nodes[nId];
|
||||||
|
|
||||||
const float stage = stages[st];
|
dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w,
|
||||||
confidence += node.rect.x * stage;
|
node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
|
||||||
|
|
||||||
|
float threshold = rescale(level, node.rect, node);
|
||||||
|
int sum = get(x, y, (node.threshold >> 28), node.rect);
|
||||||
|
|
||||||
|
dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
|
||||||
|
node.rect.w, threshold);
|
||||||
|
|
||||||
|
int next = 1 + (int)(sum >= threshold);
|
||||||
|
dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold);
|
||||||
|
|
||||||
|
node = nodes[nId + next];
|
||||||
|
threshold = rescale(level, node.rect, node);
|
||||||
|
sum = get(x, y, (node.threshold >> 28), node.rect);
|
||||||
|
|
||||||
|
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
|
||||||
|
float impact = leaves[st * 4 + lShift];
|
||||||
|
confidence += impact;
|
||||||
|
|
||||||
|
if (confidence <= stages[st]) st = stEnd + 1;
|
||||||
|
dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
|
||||||
|
dprintf("extracted stage: %f\n", stages[st]);
|
||||||
|
dprintf("computed score: %f\n\n", confidence);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if (st == stEnd)
|
||||||
|
// printf("%d %d %d\n", x, y, st);
|
||||||
|
|
||||||
uchar4 val;
|
uchar4 val;
|
||||||
val.x = (int)confidence;
|
val.x = (int)confidence;
|
||||||
if (x == y) objects(0, threadIdx.x) = val;
|
if (x == y) objects(0, threadIdx.x) = val;
|
||||||
@ -127,188 +222,27 @@ namespace icf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||||
const PtrStepSzb& nodes, const PtrStepSzb& features,
|
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects)
|
||||||
PtrStepSz<uchar4> objects)
|
|
||||||
{
|
{
|
||||||
int fw = 160;
|
int fw = 160;
|
||||||
int fh = 120;
|
int fh = 120;
|
||||||
|
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(fw / 32, fh / 8, 47);
|
dim3 grid(fw / 32, fh / 8, 47);
|
||||||
|
|
||||||
const Level* l = (const Level*)levels.ptr();
|
const Level* l = (const Level*)levels.ptr();
|
||||||
const Octave* oct = ((const Octave*)octaves.ptr());
|
const Octave* oct = ((const Octave*)octaves.ptr());
|
||||||
const float* st = (const float*)stages.ptr();
|
const float* st = (const float*)stages.ptr();
|
||||||
const Node* nd = (const Node*)nodes.ptr();
|
const Node* nd = (const Node*)nodes.ptr();
|
||||||
// cudaSafeCall( cudaBindTexture(0, tnode, nodes.data, rgb.cols / size) );
|
const float* lf = (const float*)leaves.ptr();
|
||||||
|
|
||||||
test_kernel<<<grid, block>>>(l, oct, st, nd, objects);
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
|
||||||
|
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
|
||||||
|
|
||||||
|
test_kernel<<<grid, block>>>(l, oct, st, nd, lf, objects);
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError());
|
cudaSafeCall( cudaGetLastError());
|
||||||
cudaSafeCall( cudaDeviceSynchronize());
|
cudaSafeCall( cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
// __global__ void detect(const cv::gpu::icf::Cascade cascade, const int* __restrict__ hogluv, const int pitch,
|
|
||||||
// PtrStepSz<uchar4> objects)
|
|
||||||
// {
|
|
||||||
// cascade.detectAt(hogluv, pitch, objects);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// }
|
|
||||||
|
|
||||||
// float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect,
|
|
||||||
// const int channel, const float threshold) const
|
|
||||||
// {
|
|
||||||
// dprintf("feature %d box %d %d %d %d\n", channel, scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w);
|
|
||||||
// dprintf("rescale: %f [%f %f]\n",level.relScale, level.scaling[0], level.scaling[1]);
|
|
||||||
|
|
||||||
// float relScale = level.relScale;
|
|
||||||
// float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
|
||||||
|
|
||||||
// // rescale
|
|
||||||
// scaledRect.x = __float2int_rn(relScale * scaledRect.x);
|
|
||||||
// scaledRect.y = __float2int_rn(relScale * scaledRect.y);
|
|
||||||
// scaledRect.z = __float2int_rn(relScale * scaledRect.z);
|
|
||||||
// scaledRect.w = __float2int_rn(relScale * scaledRect.w);
|
|
||||||
|
|
||||||
// float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
|
||||||
|
|
||||||
|
|
||||||
// float approx = 1.f;
|
|
||||||
// if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
|
|
||||||
// {
|
|
||||||
// const float expected_new_area = farea * relScale * relScale;
|
|
||||||
// approx = expected_new_area / sarea;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// dprintf("new rect: %d box %d %d %d %d rel areas %f %f\n", channel,
|
|
||||||
// scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
|
|
||||||
|
|
||||||
// // compensation areas rounding
|
|
||||||
// float rootThreshold = threshold / approx;
|
|
||||||
// // printf(" approx %f\n", rootThreshold);
|
|
||||||
// rootThreshold *= level.scaling[(int)(channel > 6)];
|
|
||||||
|
|
||||||
// dprintf("approximation %f %f -> %f %f\n", approx, threshold, rootThreshold, level.scaling[(int)(channel > 6)]);
|
|
||||||
|
|
||||||
// return rootThreshold;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// typedef unsigned char uchar;
|
|
||||||
// float __device get(const int* __restrict__ hogluv, const int pitch,
|
|
||||||
// const int x, const int y, int channel, uchar4 area)
|
|
||||||
// {
|
|
||||||
// dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
|
|
||||||
// dprintf("get for channel %d\n", channel);
|
|
||||||
// dprintf("extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",
|
|
||||||
// x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
|
|
||||||
// x + area.x, y + area.w);
|
|
||||||
// dprintf("at point %d %d with offset %d\n", x, y, 0);
|
|
||||||
|
|
||||||
// const int* curr = hogluv + ((channel * 121) + y) * pitch;
|
|
||||||
|
|
||||||
// int a = curr[area.y * pitch + x + area.x];
|
|
||||||
// int b = curr[area.y * pitch + x + area.z];
|
|
||||||
// int c = curr[area.w * pitch + x + area.z];
|
|
||||||
// int d = curr[area.w * pitch + x + area.x];
|
|
||||||
|
|
||||||
// dprintf(" retruved integral values: %d %d %d %d\n", a, b, c, d);
|
|
||||||
|
|
||||||
// return (a - b + c - d);
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
// void __device icf::Cascade::detectAt(const int* __restrict__ hogluv, const int pitch,
|
|
||||||
// PtrStepSz<uchar4>& objects) const
|
|
||||||
// {
|
|
||||||
// const icf::Level* lls = (const icf::Level*)levels.ptr();
|
|
||||||
|
|
||||||
// const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
// const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
// // if (x > 0 || y > 0) return;
|
|
||||||
|
|
||||||
// Level level = lls[blockIdx.z];
|
|
||||||
// if (x >= level.workRect.x || y >= level.workRect.y) return;
|
|
||||||
|
|
||||||
// dprintf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
|
|
||||||
// level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);
|
|
||||||
|
|
||||||
// const Octave octave = ((const Octave*)octaves.ptr())[level.octave];
|
|
||||||
// // printf("Octave: %d %d %d (%d %d) %f\n", octave.index, octave.stages,
|
|
||||||
// // octave.shrinkage, octave.size.x, octave.size.y, octave.scale);
|
|
||||||
|
|
||||||
// const int stBegin = octave.index * octave.stages, stEnd = stBegin + octave.stages;
|
|
||||||
|
|
||||||
// float detectionScore = 0.f;
|
|
||||||
|
|
||||||
// int st = stBegin;
|
|
||||||
// for(; st < stEnd; ++st)
|
|
||||||
// {
|
|
||||||
// const float stage = stages(0, st);
|
|
||||||
// dprintf("Stage: %f\n", stage);
|
|
||||||
// {
|
|
||||||
// const int nId = st * 3;
|
|
||||||
|
|
||||||
// // work with root node
|
|
||||||
// const Node node = ((const Node*)nodes.ptr())[nId];
|
|
||||||
|
|
||||||
// dprintf("Node: %d %f\n", node.feature, node.threshold);
|
|
||||||
|
|
||||||
// const Feature feature = ((const Feature*)features.ptr())[node.feature];
|
|
||||||
|
|
||||||
// uchar4 scaledRect = feature.rect;
|
|
||||||
// float threshold = rescale(level, scaledRect, feature.channel, node.threshold);
|
|
||||||
|
|
||||||
// float sum = get(hogluv,pitch, x, y, feature.channel, scaledRect);
|
|
||||||
|
|
||||||
// dprintf("root feature %d %f\n",feature.channel, sum);
|
|
||||||
|
|
||||||
// int next = 1 + (int)(sum >= threshold);
|
|
||||||
|
|
||||||
// dprintf("go: %d (%f >= %f)\n\n" ,next, sum, threshold);
|
|
||||||
|
|
||||||
// // leaves
|
|
||||||
// const Node leaf = ((const Node*)nodes.ptr())[nId + next];
|
|
||||||
// const Feature fLeaf = ((const Feature*)features.ptr())[leaf.feature];
|
|
||||||
|
|
||||||
// scaledRect = fLeaf.rect;
|
|
||||||
// threshold = rescale(level, scaledRect, fLeaf.channel, leaf.threshold);
|
|
||||||
// sum = get(hogluv, pitch, x, y, fLeaf.channel, scaledRect);
|
|
||||||
|
|
||||||
// const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
|
|
||||||
// float impact = leaves(0, (st * 4) + lShift);
|
|
||||||
|
|
||||||
// detectionScore += impact;
|
|
||||||
|
|
||||||
// dprintf("decided: %d (%f >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
|
|
||||||
// dprintf("extracted stage:\n");
|
|
||||||
// dprintf("ct %f\n", stage);
|
|
||||||
// dprintf("computed score %f\n\n", detectionScore);
|
|
||||||
// dprintf("\n\n");
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (detectionScore <= stage || st - stBegin == 100) break;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// dprintf("x %d y %d: %d\n", x, y, st - stBegin);
|
|
||||||
|
|
||||||
// if (st == stEnd)
|
|
||||||
// {
|
|
||||||
// uchar4 a;
|
|
||||||
// a.x = level.workRect.x;
|
|
||||||
// a.y = level.workRect.y;
|
|
||||||
// objects(0, threadIdx.x) = a;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// void icf::Cascade::detect(const cv::gpu::PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, cudaStream_t stream) const
|
|
||||||
// {
|
|
||||||
// dim3 block(32, 8, 1);
|
|
||||||
// dim3 grid(ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 47);
|
|
||||||
// device::detect<<<grid, block, 0, stream>>>(*this, hogluv, hogluv.step / sizeof(int), objects);
|
|
||||||
// cudaSafeCall( cudaGetLastError() );
|
|
||||||
// if (!stream)
|
|
||||||
// cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
// }
|
|
||||||
|
|
||||||
// }}
|
|
@ -40,11 +40,13 @@
|
|||||||
//
|
//
|
||||||
//M
|
//M
|
||||||
|
|
||||||
#include <opencv2/gpu/device/common.hpp>
|
|
||||||
|
|
||||||
#ifndef __OPENCV_ICF_HPP__
|
#ifndef __OPENCV_ICF_HPP__
|
||||||
#define __OPENCV_ICF_HPP__
|
#define __OPENCV_ICF_HPP__
|
||||||
|
|
||||||
|
#include <opencv2/gpu/device/common.hpp>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
// #if defined __CUDACC__
|
// #if defined __CUDACC__
|
||||||
// # define __device __device__ __forceinline__
|
// # define __device __device__ __forceinline__
|
||||||
// #else
|
// #else
|
||||||
@ -92,20 +94,27 @@ struct __align__(8) Level //is actually 24 bytes
|
|||||||
|
|
||||||
struct __align__(8) Node
|
struct __align__(8) Node
|
||||||
{
|
{
|
||||||
// int feature;
|
|
||||||
uchar4 rect;
|
uchar4 rect;
|
||||||
float threshold;
|
// ushort channel;
|
||||||
|
uint threshold;
|
||||||
|
|
||||||
Node(const uchar4 c, const int t) : rect(c), threshold(t) {}
|
enum { THRESHOLD_MASK = 0x0FFFFFFF };
|
||||||
|
|
||||||
|
Node(const uchar4 r, const uint ch, const uint t) : rect(r), threshold(t + (ch << 28))
|
||||||
|
{
|
||||||
|
// printf("%d\n", t);
|
||||||
|
// printf("[%d %d %d %d] %d, %d\n",rect.x, rect.y, rect.z, rect.w, (int)(threshold >> 28),
|
||||||
|
// (int)(0x0FFFFFFF & threshold));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __align__(8) Feature
|
// struct __align__(8) Feature
|
||||||
{
|
// {
|
||||||
int channel;
|
// int channel;
|
||||||
uchar4 rect;
|
// uchar4 rect;
|
||||||
|
|
||||||
Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
|
// Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
|
||||||
};
|
// };
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
// struct Cascade
|
// struct Cascade
|
||||||
|
@ -60,19 +60,10 @@ namespace icf {
|
|||||||
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
||||||
const int fw, const int fh, const int bins);
|
const int fw, const int fh, const int bins);
|
||||||
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||||
const PtrStepSzb& nodes, const PtrStepSzb& features,
|
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects);
|
||||||
PtrStepSz<uchar4> objects);
|
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
// namespace {
|
|
||||||
// char *itoa(long i, char* s, int /*dummy_radix*/)
|
|
||||||
// {
|
|
||||||
// sprintf(s, "%ld", i);
|
|
||||||
// return s;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
struct cv::gpu::SoftCascade::Filds
|
struct cv::gpu::SoftCascade::Filds
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -97,7 +88,6 @@ struct cv::gpu::SoftCascade::Filds
|
|||||||
GpuMat stages;
|
GpuMat stages;
|
||||||
GpuMat nodes;
|
GpuMat nodes;
|
||||||
GpuMat leaves;
|
GpuMat leaves;
|
||||||
GpuMat features;
|
|
||||||
GpuMat levels;
|
GpuMat levels;
|
||||||
|
|
||||||
// preallocated buffer 640x480x10 for hogluv + 640x480 got gray
|
// preallocated buffer 640x480x10 for hogluv + 640x480 got gray
|
||||||
@ -137,7 +127,7 @@ struct cv::gpu::SoftCascade::Filds
|
|||||||
bool fill(const FileNode &root, const float mins, const float maxs);
|
bool fill(const FileNode &root, const float mins, const float maxs);
|
||||||
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
|
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
|
||||||
{
|
{
|
||||||
device::icf::detect(levels, octaves, stages, nodes, features, objects);
|
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -216,10 +206,9 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
|
|||||||
if (fn.empty()) return false;
|
if (fn.empty()) return false;
|
||||||
|
|
||||||
std::vector<Octave> voctaves;
|
std::vector<Octave> voctaves;
|
||||||
std::vector<float> vstages;
|
std::vector<float> vstages;
|
||||||
std::vector<Node> vnodes;
|
std::vector<Node> vnodes;
|
||||||
std::vector<float> vleaves;
|
std::vector<float> vleaves;
|
||||||
std::vector<Feature> vfeatures;
|
|
||||||
scales.clear();
|
scales.clear();
|
||||||
|
|
||||||
FileNodeIterator it = fn.begin(), it_end = fn.end();
|
FileNodeIterator it = fn.begin(), it_end = fn.end();
|
||||||
@ -245,6 +234,8 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
|
|||||||
FileNode ffs = fns[SC_FEATURES];
|
FileNode ffs = fns[SC_FEATURES];
|
||||||
if (ffs.empty()) return false;
|
if (ffs.empty()) return false;
|
||||||
|
|
||||||
|
FileNodeIterator ftrs = ffs.begin();
|
||||||
|
|
||||||
fns = fns[SC_STAGES];
|
fns = fns[SC_STAGES];
|
||||||
if (fn.empty()) return false;
|
if (fn.empty()) return false;
|
||||||
|
|
||||||
@ -263,10 +254,21 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
|
|||||||
FileNodeIterator inIt = fns.begin(), inIt_end = fns.end();
|
FileNodeIterator inIt = fns.begin(), inIt_end = fns.end();
|
||||||
for (; inIt != inIt_end;)
|
for (; inIt != inIt_end;)
|
||||||
{
|
{
|
||||||
int feature = (int)(*(inIt +=2)++) + feature_offset;
|
// int feature = (int)(*(inIt +=2)) + feature_offset;
|
||||||
float th = (float)(*(inIt++));
|
inIt +=3;
|
||||||
|
// extract feature, Todo:check it
|
||||||
|
uint th = saturate_cast<uint>((float)(*(inIt++)));
|
||||||
|
cv::FileNode ftn = (*ftrs)[SC_F_RECT];
|
||||||
|
cv::FileNodeIterator r_it = ftn.begin();
|
||||||
uchar4 rect;
|
uchar4 rect;
|
||||||
vnodes.push_back(Node(rect, th));
|
rect.x = saturate_cast<uchar>((int)*(r_it++));
|
||||||
|
rect.y = saturate_cast<uchar>((int)*(r_it++));
|
||||||
|
rect.z = saturate_cast<uchar>((int)*(r_it++));
|
||||||
|
rect.w = saturate_cast<uchar>((int)*(r_it++));
|
||||||
|
|
||||||
|
uint channel = saturate_cast<uint>((int)(*ftrs)[SC_F_CHANNEL]);
|
||||||
|
vnodes.push_back(Node(rect, channel, th));
|
||||||
|
++ftrs;
|
||||||
}
|
}
|
||||||
|
|
||||||
fns = (*ftr)[SC_LEAF];
|
fns = (*ftr)[SC_LEAF];
|
||||||
@ -276,19 +278,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
st = ffs.begin(), st_end = ffs.end();
|
|
||||||
for (; st != st_end; ++st )
|
|
||||||
{
|
|
||||||
cv::FileNode rn = (*st)[SC_F_RECT];
|
|
||||||
cv::FileNodeIterator r_it = rn.begin();
|
|
||||||
uchar4 rect;
|
|
||||||
rect.x = saturate_cast<uchar>((int)*(r_it++));
|
|
||||||
rect.y = saturate_cast<uchar>((int)*(r_it++));
|
|
||||||
rect.z = saturate_cast<uchar>((int)*(r_it++));
|
|
||||||
rect.w = saturate_cast<uchar>((int)*(r_it++));
|
|
||||||
vfeatures.push_back(Feature((int)(*st)[SC_F_CHANNEL], rect));
|
|
||||||
}
|
|
||||||
|
|
||||||
feature_offset += octave.stages * 3;
|
feature_offset += octave.stages * 3;
|
||||||
++octIndex;
|
++octIndex;
|
||||||
}
|
}
|
||||||
@ -306,9 +295,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
|
|||||||
leaves.upload(cv::Mat(vleaves).reshape(1,1));
|
leaves.upload(cv::Mat(vleaves).reshape(1,1));
|
||||||
CV_Assert(!leaves.empty());
|
CV_Assert(!leaves.empty());
|
||||||
|
|
||||||
features.upload(cv::Mat(1, vfeatures.size() * sizeof(Feature), CV_8UC1, (uchar*)&(vfeatures[0]) ));
|
|
||||||
CV_Assert(!features.empty());
|
|
||||||
|
|
||||||
// compute levels
|
// compute levels
|
||||||
calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES);
|
calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES);
|
||||||
CV_Assert(!levels.empty());
|
CV_Assert(!levels.empty());
|
||||||
@ -425,7 +411,14 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// #define USE_REFERENCE_VALUES
|
#define USE_REFERENCE_VALUES
|
||||||
|
namespace {
|
||||||
|
char *itoa(long i, char* s, int /*dummy_radix*/)
|
||||||
|
{
|
||||||
|
sprintf(s, "%ld", i);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
|
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
|
||||||
GpuMat& objects, const int /*rejectfactor*/, Stream s)
|
GpuMat& objects, const int /*rejectfactor*/, Stream s)
|
||||||
{
|
{
|
||||||
@ -438,17 +431,20 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
|
|||||||
Filds& flds = *filds;
|
Filds& flds = *filds;
|
||||||
|
|
||||||
#if defined USE_REFERENCE_VALUES
|
#if defined USE_REFERENCE_VALUES
|
||||||
// cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
|
cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
|
||||||
// cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
|
|
||||||
// char buff[33];
|
|
||||||
|
|
||||||
// for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
|
cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
|
||||||
// {
|
char buff[33];
|
||||||
// cv::Mat channel;
|
|
||||||
// imgs[std::string("channel") + itoa(i, buff, 10)] >> channel;
|
for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
|
||||||
// GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
|
{
|
||||||
// gchannel.upload(channel);
|
cv::Mat channel;
|
||||||
// }
|
imgs[std::string("channel") + itoa(i, buff, 10)] >> channel;
|
||||||
|
|
||||||
|
// std::cout << "channel " << i << std::endl << channel << std::endl;
|
||||||
|
GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
|
||||||
|
gchannel.upload(channel);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
GpuMat& plane = flds.plane;
|
GpuMat& plane = flds.plane;
|
||||||
GpuMat& shrunk = flds.shrunk;
|
GpuMat& shrunk = flds.shrunk;
|
||||||
|
Loading…
Reference in New Issue
Block a user