/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "opencv2/objdetect/objdetect_c.h" using namespace cv; using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) Ptr cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr(); } Ptr cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr(); } #else // // CascadeClassifierBase // namespace { class CascadeClassifierBase : public cuda::CascadeClassifier { public: CascadeClassifierBase(); virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; } virtual Size getMaxObjectSize() const { return maxObjectSize_; } virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; } virtual Size getMinObjectSize() const { return minObjectSize_; } virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; } virtual double getScaleFactor() const { return scaleFactor_; } virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; } virtual int getMinNeighbors() const { return minNeighbors_; } virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; } virtual bool getFindLargestObject() { return findLargestObject_; } virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; } virtual int getMaxNumObjects() const { return maxNumObjects_; } protected: Size maxObjectSize_; Size minObjectSize_; double scaleFactor_; int minNeighbors_; bool findLargestObject_; int maxNumObjects_; }; CascadeClassifierBase::CascadeClassifierBase() : maxObjectSize_(), minObjectSize_(), scaleFactor_(1.2), minNeighbors_(4), findLargestObject_(false), maxNumObjects_(100) { } } // // HaarCascade // #ifdef HAVE_OPENCV_CUDALEGACY namespace { class HaarCascade_Impl : public CascadeClassifierBase { public: explicit HaarCascade_Impl(const String& filename); virtual Size getClassifierSize() const; virtual void detectMultiScale(InputArray image, OutputArray objects, Stream& stream); virtual void convert(OutputArray gpu_objects, std::vector& objects); private: NCVStatus load(const String& classifierFile); NCVStatus calculateMemReqsAndAllocate(const Size& frameSize); NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections); Size lastAllocatedFrameSize; Ptr gpuAllocator; Ptr cpuAllocator; cudaDeviceProp devProp; NCVStatus ncvStat; Ptr gpuCascadeAllocator; Ptr cpuCascadeAllocator; Ptr > h_haarStages; Ptr > h_haarNodes; Ptr > h_haarFeatures; HaarClassifierCascadeDescriptor haar; Ptr > d_haarStages; Ptr > d_haarNodes; Ptr > d_haarFeatures; }; static void NCVDebugOutputHandler(const String &msg) { CV_Error(Error::GpuApiCallError, msg.c_str()); } HaarCascade_Impl::HaarCascade_Impl(const String& filename) : lastAllocatedFrameSize(-1, -1) { ncvSetDebugOutputHandler(NCVDebugOutputHandler); ncvSafeCall( load(filename) ); } Size HaarCascade_Impl::getClassifierSize() const { return Size(haar.ClassifierSize.width, haar.ClassifierSize.height); } void HaarCascade_Impl::detectMultiScale(InputArray _image, OutputArray _objects, Stream& stream) { const GpuMat image = _image.getGpuMat(); CV_Assert( image.depth() == CV_8U); CV_Assert( scaleFactor_ > 1 ); CV_Assert( !stream ); Size ncvMinSize = getClassifierSize(); if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height) { ncvMinSize.width = minObjectSize_.width; ncvMinSize.height = minObjectSize_.height; } BufferPool pool(stream); GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, traits::Type::value); unsigned int numDetections; ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) ); if (numDetections > 0) { objectsBuf.colRange(0, numDetections).copyTo(_objects); } else { _objects.release(); } } void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector& objects) { if (_gpu_objects.empty()) { objects.clear(); return; } Mat gpu_objects; if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT) { _gpu_objects.getGpuMat().download(gpu_objects); } else { gpu_objects = _gpu_objects.getMat(); } CV_Assert( gpu_objects.rows == 1 ); CV_Assert( gpu_objects.type() == traits::Type::value ); Rect* ptr = gpu_objects.ptr(); objects.assign(ptr, ptr + gpu_objects.cols); } NCVStatus HaarCascade_Impl::load(const String& classifierFile) { int devId = cv::cuda::getDevice(); ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR); // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator gpuCascadeAllocator = makePtr(NCVMemoryTypeDevice, static_cast(devProp.textureAlignment)); cpuCascadeAllocator = makePtr(NCVMemoryTypeHostPinned, static_cast(devProp.textureAlignment)); ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR); ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR); Ncv32u haarNumStages, haarNumNodes, haarNumFeatures; ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures); ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR); h_haarStages.reset (new NCVVectorAlloc(*cpuCascadeAllocator, haarNumStages)); h_haarNodes.reset (new NCVVectorAlloc(*cpuCascadeAllocator, haarNumNodes)); h_haarFeatures.reset(new NCVVectorAlloc(*cpuCascadeAllocator, haarNumFeatures)); ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR); ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR); ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR); ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures); ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR); d_haarStages.reset (new NCVVectorAlloc(*gpuCascadeAllocator, haarNumStages)); d_haarNodes.reset (new NCVVectorAlloc(*gpuCascadeAllocator, haarNumNodes)); d_haarFeatures.reset(new NCVVectorAlloc(*gpuCascadeAllocator, haarNumFeatures)); ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR); ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR); ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR); ncvStat = h_haarStages->copySolid(*d_haarStages, 0); ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR); ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0); ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR); ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0); ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR); return NCV_SUCCESS; } NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize) { if (lastAllocatedFrameSize == frameSize) { return NCV_SUCCESS; } // Calculate memory requirements and create real allocators NCVMemStackAllocator gpuCounter(static_cast(devProp.textureAlignment)); NCVMemStackAllocator cpuCounter(static_cast(devProp.textureAlignment)); ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR); ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR); NCVMatrixAlloc d_src(gpuCounter, frameSize.width, frameSize.height); NCVMatrixAlloc h_src(cpuCounter, frameSize.width, frameSize.height); ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); NCVVectorAlloc d_rects(gpuCounter, 100); ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); NcvSize32u roi; roi.width = d_src.width(); roi.height = d_src.height(); Ncv32u numDetections; ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages, *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0); ncvAssertReturnNcvStat(ncvStat); ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); gpuAllocator = makePtr(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast(devProp.textureAlignment)); cpuAllocator = makePtr(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast(devProp.textureAlignment)); ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR); ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR); lastAllocatedFrameSize = frameSize; return NCV_SUCCESS; } NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections) { calculateMemReqsAndAllocate(src.size()); NCVMemPtr src_beg; src_beg.ptr = (void*)src.ptr(); src_beg.memtype = NCVMemoryTypeDevice; NCVMemSegment src_seg; src_seg.begin = src_beg; src_seg.size = src.step * src.rows; NCVMatrixReuse d_src(src_seg, static_cast(devProp.textureAlignment), src.cols, src.rows, static_cast(src.step), true); ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE); CV_Assert(objects.rows == 1); NCVMemPtr objects_beg; objects_beg.ptr = (void*)objects.ptr(); objects_beg.memtype = NCVMemoryTypeDevice; NCVMemSegment objects_seg; objects_seg.begin = objects_beg; objects_seg.size = objects.step * objects.rows; NCVVectorReuse d_rects(objects_seg, objects.cols); ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE); NcvSize32u roi; roi.width = d_src.width(); roi.height = d_src.height(); NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height); Ncv32u flags = 0; flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0; ncvStat = ncvDetectObjectsMultiScale_device( d_src, roi, d_rects, numDetections, haar, *h_haarStages, *d_haarStages, *d_haarNodes, *d_haarFeatures, winMinSize, minNeighbors_, scaleFactor_, 1, flags, *gpuAllocator, *cpuAllocator, devProp, 0); ncvAssertReturnNcvStat(ncvStat); ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); return NCV_SUCCESS; } } #endif // // LbpCascade // namespace cv { namespace cuda { namespace device { namespace lbp { void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initalScale, float factor, int total, const PtrStepSzb& mstages, const int nstages, const PtrStepSzi& mnodes, const PtrStepSzf& mleaves, const PtrStepSzi& msubsets, const PtrStepSzb& mfeatures, const int subsetSize, PtrStepSz objects, unsigned int* classified, PtrStepSzi integral); void connectedConmonents(PtrStepSz candidates, int ncandidates, PtrStepSz objects, int groupThreshold, float grouping_eps, unsigned int* nclasses); } }}} namespace { cv::Size operator -(const cv::Size& a, const cv::Size& b) { return cv::Size(a.width - b.width, a.height - b.height); } cv::Size operator +(const cv::Size& a, const int& i) { return cv::Size(a.width + i, a.height + i); } cv::Size operator *(const cv::Size& a, const float& f) { return cv::Size(cvRound(a.width * f), cvRound(a.height * f)); } cv::Size operator /(const cv::Size& a, const float& f) { return cv::Size(cvRound(a.width / f), cvRound(a.height / f)); } bool operator <=(const cv::Size& a, const cv::Size& b) { return a.width <= b.width && a.height <= b.width; } struct PyrLavel { PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize) { do { order = _order; scale = pow(_scale, order); sFrame = frame / scale; workArea = sFrame - window + 1; sWindow = window * scale; _order++; } while (sWindow <= minObjectSize); } bool isFeasible(cv::Size maxObj) { return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj; } PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize) { return PyrLavel(order + 1, factor, frame, window, minObjectSize); } int order; float scale; cv::Size sFrame; cv::Size workArea; cv::Size sWindow; }; class LbpCascade_Impl : public CascadeClassifierBase { public: explicit LbpCascade_Impl(const FileStorage& file); virtual Size getClassifierSize() const { return NxM; } virtual void detectMultiScale(InputArray image, OutputArray objects, Stream& stream); virtual void convert(OutputArray gpu_objects, std::vector& objects); private: bool load(const FileNode &root); void allocateBuffers(cv::Size frame); private: struct Stage { int first; int ntrees; float threshold; }; enum stage { BOOST = 0 }; enum feature { LBP = 1, HAAR = 2 }; static const stage stageType = BOOST; static const feature featureType = LBP; cv::Size NxM; bool isStumps; int ncategories; int subsetSize; int nodeStep; // gpu representation of classifier GpuMat stage_mat; GpuMat trees_mat; GpuMat nodes_mat; GpuMat leaves_mat; GpuMat subsets_mat; GpuMat features_mat; GpuMat integral; GpuMat integralBuffer; GpuMat resuzeBuffer; GpuMat candidates; static const int integralFactor = 4; }; LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file) { load(file.getFirstTopLevelNode()); } void LbpCascade_Impl::detectMultiScale(InputArray _image, OutputArray _objects, Stream& stream) { const GpuMat image = _image.getGpuMat(); CV_Assert( image.depth() == CV_8U); CV_Assert( scaleFactor_ > 1 ); CV_Assert( !stream ); const float grouping_eps = 0.2f; BufferPool pool(stream); GpuMat objects = pool.getBuffer(1, maxNumObjects_, traits::Type::value); // used for debug // candidates.setTo(cv::Scalar::all(0)); // objects.setTo(cv::Scalar::all(0)); if (maxObjectSize_ == cv::Size()) maxObjectSize_ = image.size(); allocateBuffers(image.size()); unsigned int classified = 0; GpuMat dclassified(1, 1, CV_32S); cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) ); PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_); while (level.isFeasible(maxObjectSize_)) { int acc = level.sFrame.width + 1; float iniScale = level.scale; cv::Size area = level.workArea; int step = 1 + (level.scale <= 2.f); int total = 0, prev = 0; while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_)) { // create sutable matrix headers GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height)); GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1)); // generate integral for scale cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR); cuda::integral(src, sint); // calculate job int totalWidth = level.workArea.width / step; total += totalWidth * (level.workArea.height / step); // go to next pyramide level level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_); area = level.workArea; step = (1 + (level.scale <= 2.f)); prev = acc; acc += level.sFrame.width + 1; } device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr(), integral); } if (minNeighbors_ <= 0 || objects.empty()) return; cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr()); cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaDeviceSynchronize() ); if (classified > 0) { objects.colRange(0, classified).copyTo(_objects); } else { _objects.release(); } } void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector& objects) { if (_gpu_objects.empty()) { objects.clear(); return; } Mat gpu_objects; if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT) { _gpu_objects.getGpuMat().download(gpu_objects); } else { gpu_objects = _gpu_objects.getMat(); } CV_Assert( gpu_objects.rows == 1 ); CV_Assert( gpu_objects.type() == traits::Type::value ); Rect* ptr = gpu_objects.ptr(); objects.assign(ptr, ptr + gpu_objects.cols); } bool LbpCascade_Impl::load(const FileNode &root) { const char *CUDA_CC_STAGE_TYPE = "stageType"; const char *CUDA_CC_FEATURE_TYPE = "featureType"; const char *CUDA_CC_BOOST = "BOOST"; const char *CUDA_CC_LBP = "LBP"; const char *CUDA_CC_MAX_CAT_COUNT = "maxCatCount"; const char *CUDA_CC_HEIGHT = "height"; const char *CUDA_CC_WIDTH = "width"; const char *CUDA_CC_STAGE_PARAMS = "stageParams"; const char *CUDA_CC_MAX_DEPTH = "maxDepth"; const char *CUDA_CC_FEATURE_PARAMS = "featureParams"; const char *CUDA_CC_STAGES = "stages"; const char *CUDA_CC_STAGE_THRESHOLD = "stageThreshold"; const float CUDA_THRESHOLD_EPS = 1e-5f; const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers"; const char *CUDA_CC_INTERNAL_NODES = "internalNodes"; const char *CUDA_CC_LEAF_VALUES = "leafValues"; const char *CUDA_CC_FEATURES = "features"; const char *CUDA_CC_RECT = "rect"; String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE]; CV_Assert(stageTypeStr == CUDA_CC_BOOST); String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE]; CV_Assert(featureTypeStr == CUDA_CC_LBP); NxM.width = (int)root[CUDA_CC_WIDTH]; NxM.height = (int)root[CUDA_CC_HEIGHT]; CV_Assert( NxM.height > 0 && NxM.width > 0 ); isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1) ? true : false; CV_Assert(isStumps); FileNode fn = root[CUDA_CC_FEATURE_PARAMS]; if (fn.empty()) return false; ncategories = fn[CUDA_CC_MAX_CAT_COUNT]; subsetSize = (ncategories + 31) / 32; nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 ); fn = root[CUDA_CC_STAGES]; if (fn.empty()) return false; std::vector stages; stages.reserve(fn.size()); std::vector cl_trees; std::vector cl_nodes; std::vector cl_leaves; std::vector subsets; FileNodeIterator it = fn.begin(), it_end = fn.end(); for (size_t si = 0; it != it_end; si++, ++it ) { FileNode fns = *it; Stage st; st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS; fns = fns[CUDA_CC_WEAK_CLASSIFIERS]; if (fns.empty()) return false; st.ntrees = (int)fns.size(); st.first = (int)cl_trees.size(); stages.push_back(st);// (int, int, float) cl_trees.reserve(stages[si].first + stages[si].ntrees); // weak trees FileNodeIterator it1 = fns.begin(), it1_end = fns.end(); for ( ; it1 != it1_end; ++it1 ) { FileNode fnw = *it1; FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES]; FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES]; if ( internalNodes.empty() || leafValues.empty() ) return false; int nodeCount = (int)internalNodes.size()/nodeStep; cl_trees.push_back(nodeCount); cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3); cl_leaves.reserve(cl_leaves.size() + leafValues.size()); if( subsetSize > 0 ) subsets.reserve(subsets.size() + nodeCount * subsetSize); // nodes FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end(); for( ; iIt != iEnd; ) { cl_nodes.push_back((int)*(iIt++)); cl_nodes.push_back((int)*(iIt++)); cl_nodes.push_back((int)*(iIt++)); if( subsetSize > 0 ) for( int j = 0; j < subsetSize; j++, ++iIt ) subsets.push_back((int)*iIt); } // leaves iIt = leafValues.begin(), iEnd = leafValues.end(); for( ; iIt != iEnd; ++iIt ) cl_leaves.push_back((float)*iIt); } } fn = root[CUDA_CC_FEATURES]; if( fn.empty() ) return false; std::vector features; features.reserve(fn.size() * 4); FileNodeIterator f_it = fn.begin(), f_end = fn.end(); for (; f_it != f_end; ++f_it) { FileNode rect = (*f_it)[CUDA_CC_RECT]; FileNodeIterator r_it = rect.begin(); features.push_back(saturate_cast((int)*(r_it++))); features.push_back(saturate_cast((int)*(r_it++))); features.push_back(saturate_cast((int)*(r_it++))); features.push_back(saturate_cast((int)*(r_it++))); } // copy data structures on gpu stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) )); trees_mat.upload(cv::Mat(cl_trees).reshape(1,1)); nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1)); leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1)); subsets_mat.upload(cv::Mat(subsets).reshape(1,1)); features_mat.upload(cv::Mat(features).reshape(4,1)); return true; } void LbpCascade_Impl::allocateBuffers(cv::Size frame) { if (frame == cv::Size()) return; if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows) { resuzeBuffer.create(frame, CV_8UC1); integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1); #ifdef HAVE_OPENCV_CUDALEGACY NcvSize32u roiSize; roiSize.width = frame.width; roiSize.height = frame.height; cudaDeviceProp prop; cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) ); Ncv32u bufSize; ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); integralBuffer.create(1, bufSize, CV_8UC1); #endif candidates.create(1 , frame.width >> 1, CV_32SC4); } } } // // create // Ptr cv::cuda::CascadeClassifier::create(const String& filename) { String fext = filename.substr(filename.find_last_of(".") + 1); fext = fext.toLowerCase(); if (fext == "nvbin") { #ifndef HAVE_OPENCV_CUDALEGACY CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade"); return Ptr(); #else return makePtr(filename); #endif } FileStorage fs(filename, FileStorage::READ); if (!fs.isOpened()) { #ifndef HAVE_OPENCV_CUDALEGACY CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade"); return Ptr(); #else return makePtr(filename); #endif } const char *CUDA_CC_LBP = "LBP"; String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"]; if (featureTypeStr == CUDA_CC_LBP) { return makePtr(fs); } else { #ifndef HAVE_OPENCV_CUDALEGACY CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade"); return Ptr(); #else return makePtr(filename); #endif } CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier"); return Ptr(); } Ptr cv::cuda::CascadeClassifier::create(const FileStorage& file) { return makePtr(file); } #endif