Fix android build warnings

This commit is contained in:
Andrey Kamaev 2012-09-04 17:44:23 +04:00
parent 8325a28d09
commit 07d92d9e5a
4 changed files with 445 additions and 445 deletions

View File

@ -81,46 +81,46 @@ Mat BOWMSCTrainer::cluster() const {
return cluster(mergedDescriptors);
}
Mat BOWMSCTrainer::cluster(const Mat& descriptors) const {
Mat BOWMSCTrainer::cluster(const Mat& _descriptors) const {
CV_Assert(!descriptors.empty());
CV_Assert(!_descriptors.empty());
// TODO: sort the descriptors before clustering.
Mat icovar = Mat::eye(descriptors.cols,descriptors.cols,descriptors.type());
Mat icovar = Mat::eye(_descriptors.cols,_descriptors.cols,_descriptors.type());
vector<Mat> initialCentres;
initialCentres.push_back(descriptors.row(0));
for (int i = 1; i < descriptors.rows; i++) {
initialCentres.push_back(_descriptors.row(0));
for (int i = 1; i < _descriptors.rows; i++) {
double minDist = DBL_MAX;
for (size_t j = 0; j < initialCentres.size(); j++) {
minDist = std::min(minDist,
cv::Mahalanobis(descriptors.row(i),initialCentres[j],
cv::Mahalanobis(_descriptors.row(i),initialCentres[j],
icovar));
}
if (minDist > clusterSize)
initialCentres.push_back(descriptors.row(i));
initialCentres.push_back(_descriptors.row(i));
}
std::vector<std::list<cv::Mat> > clusters;
clusters.resize(initialCentres.size());
for (int i = 0; i < descriptors.rows; i++) {
for (int i = 0; i < _descriptors.rows; i++) {
int index = 0; double dist = 0, minDist = DBL_MAX;
for (size_t j = 0; j < initialCentres.size(); j++) {
dist = cv::Mahalanobis(descriptors.row(i),initialCentres[j],icovar);
dist = cv::Mahalanobis(_descriptors.row(i),initialCentres[j],icovar);
if (dist < minDist) {
minDist = dist;
index = (int)j;
}
}
clusters[index].push_back(descriptors.row(i));
clusters[index].push_back(_descriptors.row(i));
}
// TODO: throw away small clusters.
Mat vocabulary;
Mat centre = Mat::zeros(1,descriptors.cols,descriptors.type());
Mat centre = Mat::zeros(1,_descriptors.cols,_descriptors.type());
for (size_t i = 0; i < clusters.size(); i++) {
centre.setTo(0);
for (std::list<cv::Mat>::iterator Ci = clusters[i].begin(); Ci != clusters[i].end(); Ci++) {

View File

@ -63,7 +63,7 @@ namespace of2 {
// Numerically stable log(exp(a) + exp(b)) for two log-domain values.
//
// The larger argument is factored out so that exp() is only ever applied to a
// non-positive difference and cannot overflow.
//
// a, b    : log-domain operands (may be -infinity, i.e. log(0)).
// returns : log(exp(a) + exp(b)).
static double logsumexp(double a, double b) {
    // Equal operands are handled up front: for two infinities of the same
    // sign, (a - b) would be inf - inf = NaN and poison the result. For
    // finite equal operands this yields exactly the same value as the general
    // formula below, since log(1 + exp(0)) == log(2).
    if (a == b)
        return a + log(2.0);
    return a > b ? log(1 + exp(b - a)) + a : log(1 + exp(a - b)) + b;
}
FabMap::FabMap(const Mat& _clTree, double _PzGe,
double _PzGNe, int _flags, int _numSamples) :
clTree(_clTree), PzGe(_PzGe), PzGNe(_PzGNe), flags(
@ -445,16 +445,16 @@ FabMap1::~FabMap1() {
}
void FabMap1::getLikelihoods(const Mat& queryImgDescriptor,
const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
for (size_t i = 0; i < testImgDescriptors.size(); i++) {
for (size_t i = 0; i < testImageDescriptors.size(); i++) {
bool zq, zpq, Lzq;
double logP = 0;
for (int q = 0; q < clTree.cols; q++) {
zq = queryImgDescriptor.at<float>(0,q) > 0;
zpq = queryImgDescriptor.at<float>(0,pq(q)) > 0;
Lzq = testImgDescriptors[i].at<float>(0,q) > 0;
Lzq = testImageDescriptors[i].at<float>(0,q) > 0;
logP += log((this->*PzGL)(q, zq, zpq, Lzq));
@ -490,16 +490,16 @@ FabMapLUT::~FabMapLUT() {
}
void FabMapLUT::getLikelihoods(const Mat& queryImgDescriptor,
const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
double precFactor = (double)pow(10.0, -precision);
for (size_t i = 0; i < testImgDescriptors.size(); i++) {
for (size_t i = 0; i < testImageDescriptors.size(); i++) {
unsigned long long int logP = 0;
for (int q = 0; q < clTree.cols; q++) {
logP += table[q][(queryImgDescriptor.at<float>(0,pq(q)) > 0) +
((queryImgDescriptor.at<float>(0, q) > 0) << 1) +
((testImgDescriptors[i].at<float>(0,q) > 0) << 2)];
((testImageDescriptors[i].at<float>(0,q) > 0) << 2)];
}
matches.push_back(IMatch(0,(int)i,-precFactor*(double)logP,0));
}
@ -518,7 +518,7 @@ FabMapFBO::~FabMapFBO() {
}
void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
std::multiset<WordStats> wordData;
setWordStatistics(queryImgDescriptor, wordData);
@ -526,7 +526,7 @@ void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
vector<int> matchIndices;
vector<IMatch> queryMatches;
for (size_t i = 0; i < testImgDescriptors.size(); i++) {
for (size_t i = 0; i < testImageDescriptors.size(); i++) {
queryMatches.push_back(IMatch(0,(int)i,0,0));
matchIndices.push_back((int)i);
}
@ -543,7 +543,7 @@ void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
for (size_t i = 0; i < matchIndices.size(); i++) {
bool Lzq =
testImgDescriptors[matchIndices[i]].at<float>(0,wordIter->q) > 0;
testImageDescriptors[matchIndices[i]].at<float>(0,wordIter->q) > 0;
queryMatches[matchIndices[i]].likelihood +=
log((this->*PzGL)(wordIter->q,zq,zpq,Lzq));
currBest =
@ -689,17 +689,17 @@ void FabMap2::add(const vector<Mat>& queryImgDescriptors) {
}
void FabMap2::getLikelihoods(const Mat& queryImgDescriptor,
const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
if (&testImgDescriptors== &(this->testImgDescriptors)) {
if (&testImageDescriptors == &testImgDescriptors) {
getIndexLikelihoods(queryImgDescriptor, testDefaults, testInvertedMap,
matches);
} else {
CV_Assert(!(flags & MOTION_MODEL));
vector<double> defaults;
std::map<int, vector<int> > invertedMap;
for (size_t i = 0; i < testImgDescriptors.size(); i++) {
addToIndex(testImgDescriptors[i],defaults,invertedMap);
for (size_t i = 0; i < testImageDescriptors.size(); i++) {
addToIndex(testImageDescriptors[i],defaults,invertedMap);
}
getIndexLikelihoods(queryImgDescriptor, defaults, invertedMap, matches);
}

View File

@ -47,18 +47,18 @@
#if CV_SSE2 || CV_SSE3
# if !CV_SSE4_1 && !CV_SSE4_2
# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
# endif
#endif
# if CV_AVX
# define CV_HAAR_USE_AVX 1
# else
# if CV_SSE2 || CV_SSE3
# define CV_HAAR_USE_SSE 1
# endif
# endif
# if CV_AVX
# define CV_HAAR_USE_AVX 1
# else
# if CV_SSE2 || CV_SSE3
# define CV_HAAR_USE_SSE 1
# endif
# endif
/* these settings affect the quality of detection: change with care */
#define CV_ADJUST_FEATURES 1
@ -634,86 +634,86 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
// AVX version of icvEvalHidHaarClassifier: processes 8 CvHidHaarClassifiers per call. Check for AVX support before invoking it!
#ifdef CV_HAAR_USE_AVX
#ifdef CV_HAAR_USE_AVX
// Evaluates eight consecutive tree classifiers (classifier[0] .. classifier[7])
// in lock-step and returns the sum of the eight leaf values they reach.
//
// classifier           : pointer to the first of the 8 classifiers to evaluate.
// variance_norm_factor : multiplied into every node threshold before comparing.
// p_offset             : forwarded unchanged to calc_sum() for each rectangle.
//
// Each pass of the for(;;) loop handles one tree level for all 8 lanes:
//   1. nodes[k] is set to the current node of classifier k (index idxV[k]).
//   2. The weighted sums of rect[0] and rect[1] are accumulated in 'sum';
//      rect[2] is added through the 32-byte-aligned 'tmp' buffer only for
//      lanes whose rect[2].p0 is non-null.
//   3. The next index per lane is node->left where sum < t and node->right
//      otherwise; the indices go through float lanes and are truncated back
//      to int into idxV.
//   4. A lane whose new index is <= 0 has reached a leaf: the first time this
//      happens alpha[-idx] is added to 'res' and the lane is flagged. Its
//      index is then reset to 0, so finished lanes keep re-evaluating their
//      root node and their results are ignored.
// The function returns once all 8 lanes have been flagged.
CV_INLINE
double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier,
double variance_norm_factor, size_t p_offset )
double variance_norm_factor, size_t p_offset )
{
int CV_DECL_ALIGNED(32) idxV[8] = {0,0,0,0,0,0,0,0};
char flags[8] = {0,0,0,0,0,0,0,0};
CvHidHaarTreeNode* nodes[8];
double res = 0;
char exitConditionFlag = 0;
for(;;)
{
float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
nodes[0] = classifier ->node + idxV[0];
nodes[1] = (classifier+1)->node + idxV[1];
nodes[2] = (classifier+2)->node + idxV[2];
nodes[3] = (classifier+3)->node + idxV[3];
nodes[4] = (classifier+4)->node + idxV[4];
nodes[5] = (classifier+5)->node + idxV[5];
nodes[6] = (classifier+6)->node + idxV[6];
nodes[7] = (classifier+7)->node + idxV[7];
int CV_DECL_ALIGNED(32) idxV[8] = {0,0,0,0,0,0,0,0};
char flags[8] = {0,0,0,0,0,0,0,0};
CvHidHaarTreeNode* nodes[8];
double res = 0;
char exitConditionFlag = 0;
for(;;)
{
float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
nodes[0] = classifier ->node + idxV[0];
nodes[1] = (classifier+1)->node + idxV[1];
nodes[2] = (classifier+2)->node + idxV[2];
nodes[3] = (classifier+3)->node + idxV[3];
nodes[4] = (classifier+4)->node + idxV[4];
nodes[5] = (classifier+5)->node + idxV[5];
nodes[6] = (classifier+6)->node + idxV[6];
nodes[7] = (classifier+7)->node + idxV[7];
__m256 t = _mm256_set1_ps(variance_norm_factor);
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
__m256 t = _mm256_set1_ps(variance_norm_factor);
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
__m256 sum = _mm256_mul_ps(offset, weight);
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
calc_sum(nodes[0]->feature.rect[1],p_offset));
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
if( nodes[0]->feature.rect[2].p0 )
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
if( nodes[1]->feature.rect[2].p0 )
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
__m256 sum = _mm256_mul_ps(offset, weight);
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
calc_sum(nodes[0]->feature.rect[1],p_offset));
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
if( nodes[0]->feature.rect[2].p0 )
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
if( nodes[1]->feature.rect[2].p0 )
tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight;
if( nodes[2]->feature.rect[2].p0 )
if( nodes[2]->feature.rect[2].p0 )
tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight;
if( nodes[3]->feature.rect[2].p0 )
if( nodes[3]->feature.rect[2].p0 )
tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight;
if( nodes[4]->feature.rect[2].p0 )
tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
if( nodes[5]->feature.rect[2].p0 )
if( nodes[4]->feature.rect[2].p0 )
tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
if( nodes[5]->feature.rect[2].p0 )
tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight;
if( nodes[6]->feature.rect[2].p0 )
if( nodes[6]->feature.rect[2].p0 )
tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight;
if( nodes[7]->feature.rect[2].p0 )
if( nodes[7]->feature.rect[2].p0 )
tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight;
sum = _mm256_add_ps(sum,_mm256_load_ps(tmp));
__m256 left = _mm256_set_ps(nodes[7]->left,nodes[6]->left,nodes[5]->left,nodes[4]->left,nodes[3]->left,nodes[2]->left,nodes[1]->left,nodes[0]->left);
__m256 right = _mm256_set_ps(nodes[7]->right,nodes[6]->right,nodes[5]->right,nodes[4]->right,nodes[3]->right,nodes[2]->right,nodes[1]->right,nodes[0]->right);
sum = _mm256_add_ps(sum,_mm256_load_ps(tmp));
_mm256_store_si256((__m256i*)idxV,_mm256_cvttps_epi32(_mm256_blendv_ps(right, left,_mm256_cmp_ps(sum, t, _CMP_LT_OQ ))));
__m256 left = _mm256_set_ps(nodes[7]->left,nodes[6]->left,nodes[5]->left,nodes[4]->left,nodes[3]->left,nodes[2]->left,nodes[1]->left,nodes[0]->left);
__m256 right = _mm256_set_ps(nodes[7]->right,nodes[6]->right,nodes[5]->right,nodes[4]->right,nodes[3]->right,nodes[2]->right,nodes[1]->right,nodes[0]->right);
for(int i = 0; i < 8; i++)
{
if(idxV[i]<=0)
{
if(!flags[i])
{
exitConditionFlag++;
flags[i]=1;
res+=((classifier+i)->alpha[-idxV[i]]);
}
idxV[i]=0;
}
}
if(exitConditionFlag==8)
return res;
}
_mm256_store_si256((__m256i*)idxV,_mm256_cvttps_epi32(_mm256_blendv_ps(right, left,_mm256_cmp_ps(sum, t, _CMP_LT_OQ ))));
for(int i = 0; i < 8; i++)
{
if(idxV[i]<=0)
{
if(!flags[i])
{
exitConditionFlag++;
flags[i]=1;
res+=((classifier+i)->alpha[-idxV[i]]);
}
idxV[i]=0;
}
}
if(exitConditionFlag==8)
return res;
}
}
#endif
@ -723,50 +723,50 @@ double icvEvalHidHaarClassifier( CvHidHaarClassifier* classifier,
size_t p_offset )
{
int idx = 0;
/*#if CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX
if(cv::checkHardwareSupport(CV_CPU_SSE2))//based on old SSE variant. Works slow
{
double CV_DECL_ALIGNED(16) temp[2];
__m128d zero = _mm_setzero_pd();
do
{
CvHidHaarTreeNode* node = classifier->node + idx;
__m128d t = _mm_set1_pd((node->threshold)*variance_norm_factor);
__m128d left = _mm_set1_pd(node->left);
__m128d right = _mm_set1_pd(node->right);
/*#if CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX
if(cv::checkHardwareSupport(CV_CPU_SSE2))//based on old SSE variant. Works slow
{
double CV_DECL_ALIGNED(16) temp[2];
__m128d zero = _mm_setzero_pd();
do
{
CvHidHaarTreeNode* node = classifier->node + idx;
__m128d t = _mm_set1_pd((node->threshold)*variance_norm_factor);
__m128d left = _mm_set1_pd(node->left);
__m128d right = _mm_set1_pd(node->right);
double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
_sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
_sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
_sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
_sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
__m128d sum = _mm_set1_pd(_sum);
t = _mm_cmplt_sd(sum, t);
sum = _mm_blendv_pd(right, left, t);
__m128d sum = _mm_set1_pd(_sum);
t = _mm_cmplt_sd(sum, t);
sum = _mm_blendv_pd(right, left, t);
_mm_store_pd(temp, sum);
idx = (int)temp[0];
}
while(idx > 0 );
}
else
#endif*/
_mm_store_pd(temp, sum);
idx = (int)temp[0];
}
while(idx > 0 );
}
else
#endif*/
{
do
{
do
{
CvHidHaarTreeNode* node = classifier->node + idx;
double t = node->threshold * variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
idx = sum < t ? node->left : node->right;
}
while( idx > 0 );
idx = sum < t ? node->left : node->right;
}
while( idx > 0 );
}
return classifier->alpha[-idx];
}
@ -777,18 +777,18 @@ static int
cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
CvPoint pt, double& stage_sum, int start_stage )
{
#ifdef CV_HAAR_USE_AVX
bool haveAVX = false;
if(cv::checkHardwareSupport(CV_CPU_AVX))
if(_xgetbv(_XCR_XFEATURE_ENABLED_MASK)&0x6)// Check if the OS will save the YMM registers
{
haveAVX = true;
}
#else
#ifdef CV_HAAR_USE_SSE
bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
#endif
#endif
#ifdef CV_HAAR_USE_AVX
bool haveAVX = false;
if(cv::checkHardwareSupport(CV_CPU_AVX))
if(_xgetbv(_XCR_XFEATURE_ENABLED_MASK)&0x6)// Check if the OS will save the YMM registers
{
haveAVX = true;
}
#else
#ifdef CV_HAAR_USE_SSE
bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
#endif
#endif
int p_offset, pq_offset;
int i, j;
@ -828,17 +828,17 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
{
stage_sum = 0.0;
#ifdef CV_HAAR_USE_AVX
if(haveAVX)
{
for( ; j < cascade->stage_classifier[i].count-8; j+=8 )
{
stage_sum += icvEvalHidHaarClassifierAVX(
cascade->stage_classifier[i].classifier+j,
variance_norm_factor, p_offset );
}
}
#endif
#ifdef CV_HAAR_USE_AVX
if(haveAVX)
{
for( ; j < cascade->stage_classifier[i].count-8; j+=8 )
{
stage_sum += icvEvalHidHaarClassifierAVX(
cascade->stage_classifier[i].classifier+j,
variance_norm_factor, p_offset );
}
}
#endif
for( j = 0; j < ptr->count; j++ )
{
stage_sum += icvEvalHidHaarClassifier( ptr->classifier + j, variance_norm_factor, p_offset );
@ -859,283 +859,283 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
}
else if( cascade->isStumpBased )
{
#ifdef CV_HAAR_USE_AVX
if(haveAVX)
{
CvHidHaarClassifier* classifiers[8];
CvHidHaarTreeNode* nodes[8];
for( i = start_stage; i < cascade->count; i++ )
{
stage_sum = 0.0;
int j = 0;
float CV_DECL_ALIGNED(32) buf[8];
if( cascade->stage_classifier[i].two_rects )
{
for( ; j <= cascade->stage_classifier[i].count-8; j+=8 )
{
//__m256 stage_sumPart = _mm256_setzero_ps();
classifiers[0] = cascade->stage_classifier[i].classifier + j;
nodes[0] = classifiers[0]->node;
classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
nodes[1] = classifiers[1]->node;
classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
nodes[2]= classifiers[2]->node;
classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
nodes[3] = classifiers[3]->node;
classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
nodes[4] = classifiers[4]->node;
classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
nodes[5] = classifiers[5]->node;
classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
nodes[6] = classifiers[6]->node;
classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
nodes[7] = classifiers[7]->node;
#ifdef CV_HAAR_USE_AVX
if(haveAVX)
{
CvHidHaarClassifier* classifiers[8];
CvHidHaarTreeNode* nodes[8];
for( i = start_stage; i < cascade->count; i++ )
{
stage_sum = 0.0;
int j = 0;
float CV_DECL_ALIGNED(32) buf[8];
if( cascade->stage_classifier[i].two_rects )
{
for( ; j <= cascade->stage_classifier[i].count-8; j+=8 )
{
//__m256 stage_sumPart = _mm256_setzero_ps();
classifiers[0] = cascade->stage_classifier[i].classifier + j;
nodes[0] = classifiers[0]->node;
classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
nodes[1] = classifiers[1]->node;
classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
nodes[2]= classifiers[2]->node;
classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
nodes[3] = classifiers[3]->node;
classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
nodes[4] = classifiers[4]->node;
classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
nodes[5] = classifiers[5]->node;
classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
nodes[6] = classifiers[6]->node;
classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
nodes[7] = classifiers[7]->node;
__m256 t = _mm256_set1_ps(variance_norm_factor);
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
__m256 t = _mm256_set1_ps(variance_norm_factor);
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
__m256 sum = _mm256_mul_ps(offset, weight);
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
__m256 sum = _mm256_mul_ps(offset, weight);
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
calc_sum(nodes[0]->feature.rect[1],p_offset));
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
calc_sum(nodes[0]->feature.rect[1],p_offset));
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
_mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )));
stage_sum+=(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
}
for( ; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
else
{
for( ; j <= (cascade->stage_classifier[i].count)-8; j+=8 )
{
float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
_mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )));
stage_sum+=(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
classifiers[0] = cascade->stage_classifier[i].classifier + j;
nodes[0] = classifiers[0]->node;
classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
nodes[1] = classifiers[1]->node;
classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
nodes[2]= classifiers[2]->node;
classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
nodes[3] = classifiers[3]->node;
classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
nodes[4] = classifiers[4]->node;
classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
nodes[5] = classifiers[5]->node;
classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
nodes[6] = classifiers[6]->node;
classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
nodes[7] = classifiers[7]->node;
}
__m256 t = _mm256_set1_ps(variance_norm_factor);
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
__m256 sum = _mm256_mul_ps(offset, weight);
for( ; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
calc_sum(nodes[0]->feature.rect[1],p_offset));
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
else
{
for( ; j <= (cascade->stage_classifier[i].count)-8; j+=8 )
{
float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
if( nodes[0]->feature.rect[2].p0 )
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
if( nodes[1]->feature.rect[2].p0 )
tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight;
if( nodes[2]->feature.rect[2].p0 )
tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight;
if( nodes[3]->feature.rect[2].p0 )
tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight;
if( nodes[4]->feature.rect[2].p0 )
tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
if( nodes[5]->feature.rect[2].p0 )
tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight;
if( nodes[6]->feature.rect[2].p0 )
tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight;
if( nodes[7]->feature.rect[2].p0 )
tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight;
sum = _mm256_add_ps(sum, _mm256_load_ps(tmp));
classifiers[0] = cascade->stage_classifier[i].classifier + j;
nodes[0] = classifiers[0]->node;
classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
nodes[1] = classifiers[1]->node;
classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
nodes[2]= classifiers[2]->node;
classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
nodes[3] = classifiers[3]->node;
classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
nodes[4] = classifiers[4]->node;
classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
nodes[5] = classifiers[5]->node;
classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
nodes[6] = classifiers[6]->node;
classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
nodes[7] = classifiers[7]->node;
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
__m256 t = _mm256_set1_ps(variance_norm_factor);
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
__m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ ));
outBuf = _mm256_hadd_ps(outBuf, outBuf);
outBuf = _mm256_hadd_ps(outBuf, outBuf);
_mm256_store_ps(buf, outBuf);
stage_sum+=(buf[0]+buf[4]);//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
}
for( ; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
__m256 sum = _mm256_mul_ps(offset, weight);
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
else
#endif
#ifdef CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX //old SSE optimization
if(haveSSE2)
{
for( i = start_stage; i < cascade->count; i++ )
{
__m128d stage_sum = _mm_setzero_pd();
if( cascade->stage_classifier[i].two_rects )
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
calc_sum(nodes[0]->feature.rect[1],p_offset));
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
__m128d a = _mm_set_sd(classifier->alpha[0]);
__m128d b = _mm_set_sd(classifier->alpha[1]);
__m128d sum = _mm_set_sd(calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight +
calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight);
t = _mm_cmpgt_sd(t, sum);
stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
}
}
else
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
__m128d a = _mm_set_sd(classifier->alpha[0]);
__m128d b = _mm_set_sd(classifier->alpha[1]);
double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
_sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
_sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
__m128d sum = _mm_set_sd(_sum);
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
t = _mm_cmpgt_sd(t, sum);
stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
}
}
__m128d i_threshold = _mm_set1_pd(cascade->stage_classifier[i].threshold);
if( _mm_comilt_sd(stage_sum, i_threshold) )
return -i;
}
}
else
#endif
{
for( i = start_stage; i < cascade->count; i++ )
{
stage_sum = 0.0;
if( cascade->stage_classifier[i].two_rects )
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
else
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
}
else
if( nodes[0]->feature.rect[2].p0 )
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
if( nodes[1]->feature.rect[2].p0 )
tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight;
if( nodes[2]->feature.rect[2].p0 )
tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight;
if( nodes[3]->feature.rect[2].p0 )
tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight;
if( nodes[4]->feature.rect[2].p0 )
tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
if( nodes[5]->feature.rect[2].p0 )
tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight;
if( nodes[6]->feature.rect[2].p0 )
tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight;
if( nodes[7]->feature.rect[2].p0 )
tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight;
sum = _mm256_add_ps(sum, _mm256_load_ps(tmp));
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
__m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ ));
outBuf = _mm256_hadd_ps(outBuf, outBuf);
outBuf = _mm256_hadd_ps(outBuf, outBuf);
_mm256_store_ps(buf, outBuf);
stage_sum+=(buf[0]+buf[4]);//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
}
for( ; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
else
#endif
#if defined CV_HAAR_USE_SSE && CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX //old SSE optimization
if(haveSSE2)
{
for( i = start_stage; i < cascade->count; i++ )
{
__m128d stage_sum = _mm_setzero_pd();
if( cascade->stage_classifier[i].two_rects )
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
__m128d a = _mm_set_sd(classifier->alpha[0]);
__m128d b = _mm_set_sd(classifier->alpha[1]);
__m128d sum = _mm_set_sd(calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight +
calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight);
t = _mm_cmpgt_sd(t, sum);
stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
}
}
else
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
// ayasin - NHM perf optim. Avoid use of costly flaky jcc
__m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
__m128d a = _mm_set_sd(classifier->alpha[0]);
__m128d b = _mm_set_sd(classifier->alpha[1]);
double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
_sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
_sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
__m128d sum = _mm_set_sd(_sum);
t = _mm_cmpgt_sd(t, sum);
stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
}
}
__m128d i_threshold = _mm_set1_pd(cascade->stage_classifier[i].threshold);
if( _mm_comilt_sd(stage_sum, i_threshold) )
return -i;
}
}
else
#endif
{
for( i = start_stage; i < cascade->count; i++ )
{
stage_sum = 0.0;
if( cascade->stage_classifier[i].two_rects )
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
else
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
CvHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
}
else
{
for( i = start_stage; i < cascade->count; i++ )
{
stage_sum = 0.0;
int j = 0;
#ifdef CV_HAAR_USE_AVX
if(haveAVX)
{
for( ; j < cascade->stage_classifier[i].count-8; j+=8 )
{
stage_sum += icvEvalHidHaarClassifierAVX(
cascade->stage_classifier[i].classifier+j,
variance_norm_factor, p_offset );
}
}
#endif
for(; j < cascade->stage_classifier[i].count; j++ )
{
stage_sum += icvEvalHidHaarClassifier(
cascade->stage_classifier[i].classifier + j,
variance_norm_factor, p_offset );
}
int k = 0;
#ifdef CV_HAAR_USE_AVX
if(haveAVX)
{
for( ; k < cascade->stage_classifier[i].count-8; k+=8 )
{
stage_sum += icvEvalHidHaarClassifierAVX(
cascade->stage_classifier[i].classifier+k,
variance_norm_factor, p_offset );
}
}
#endif
for(; k < cascade->stage_classifier[i].count; k++ )
{
stage_sum += icvEvalHidHaarClassifier(
cascade->stage_classifier[i].classifier + k,
variance_norm_factor, p_offset );
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
//_mm256_zeroupper();
//_mm256_zeroupper();
return 1;
}

View File

@ -1,13 +1,13 @@
/*
* pca.cpp
*
* Author:
* Author:
* Kevin Hughes <kevinhughes27[at]gmail[dot]com>
*
* Special Thanks to:
* Philipp Wagner <bytefish[at]gmx[dot]de>
*
* This program demonstrates how to use OpenCV PCA with a
* This program demonstrates how to use OpenCV PCA with a
* specified amount of variance to retain. The effect
* is illustrated further by using a trackbar to
* change the value for retained variance.
@ -17,9 +17,9 @@
* on this list of images. The author recommends using
* the first 15 faces of the AT&T face data set:
* http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
*
*
* so for example your input text file would look like this:
*
*
* <path_to_at&t_faces>/orl_faces/s1/1.pgm
* <path_to_at&t_faces>/orl_faces/s2/1.pgm
* <path_to_at&t_faces>/orl_faces/s3/1.pgm
@ -50,7 +50,7 @@ using namespace std;
///////////////////////
// Functions
void read_imgList(const string& filename, vector<Mat>& images) {
static void read_imgList(const string& filename, vector<Mat>& images) {
std::ifstream file(filename.c_str(), ifstream::in);
if (!file) {
string error_message = "No valid input file was given, please check the given filename.";
@ -62,19 +62,19 @@ void read_imgList(const string& filename, vector<Mat>& images) {
}
}
// Packs a list of images into a single CV_32F matrix with one image per row.
//
// data: the images to stack. data[0] is read unconditionally to size the
//       result, so the vector must not be empty (main() rejects lists with
//       fewer than 2 images before calling this).
// Returns a matrix with data.size() rows and data[0].rows*data[0].cols columns.
//
// NOTE(review): the signature appears twice below (without and with `static`);
// this looks like the old and new line of a diff rendered together - confirm
// that only the `static` form is meant to remain.
// NOTE(review): the column count ignores the channel count and every image is
// sized from data[0] - presumably all inputs are single-channel and of equal
// size; confirm against the caller.
Mat formatImagesForPCA(const vector<Mat> &data)
static Mat formatImagesForPCA(const vector<Mat> &data)
{
Mat dst(data.size(), data[0].rows*data[0].cols, CV_32F);
for(unsigned int i = 0; i < data.size(); i++)
{
// Flatten a copy of the image into a 1-channel, 1-row matrix.
Mat image_row = data[i].clone().reshape(1,1);
// Header for row i of dst; the conversion below is written through it.
Mat row_i = dst.row(i);
// NOTE(review): this statement is listed twice - presumably the old/new pair
// of a whitespace-only diff change rather than an intentional repeat.
image_row.convertTo(row_i,CV_32F);
image_row.convertTo(row_i,CV_32F);
}
return dst;
}
Mat toGrayscale(InputArray _src) {
static Mat toGrayscale(InputArray _src) {
Mat src = _src.getMat();
// only allow one channel
if(src.channels() != 1) {
@ -95,22 +95,22 @@ struct params
string winName;
};
// Trackbar callback: recomputes the PCA for the newly selected retained
// variance and redisplays the reconstruction of the first data row.
//
// pos: trackbar position, interpreted as a percentage (divided by 100 below).
// ptr: user data pointer; it is cast to `struct params *` and its data, pca,
//      ch, rows and winName members are used.
//
// NOTE(review): the signature and opening brace appear twice below (without
// and with `static`); this looks like the old and new lines of a diff rendered
// together - confirm that only the `static` form is meant to remain.
void onTrackbar(int pos, void* ptr)
{
static void onTrackbar(int pos, void* ptr)
{
cout << "Retained Variance = " << pos << "% ";
// Flush so the progress message is visible before the PCA is recomputed.
cout << "re-calculating PCA..." << std::flush;
// Convert the percentage into the fraction passed to the PCA constructor.
double var = pos / 100.0;
struct params *p = (struct params *)ptr;
// Rebuild the PCA over the row-wise data with the new retained variance and
// store it back into the shared params struct.
p->pca = PCA(p->data, cv::Mat(), CV_PCA_DATA_AS_ROW, var);
// Project the first data row and back-project it to obtain its reconstruction.
Mat point = p->pca.project(p->data.row(0));
Mat reconstruction = p->pca.backProject(point);
// Reshape the reconstructed row using the stored channel and row counts.
reconstruction = reconstruction.reshape(p->ch, p->rows);
// Pass through toGrayscale before display.
reconstruction = toGrayscale(reconstruction);
imshow(p->winName, reconstruction);
// The row count of the eigenvector matrix is reported as the number of
// principal components.
cout << "done! # of principal components: " << p->pca.eigenvectors.rows << endl;
}
@ -118,19 +118,19 @@ void onTrackbar(int pos, void* ptr)
///////////////////////
// Main
int main(int argc, char** argv)
int main(int argc, char** argv)
{
if (argc != 2) {
cout << "usage: " << argv[0] << " <image_list.txt>" << endl;
exit(1);
}
// Get the path to your CSV.
string imgList = string(argv[1]);
// vector to hold the images
vector<Mat> images;
// Read in the data. This can fail if not valid
try {
read_imgList(imgList, images);
@ -138,29 +138,29 @@ int main(int argc, char** argv)
cerr << "Error opening file \"" << imgList << "\". Reason: " << e.msg << endl;
exit(1);
}
// Quit if there are not enough images for this demo.
if(images.size() <= 1) {
string error_message = "This demo needs at least 2 images to work. Please add more images to your data set!";
CV_Error(CV_StsError, error_message);
}
// Reshape and stack images into a rowMatrix
Mat data = formatImagesForPCA(images);
// perform PCA
PCA pca(data, cv::Mat(), CV_PCA_DATA_AS_ROW, 0.95); // trackbar is initially set here, also this is a common value for retainedVariance
// Demonstration of the effect of retainedVariance on the first image
// Demonstration of the effect of retainedVariance on the first image
Mat point = pca.project(data.row(0)); // project into the eigenspace, thus the image becomes a "point"
Mat reconstruction = pca.backProject(point); // re-create the image from the "point"
reconstruction = reconstruction.reshape(images[0].channels(), images[0].rows); // reshape from a row vector into image shape
reconstruction = toGrayscale(reconstruction); // re-scale for displaying purposes
// init highgui window
string winName = "Reconstruction | press 'q' to quit";
namedWindow(winName, CV_WINDOW_NORMAL);
// params struct to pass to the trackbar handler
params p;
p.data = data;
@ -168,17 +168,17 @@ int main(int argc, char** argv)
p.rows = images[0].rows;
p.pca = pca;
p.winName = winName;
// create the trackbar
int pos = 95;
createTrackbar("Retained Variance (%)", winName, &pos, 100, onTrackbar, (void*)&p);
createTrackbar("Retained Variance (%)", winName, &pos, 100, onTrackbar, (void*)&p);
// display until user presses q
imshow(winName, reconstruction);
char key = 0;
while(key != 'q')
key = waitKey();
return 0;
return 0;
}