From 9f29506d2cf8954a2a0fb8411d87c2067ea75cff Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Fri, 6 May 2011 12:24:56 +0000 Subject: [PATCH] Refactored NEON optimization usage --- CMakeLists.txt | 6 +-- android/android-opencv/cmake_android.sh | 12 +++-- android/android-opencv/cmake_android_neon.sh | 11 ++-- .../core/include/opencv2/core/internal.hpp | 5 ++ modules/features2d/src/brief.cpp | 50 +++++++++---------- 5 files changed, 45 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2707e5924f..f6b80433ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -447,6 +447,7 @@ if(UNIX) endif() if(WITH_V4L) + CHECK_MODULE(libv4l1 HAVE_LIBV4L) CHECK_INCLUDE_FILE(linux/videodev.h HAVE_CAMV4L) CHECK_INCLUDE_FILE(linux/videodev2.h HAVE_CAMV4L2) @@ -889,6 +890,7 @@ if(MSVC) # 64-bit portability warnings, in MSVC8 if(MSVC80) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /Wp64") + endif() #if(MSVC90) # set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /D _BIND_TO_CURRENT_CRT_VERSION=1 /D _BIND_TO_CURRENT_VCLIBS_VERSION=1") @@ -1016,10 +1018,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -DNDEBUG") set(EXTRA_C_FLAGS_DEBUG "${EXTRA_C_FLAGS_DEBUG} -O0 -ggdb3 -DDEBUG -D_DEBUG") - if(ANDROID) - #force compiler to interpret char as signed char - set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsigned-char") - endif() endif() # Extra link libs if the user selects building static libs: diff --git a/android/android-opencv/cmake_android.sh b/android/android-opencv/cmake_android.sh index 61ebe07930..3e90e33076 100644 --- a/android/android-opencv/cmake_android.sh +++ b/android/android-opencv/cmake_android.sh @@ -1,5 +1,9 @@ -opencv_dir=`pwd`/../build -mkdir build -cd build -cmake -DOpenCVDIR=$opencv_dir -DCMAKE_TOOLCHAIN_FILE=$ANDTOOLCHAIN .. +#!/bin/sh +cd `dirname $0` + +opencv_build_dir=`pwd`/../build +mkdir -p build +cd build + +cmake -DOpenCVDIR=$opencv_build_dir -DCMAKE_TOOLCHAIN_FILE=../../android.toolchain.cmake .. 
diff --git a/android/android-opencv/cmake_android_neon.sh b/android/android-opencv/cmake_android_neon.sh index fa95bd9af6..56c14923c6 100644 --- a/android/android-opencv/cmake_android_neon.sh +++ b/android/android-opencv/cmake_android_neon.sh @@ -1,5 +1,8 @@ -opencv_dir=`pwd`/../build_neon -mkdir build_neon -cd build_neon -cmake -DOpenCV_DIR=$opencv_dir -DARM_TARGETS="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=$ANDTOOLCHAIN .. +#!/bin/sh +cd `dirname $0` +opencv_build_dir=`pwd`/../build_neon +mkdir -p build_neon +cd build_neon + +cmake -DOpenCVDIR=$opencv_build_dir -DARM_TARGET="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../../android.toolchain.cmake .. diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 7b5b4c192b..5fabd24105 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -122,8 +122,13 @@ CV_INLINE IppiSize ippiSize(int width, int height) #if defined ANDROID && defined __ARM_NEON__ #include "arm_neon.h" #define CV_NEON 1 + +#define CPU_HAS_NEON_FEATURE (true) +//TODO: make real check using stuff from "cpu-features.h" +//((bool)android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) #else #define CV_NEON 0 +#define CPU_HAS_NEON_FEATURE (false) #endif #ifndef IPPI_CALL diff --git a/modules/features2d/src/brief.cpp b/modules/features2d/src/brief.cpp index 0bd4012149..bde3127590 100644 --- a/modules/features2d/src/brief.cpp +++ b/modules/features2d/src/brief.cpp @@ -44,11 +44,6 @@ #include #include -#if ANDROID && HAVE_NEON -#include -#include -#endif - #include #include @@ -115,9 +110,8 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c { #if __GNUC__ ResultType result = 0; -#if ANDROID && HAVE_NEON - static uint64_t features = android_getCpuFeatures(); - if ((features & ANDROID_CPU_ARM_FEATURE_NEON)) +#if CV_NEON + if (CPU_HAS_NEON_FEATURE) { for (size_t i = 0; i < size; i += 16) { @@ -126,7 
+120,7 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c //uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t) uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); - uint8x16_t bitsSet += vcntq_u8 (AxorB); + uint8x16_t bitsSet = vcntq_u8 (AxorB); //uint16x8_t vpadalq_u8 (uint16x8_t, uint8x16_t) uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); @@ -138,25 +132,27 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c } else #endif - //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) - typedef unsigned long long pop_t; - const size_t modulo = size % sizeof(pop_t); - const pop_t * a2 = reinterpret_cast<const pop_t*> (a); - const pop_t * b2 = reinterpret_cast<const pop_t*> (b); - const pop_t * a2_end = a2 + (size/sizeof(pop_t)); - - for (; a2 != a2_end; ++a2, ++b2) - result += __builtin_popcountll((*a2) ^ (*b2)); - - if (modulo) { - //in the case where size is not divisible by sizeof(size_t) - //need to mask off the bits at the end - pop_t a_final=0,b_final=0; - memcpy(&a_final,a2,modulo); - memcpy(&b_final,b2,modulo); - result += __builtin_popcountll(a_final ^ b_final); - } + //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) + typedef unsigned long long pop_t; + const size_t modulo = size % sizeof(pop_t); + const pop_t * a2 = reinterpret_cast<const pop_t*> (a); + const pop_t * b2 = reinterpret_cast<const pop_t*> (b); + const pop_t * a2_end = a2 + (size/sizeof(pop_t)); + + for (; a2 != a2_end; ++a2, ++b2) + result += __builtin_popcountll((*a2) ^ (*b2)); + + if (modulo) + { + //in the case where size is not divisible by sizeof(size_t) + //need to mask off the bits at the end + pop_t a_final=0,b_final=0; + memcpy(&a_final,a2,modulo); + memcpy(&b_final,b2,modulo); + result += __builtin_popcountll(a_final ^ b_final); + } + } return result; #else return HammingLUT()(a,b,size);