From 10d18a315c99ece9fee02666afdebe42da0943a9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 25 Aug 2016 17:26:46 +0300 Subject: [PATCH] instrumentation: minor refactoring, wrap OpenCL - calculate ticksTotal instead of ticksMean - local / global width is based on ticksTotal value - added instrumentation for OpenCL program compilation - added instrumentation for OpenCL kernel execution --- cmake/OpenCVGenABI.cmake | 2 +- modules/core/include/opencv2/core/private.hpp | 14 ++++-- modules/core/include/opencv2/core/utility.hpp | 49 ++++++++++++------- modules/core/src/ocl.cpp | 10 ++++ modules/core/src/system.cpp | 46 ++++++++--------- modules/ts/src/ts_perf.cpp | 19 ++++--- 6 files changed, 86 insertions(+), 54 deletions(-) diff --git a/cmake/OpenCVGenABI.cmake b/cmake/OpenCVGenABI.cmake index 35cc10d8ed..86aa78b17b 100644 --- a/cmake/OpenCVGenABI.cmake +++ b/cmake/OpenCVGenABI.cmake @@ -43,7 +43,7 @@ string(REPLACE ";" "\n " OPENCV_ABI_SKIP_HEADERS "${OPENCV_ABI_SKIP_HEADERS}" string(REPLACE ";" "\n " OPENCV_ABI_SKIP_LIBRARIES "${OPENCV_ABI_SKIP_LIBRARIES}") # Options -set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}") +set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} -DOPENCV_ABI_CHECK=1") string(REGEX REPLACE "([^ ]) +([^ ])" "\\1\\n \\2" OPENCV_ABI_GCC_OPTIONS "${OPENCV_ABI_GCC_OPTIONS}") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/opencv_abi.xml.in" "${path1}") diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index c839327823..44e1a44bc1 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -476,7 +476,7 @@ public: class CV_EXPORTS IntrumentationRegion { public: - IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN); + IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN); ~IntrumentationRegion(); private: @@ -492,7 +492,7 @@ CV_EXPORTS InstrNode* getCurrentNode(); ///// General instrumentation // Instrument region -#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...) ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL); +#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL); // Instrument functions with non-void return type #define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\ {\ @@ -535,13 +535,13 @@ CV_EXPORTS InstrNode* getCurrentNode(); ///// General instrumentation // General OpenCV region instrumentation macro -#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) +#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) // Parallel OpenCV region instrumentation macro #define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) ///// IPP instrumentation // Wrapper region instrumentation macro -#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP) +#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP) // Function instrumentation macro #define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__) // Diagnostic markers @@ -549,12 +549,15 @@ CV_EXPORTS InstrNode* getCurrentNode(); ///// OpenCL instrumentation // Wrapper region instrumentation macro -#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) +#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) +#define CV_INSTRUMENT_REGION_OPENCL_(NAME) CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) // Function instrumentation macro #define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__) // Diagnostic markers #define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME) #else +#define CV_INSTRUMENT_REGION_META(...) + #define CV_INSTRUMENT_REGION() #define CV_INSTRUMENT_REGION_MT() @@ -563,6 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode(); #define CV_INSTRUMENT_MARK_IPP(NAME) #define CV_INSTRUMENT_REGION_OPENCL() +#define CV_INSTRUMENT_REGION_OPENCL_(...) #define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__)) #define CV_INSTRUMENT_MARK_OPENCL(NAME) #endif diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 3bf6975643..e465faee5f 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -1076,7 +1076,10 @@ public: // Instrumentation external interface namespace instr { -enum + +#if !defined OPENCV_ABI_CHECK + +enum TYPE { TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function TYPE_MARKER, // Information marker @@ -1084,48 +1087,60 @@ enum TYPE_FUN, // Simple function call }; -enum +enum IMPL { IMPL_PLAIN = 0, IMPL_IPP, IMPL_OPENCL, }; -enum -{ - FLAGS_MAPPING = 0x01, -}; - class CV_EXPORTS NodeData { public: - NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN); + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); NodeData(NodeData &ref); ~NodeData(); NodeData& operator=(const NodeData&); cv::String m_funName; - int m_instrType; - int m_implType; - cv::String m_fileName; + cv::instr::TYPE m_instrType; + cv::instr::IMPL m_implType; + const char* m_fileName; int m_lineNum; - bool m_funError; volatile int m_counter; - bool m_stopPoint; - uint64 m_ticksMean; + volatile uint64 m_ticksTotal; + // No synchronization + double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); } + // No synchronization + double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); } + + bool m_funError; + bool m_stopPoint; }; bool operator==(const NodeData& lhs, const NodeData& rhs); typedef Node InstrNode; +CV_EXPORTS InstrNode* getTrace(); + +#endif // !defined OPENCV_ABI_CHECK + + CV_EXPORTS bool useInstrumentation(); CV_EXPORTS void setUseInstrumentation(bool flag); -CV_EXPORTS InstrNode* getTrace(); CV_EXPORTS void resetTrace(); -CV_EXPORTS void setFlags(int modeFlags); -CV_EXPORTS int getFlags(); + +enum FLAGS +{ + FLAGS_NONE = 0, + FLAGS_MAPPING = 1 << 0, +}; + +CV_EXPORTS void setFlags(FLAGS modeFlags); +static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } +CV_EXPORTS FLAGS getFlags(); } } //namespace cv diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 6780d85e57..924b360851 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -45,6 +45,7 @@ #include #include #include // std::cerr +#include #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0 #define CV_OPENCL_SHOW_RUN_ERRORS 0 @@ -3169,6 +3170,9 @@ struct Kernel::Impl { cl_program ph = (cl_program)prog.ptr(); cl_int retval = 0; +#ifdef ENABLE_INSTRUMENTATION + name = kname; +#endif handle = ph != 0 ? clCreateKernel(ph, kname, &retval) : 0; CV_OclDbgAssert(retval == CL_SUCCESS); @@ -3221,6 +3225,9 @@ struct Kernel::Impl IMPLEMENT_REFCOUNTABLE(); +#ifdef ENABLE_INSTRUMENTATION + cv::String name; +#endif cl_kernel handle; cl_event e; enum { MAX_ARRS = 16 }; @@ -3441,6 +3448,8 @@ int Kernel::set(int i, const KernelArg& arg) bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], bool sync, const Queue& q) { + CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL); + if(!p || !p->handle || p->e != 0) return false; @@ -3552,6 +3561,7 @@ struct Program::Impl Impl(const ProgramSource& _src, const String& _buildflags, String& errmsg) { + CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str()); refcount = 1; const Context& ctx = Context::getDefault(); src = _src; diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index ffca514f8e..bd44fe3339 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1337,7 +1337,7 @@ void resetTrace() #endif } -void setFlags(int modeFlags) +void setFlags(FLAGS modeFlags) { #ifdef ENABLE_INSTRUMENTATION getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING); @@ -1345,33 +1345,34 @@ void setFlags(int modeFlags) CV_UNUSED(modeFlags); #endif } -int getFlags() +FLAGS getFlags() { #ifdef ENABLE_INSTRUMENTATION int flags = 0; if(getInstrumentStruct().enableMapping) flags |= FLAGS_MAPPING; - return flags; + return (FLAGS)flags; #else - return 0; + return (FLAGS)0; #endif } -NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType) +NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType) { m_instrType = TYPE_GENERAL; m_implType = IMPL_PLAIN; - m_funError = false; - m_counter = 0; - m_stopPoint = false; - m_ticksMean = 0; - m_funName = funName; m_instrType = instrType; m_implType = implType; m_fileName = fileName; m_lineNum = lineNum; + + m_counter = 0; + m_ticksTotal = 0; + + m_funError = false; + m_stopPoint = false; } NodeData::NodeData(NodeData &ref) { @@ -1382,13 +1383,12 @@ NodeData& NodeData::operator=(const NodeData &right) this->m_funName = right.m_funName; this->m_instrType = right.m_instrType; this->m_implType = right.m_implType; - this->m_funError = right.m_funError; - this->m_counter = right.m_counter; - this->m_stopPoint = right.m_stopPoint; - this->m_ticksMean = right.m_ticksMean; this->m_fileName = right.m_fileName; this->m_lineNum = right.m_lineNum; - + this->m_counter = right.m_counter; + this->m_ticksTotal = right.m_ticksTotal; + this->m_funError = right.m_funError; + this->m_stopPoint = right.m_stopPoint; return *this; } NodeData::~NodeData() @@ -1418,7 +1418,7 @@ InstrNode* getCurrentNode() return getInstrumentTLSStruct().pCurrentNode; } -IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType) +IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType) { m_disabled = false; m_regionTicks = 0; @@ -1482,18 +1482,18 @@ IntrumentationRegion::~IntrumentationRegion() } else { - if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL && - pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN) - cv::ocl::finish(); + if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL && + (pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN || + pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)) + { + cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation + } uint64 ticks = (getTickCount() - m_regionTicks); { cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation pTLS->pCurrentNode->m_payload.m_counter++; - if(pTLS->pCurrentNode->m_payload.m_counter <= 1) - pTLS->pCurrentNode->m_payload.m_ticksMean = ticks; - else - pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter; + pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks; } pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent; diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 50e23548fe..8866272667 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -767,7 +767,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot) static double calcLocalWeight(cv::instr::InstrNode *pNode) { if(pNode->m_pParent && pNode->m_pParent->m_pParent) - return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean); + return ((double)pNode->m_payload.m_ticksTotal*100/pNode->m_pParent->m_payload.m_ticksTotal); else return 100; } @@ -779,7 +779,7 @@ static double calcGlobalWeight(cv::instr::InstrNode *pNode) while(globNode->m_pParent && globNode->m_pParent->m_pParent) globNode = globNode->m_pParent; - return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean); + return ((double)pNode->m_payload.m_ticksTotal*100/(double)globNode->m_payload.m_ticksTotal); } static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot) @@ -811,7 +811,7 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo if(pNode->m_pParent) { printf(" - C:%d", pNode->m_payload.m_counter); - printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000); + printf(" T:%.4fms", pNode->m_payload.getMeanMs()); if(pNode->m_pParent->m_pParent) printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode)); } @@ -886,12 +886,15 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode) return string; } -static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl) +static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, cv::instr::TYPE type, cv::instr::IMPL impl) { uint64 ticks = 0; - if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl) - ticks = pNode->m_payload.m_ticksMean; + if (pNode->m_pParent && (type < 0 || pNode->m_payload.m_instrType == type) && pNode->m_payload.m_implType == impl) + { + ticks = pNode->m_payload.m_ticksTotal; + return ticks; + } for(size_t i = 0; i < pNode->m_childs.size(); i++) ticks += getNodeTimeRec(pNode->m_childs[i], type, impl); @@ -899,7 +902,7 @@ static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl) return ticks; } -static uint64 getImplTime(int impl) +static uint64 getImplTime(cv::instr::IMPL impl) { uint64 ticks = 0; cv::instr::InstrNode *pRoot = cv::instr::getTrace(); @@ -915,7 +918,7 @@ static uint64 getTotalTime() cv::instr::InstrNode *pRoot = cv::instr::getTrace(); for(size_t i = 0; i < pRoot->m_childs.size(); i++) - ticks += pRoot->m_childs[i]->m_payload.m_ticksMean; + ticks += pRoot->m_childs[i]->m_payload.m_ticksTotal; return ticks; }