Implementations instrumentation framework for OpenCV performance system;

This commit is contained in:
Pavel Vlasov 2016-08-15 17:12:45 +03:00
parent cd943cc60d
commit a462d76a95
10 changed files with 790 additions and 32 deletions

View File

@ -286,6 +286,7 @@ OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too no
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF IF ANDROID )
OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )

View File

@ -212,3 +212,6 @@
/* FP16 */
#cmakedefine HAVE_FP16
/* Library was compiled with functions instrumentation */
#cmakedefine ENABLE_INSTRUMENTATION

View File

@ -344,7 +344,7 @@ static struct __IppInitializer__ __ipp_initializer__;
#ifdef CV_IPP_RUN_VERBOSE
#define CV_IPP_RUN_(condition, func, ...) \
{ \
if (cv::ipp::useIPP() && (condition) && func) \
if (cv::ipp::useIPP() && (condition) && (func)) \
{ \
printf("%s: IPP implementation is running\n", CV_Func); \
fflush(stdout); \
@ -376,7 +376,7 @@ static struct __IppInitializer__ __ipp_initializer__;
}
#else
#define CV_IPP_RUN_(condition, func, ...) \
if (cv::ipp::useIPP() && (condition) && func) \
if (cv::ipp::useIPP() && (condition) && (func)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return __VA_ARGS__; \
@ -393,7 +393,7 @@ static struct __IppInitializer__ __ipp_initializer__;
#define CV_IPP_RUN_FAST(func, ...)
#endif
#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__)
#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__)
#ifndef IPPI_CALL
@ -449,6 +449,135 @@ CV_EXPORTS void setUseTegra(bool flag);
}
#endif
#ifdef ENABLE_INSTRUMENTATION
namespace cv
{
namespace instr
{
struct InstrTLSStruct
{
InstrTLSStruct()
{
pCurrentNode = NULL;
}
InstrNode* pCurrentNode;
};
class InstrStruct
{
public:
InstrStruct()
{
useInstr = false;
enableMapping = true;
rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN);
tlsStruct.get()->pCurrentNode = &rootNode;
}
Mutex mutexCreate;
Mutex mutexCount;
bool useInstr;
bool enableMapping;
InstrNode rootNode;
TLSData<InstrTLSStruct> tlsStruct;
};
class CV_EXPORTS IntrumentationRegion
{
public:
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
~IntrumentationRegion();
private:
bool m_disabled; // region status
uint64 m_regionTicks;
};
InstrStruct& getInstrumentStruct();
InstrTLSStruct& getInstrumentTLSStruct();
CV_EXPORTS InstrNode* getCurrentNode();
}
}
///// General instrumentation
// Instrument region
#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...) ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL);
// Instrument functions with non-void return type
#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
{\
if(::cv::instr::useInstrumentation()){\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
try{\
auto status = ((FUN)(__VA_ARGS__));\
if(ERROR_COND){\
::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
}\
return status;\
}catch(...){\
::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
throw;\
}\
}else{\
return ((FUN)(__VA_ARGS__));\
}\
}())
// Instrument functions with void return type
#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
{\
if(::cv::instr::useInstrumentation()){\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
try{\
(FUN)(__VA_ARGS__);\
}catch(...){\
::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
throw;\
}\
}else{\
(FUN)(__VA_ARGS__);\
}\
}())
// Instrumentation information marker
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(#NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);}
///// General instrumentation
// General OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
// Parallel OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
///// IPP instrumentation
// Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
// Function instrumentation macro
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
// Diagnostic markers
#define CV_INSTRUMENT_MARK_IPP(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME)
///// OpenCL instrumentation
// Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
// Function instrumentation macro
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
// Diagnostic markers
#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
#else
#define CV_INSTRUMENT_REGION()
#define CV_INSTRUMENT_REGION_MT()
#define CV_INSTRUMENT_REGION_IPP()
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
#define CV_INSTRUMENT_MARK_IPP(NAME)
#define CV_INSTRUMENT_REGION_OPENCL()
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
#define CV_INSTRUMENT_MARK_OPENCL(NAME)
#endif
//! @endcond
#endif // __OPENCV_CORE_PRIVATE_HPP__

View File

@ -1002,6 +1002,137 @@ template<> inline std::string CommandLineParser::get<std::string>(const String&
//! @endcond
// Basic Node class for tree building
template<class OBJECT>
class CV_EXPORTS Node
{
public:
Node()
{
m_pParent = 0;
}
Node(OBJECT& payload) : m_payload(payload)
{
m_pParent = 0;
}
~Node()
{
removeChilds();
if (m_pParent)
{
int idx = m_pParent->findChild(this);
if (idx >= 0)
m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
}
}
Node<OBJECT>* findChild(OBJECT& payload) const
{
for(int i = 0; i < this->m_childs.size(); i++)
{
if(this->m_childs[i]->m_payload == payload)
return this->m_childs[i];
}
return NULL;
}
int findChild(Node<OBJECT> *pNode) const
{
for (int i = 0; i < this->m_childs.size(); i++)
{
if(this->m_childs[i] == pNode)
return i;
}
return -1;
}
void addChild(Node<OBJECT> *pNode)
{
if(!pNode)
return;
CV_Assert(pNode->m_pParent == 0);
pNode->m_pParent = this;
this->m_childs.push_back(pNode);
}
void removeChilds()
{
for(int i = 0; i < m_childs.size(); i++)
{
m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
delete m_childs[i];
}
m_childs.clear();
}
public:
OBJECT m_payload;
Node<OBJECT>* m_pParent;
std::vector<Node<OBJECT>*> m_childs;
};
// Instrumentation external interface
namespace instr
{
enum
{
TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function
TYPE_MARKER, // Information marker
TYPE_WRAPPER, // Wrapper function for implementation
TYPE_FUN, // Simple function call
};
enum
{
IMPL_PLAIN = 0,
IMPL_IPP,
IMPL_OPENCL,
};
enum
{
FLAGS_MAPPING = 0x01,
};
enum
{
COMMAND_RESET = 0x01,
};
class CV_EXPORTS NodeData
{
public:
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
NodeData(NodeData &ref);
~NodeData();
NodeData& operator=(const NodeData&);
cv::String m_funName;
int m_instrType;
int m_implType;
cv::String m_fileName;
int m_lineNum;
bool m_funError;
volatile int m_counter;
bool m_stopPoint;
uint64 m_ticksMean;
};
bool operator==(const NodeData& lhs, const NodeData& rhs);
typedef Node<NodeData> InstrNode;
CV_EXPORTS bool useInstrumentation();
CV_EXPORTS void setUseInstrumentation(bool flag);
CV_EXPORTS InstrNode* getTrace();
CV_EXPORTS void resetTrace();
CV_EXPORTS void setFlags(int modeFlags);
CV_EXPORTS int getFlags();
};
} //namespace cv
#ifndef DISABLE_OPENCV_24_COMPATIBILITY

View File

@ -149,13 +149,25 @@ namespace
public:
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
{
body = &_body;
wholeRange = _r;
double len = wholeRange.end - wholeRange.start;
nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
#ifdef ENABLE_INSTRUMENTATION
pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
#endif
}
void operator()(const cv::Range& sr) const
{
#ifdef ENABLE_INSTRUMENTATION
{
cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct();
pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
}
#endif
cv::Range r;
r.start = (int)(wholeRange.start +
((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
@ -169,6 +181,9 @@ namespace
const cv::ParallelLoopBody* body;
cv::Range wholeRange;
int nstripes;
#ifdef ENABLE_INSTRUMENTATION
cv::instr::InstrNode *pThreadRoot;
#endif
};
#if defined HAVE_TBB
@ -252,6 +267,8 @@ static SchedPtr pplScheduler;
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
CV_INSTRUMENT_REGION()
#ifdef CV_PARALLEL_FRAMEWORK
if(numThreads != 0)

View File

@ -1522,6 +1522,8 @@ namespace cv {
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
CV_INSTRUMENT_REGION_OPENCL()
bool haveMask = _mask.kind() != _InputArray::NONE;
int nz = haveMask ? -1 : (int)_src.total();
Scalar mean, stddev;
@ -1585,7 +1587,8 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
size_t globalsize = groups * wgs;
if (!k.run(1, &globalsize, &wgs, false))
if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false))
return false;
typedef Scalar (* part_sum)(Mat m);
@ -1641,6 +1644,8 @@ namespace cv
{
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 700
int cn = src.channels();
size_t total_size = src.total();
@ -1679,29 +1684,29 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
if( !mask.empty() )
{
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
ippiMaskMeanStdDevFuncC1 ippFuncC1 =
ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
0;
if( ippFuncC1 )
if( ippiMean_StdDev_C1MR )
{
if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
{
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
ippiMaskMeanStdDevFuncC3 ippFuncC3 =
ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
0;
if( ippFuncC3 )
if( ippiMean_StdDev_C3CMR )
{
if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
{
return true;
}
@ -1710,31 +1715,31 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
else
{
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
ippiMeanStdDevFuncC1 ippFuncC1 =
ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 810)
type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
#endif
0;
if( ippFuncC1 )
if( ippiMean_StdDev_C1R )
{
if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
{
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
ippiMeanStdDevFuncC3 ippFuncC3 =
ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
0;
if( ippFuncC3 )
if( ippiMean_StdDev_C3CR )
{
if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
{
return true;
}
@ -1751,6 +1756,8 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
CV_INSTRUMENT_REGION()
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_meanStdDev(_src, _mean, _sdv, _mask))

View File

@ -1300,6 +1300,210 @@ void setUseCollection(bool flag)
}
#endif
namespace instr
{
bool useInstrumentation()
{
#ifdef ENABLE_INSTRUMENTATION
return getInstrumentStruct().useInstr;
#else
return false;
#endif
}
void setUseInstrumentation(bool flag)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().useInstr = flag;
#else
CV_UNUSED(flag);
#endif
}
InstrNode* getTrace()
{
#ifdef ENABLE_INSTRUMENTATION
return &getInstrumentStruct().rootNode;
#else
return NULL;
#endif
}
void resetTrace()
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().rootNode.removeChilds();
getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode;
#endif
}
void setFlags(int modeFlags)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
#else
CV_UNUSED(modeFlags);
#endif
}
int getFlags()
{
#ifdef ENABLE_INSTRUMENTATION
int flags = 0;
if(getInstrumentStruct().enableMapping)
flags |= FLAGS_MAPPING;
return flags;
#else
return 0;
#endif
}
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
{
m_instrType = TYPE_GENERAL;
m_implType = IMPL_PLAIN;
m_funError = false;
m_counter = 0;
m_stopPoint = false;
m_ticksMean = 0;
m_funName = funName;
m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
}
NodeData::NodeData(NodeData &ref)
{
*this = ref;
}
NodeData& NodeData::operator=(const NodeData &right)
{
this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType;
this->m_funError = right.m_funError;
this->m_counter = right.m_counter;
this->m_stopPoint = right.m_stopPoint;
this->m_ticksMean = right.m_ticksMean;
this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum;
return *this;
}
NodeData::~NodeData()
{
}
bool operator==(const NodeData& left, const NodeData& right)
{
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
return true;
return false;
}
#ifdef ENABLE_INSTRUMENTATION
InstrStruct& getInstrumentStruct()
{
static InstrStruct instr;
return instr;
}
InstrTLSStruct& getInstrumentTLSStruct()
{
return *getInstrumentStruct().tlsStruct.get();
}
InstrNode* getCurrentNode()
{
return getInstrumentTLSStruct().pCurrentNode;
}
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
{
m_disabled = false;
m_regionTicks = 0;
InstrStruct *pStruct = &getInstrumentStruct();
if(pStruct->useInstr)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
// Disable in case of failure
if(!pTLS->pCurrentNode)
{
m_disabled = true;
return;
}
m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint;
if(m_disabled)
return;
NodeData payload(funName, fileName, lineNum, instrType, implType);
Node<NodeData>* pChild = NULL;
if(pStruct->enableMapping)
{
// Critical section
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
pChild = new Node<NodeData>(payload);
pTLS->pCurrentNode->addChild(pChild);
}
}
else
{
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
pTLS->pCurrentNode->m_payload.m_stopPoint = true;
return;
}
}
pTLS->pCurrentNode = pChild;
m_regionTicks = getTickCount();
}
}
IntrumentationRegion::~IntrumentationRegion()
{
InstrStruct *pStruct = &getInstrumentStruct();
if(pStruct->useInstr)
{
if(!m_disabled)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
if(pTLS->pCurrentNode->m_payload.m_stopPoint)
{
pTLS->pCurrentNode->m_payload.m_stopPoint = false;
}
else
{
if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
cv::ocl::finish();
uint64 ticks = (getTickCount() - m_regionTicks);
{
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++;
if(pTLS->pCurrentNode->m_payload.m_counter <= 1)
pTLS->pCurrentNode->m_payload.m_ticksMean = ticks;
else
pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter;
}
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
}
}
}
}
#endif
}
namespace ipp
{

View File

@ -97,13 +97,13 @@ using std::tr1::tuple;
// TODO Replace finish call to dstUMat.wait()
#define OCL_TEST_CYCLE() \
for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
#define OCL_TEST_CYCLE_N(n) \
for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) \
for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r)

View File

@ -354,6 +354,15 @@ typedef struct ImplData
} ImplData;
#endif
#ifdef ENABLE_INSTRUMENTATION
class InstumentData
{
public:
static ::cv::String treeToString();
static void printTree();
};
#endif
class CV_EXPORTS TestBase: public ::testing::Test
{
public:
@ -382,7 +391,7 @@ protected:
virtual void SetUp();
virtual void TearDown();
void startTimer();
bool startTimer(); // bool is dummy for conditional loop
void stopTimer();
bool next();
@ -406,6 +415,10 @@ protected:
#ifdef CV_COLLECT_IMPL_DATA
ImplData implConf;
#endif
#ifdef ENABLE_INSTRUMENTATION
InstumentData instrConf;
#endif
private:
typedef std::vector<std::pair<int, cv::Size> > SizeVector;
typedef std::vector<int64> TimeVector;
@ -645,9 +658,9 @@ int main(int argc, char **argv)\
CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\
}
#define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer())
#define TEST_CYCLE() for(; startTimer(), next(); stopTimer())
#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ++r)
#define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer())
#define TEST_CYCLE() for(; next() && startTimer(); stopTimer())
#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r)
namespace perf
{

View File

@ -45,6 +45,9 @@ static bool param_verify_sanity;
#ifdef CV_COLLECT_IMPL_DATA
static bool param_collect_impl;
#endif
#ifdef ENABLE_INSTRUMENTATION
static bool param_instrument;
#endif
extern bool test_ipp_check;
#ifdef HAVE_CUDA
@ -727,7 +730,219 @@ public:
}
};
#ifdef ENABLE_INSTRUMENTATION
static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
{
// Print empty line for a big tree nodes
if(pNode->m_pParent)
{
int parendIdx = pNode->m_pParent->findChild(pNode);
if(parendIdx > 0 && pNode->m_pParent->m_childs[parendIdx-1]->m_childs.size())
{
printShift(pNode->m_pParent->m_childs[parendIdx-1]->m_childs[0], pRoot);
printf("\n");
}
}
// Check if parents have more childs
std::vector<cv::instr::InstrNode*> cache;
cv::instr::InstrNode *pTmpNode = pNode;
while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
{
cache.push_back(pTmpNode->m_pParent);
pTmpNode = pTmpNode->m_pParent;
}
for(int i = (int)cache.size()-1; i >= 0; i--)
{
if(cache[i]->m_pParent)
{
if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1)
printf(" ");
else
printf("| ");
}
}
}
static double calcLocalWeight(cv::instr::InstrNode *pNode)
{
if(pNode->m_pParent && pNode->m_pParent->m_pParent)
return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean);
else
return 100;
}
static double calcGlobalWeight(cv::instr::InstrNode *pNode)
{
cv::instr::InstrNode* globNode = pNode;
while(globNode->m_pParent && globNode->m_pParent->m_pParent)
globNode = globNode->m_pParent;
return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean);
}
static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot)
{
printf("%s", (pNode->m_payload.m_funName.substr(0, 40) + ((pNode->m_payload.m_funName.size()>40)?"...":"")).c_str());
// Write instrumentation flags
if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL || pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
{
printf("<");
if(pNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)
printf("W");
else if(pNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
printf("F");
else if(pNode->m_payload.m_instrType == cv::instr::TYPE_MARKER)
printf("MARK");
if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL && pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
printf("_");
if(pNode->m_payload.m_implType == cv::instr::IMPL_IPP)
printf("IPP");
else if(pNode->m_payload.m_implType == cv::instr::IMPL_OPENCL)
printf("OCL");
printf(">");
}
if(pNode->m_pParent)
{
printf(" - C:%d", pNode->m_payload.m_counter);
printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000);
if(pNode->m_pParent->m_pParent)
printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
}
printf("\n");
// Group childes
std::vector<cv::String> groups;
{
bool bFound = false;
for(size_t i = 0; i < pNode->m_childs.size(); i++)
{
bFound = false;
for(size_t j = 0; j < groups.size(); j++)
{
if(groups[j] == pNode->m_childs[i]->m_payload.m_funName)
{
bFound = true;
break;
}
}
if(!bFound)
groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
}
}
for(size_t g = 0; g < groups.size(); g++)
{
for(size_t i = 0; i < pNode->m_childs.size(); i++)
{
if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
{
printShift(pNode->m_childs[i], pRoot);
if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i]))
printf("\\---");
else
printf("|---");
printNodeRec(pNode->m_childs[i], pRoot);
}
}
}
}
template <typename T>
std::string to_string_with_precision(const T value, const int p = 3)
{
std::ostringstream out;
out << std::fixed << std::setprecision(p) << value;
return out.str();
}
static cv::String nodeToString(cv::instr::InstrNode *pNode)
{
cv::String string;
if (pNode->m_payload.m_funName == "ROOT")
string = pNode->m_payload.m_funName;
else
{
string = "#";
string += std::to_string(pNode->m_payload.m_instrType);
string += pNode->m_payload.m_funName;
string += " - L:";
string += to_string_with_precision(calcLocalWeight(pNode));
string += ", G:";
string += to_string_with_precision(calcGlobalWeight(pNode));
}
string += "(";
for(size_t i = 0; i < pNode->m_childs.size(); i++)
string += nodeToString(pNode->m_childs[i]);
string += ")";
return string;
}
static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
{
uint64 ticks = 0;
if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl)
ticks = pNode->m_payload.m_ticksMean;
for(size_t i = 0; i < pNode->m_childs.size(); i++)
ticks += getNodeTimeRec(pNode->m_childs[i], type, impl);
return ticks;
}
static uint64 getImplTime(int impl)
{
uint64 ticks = 0;
cv::instr::InstrNode *pRoot = cv::instr::getTrace();
ticks = getNodeTimeRec(pRoot, cv::instr::TYPE_FUN, impl);
return ticks;
}
static uint64 getTotalTime()
{
uint64 ticks = 0;
cv::instr::InstrNode *pRoot = cv::instr::getTrace();
for(size_t i = 0; i < pRoot->m_childs.size(); i++)
ticks += pRoot->m_childs[i]->m_payload.m_ticksMean;
return ticks;
}
::cv::String InstumentData::treeToString()
{
cv::String string = nodeToString(cv::instr::getTrace());
return string;
}
void InstumentData::printTree()
{
if(cv::instr::getTrace()->m_childs.size())
{
printf("[ TRACE ]\n");
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
#ifdef HAVE_IPP
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
#endif
#ifdef HAVE_OPENCL
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
#endif
printf("\n[/TRACE ]\n");
fflush(stdout);
}
}
#endif
/*****************************************************************************************\
* ::perf::TestBase
@ -774,6 +989,9 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
#endif
#ifdef CV_COLLECT_IMPL_DATA
"{ perf_collect_impl |false |collect info about executed implementations}"
#endif
#ifdef ENABLE_INSTRUMENTATION
"{ perf_instrument |false |instrument code to collect implementations trace}"
#endif
"{ help h |false |print help info}"
#ifdef HAVE_CUDA
@ -826,6 +1044,9 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
#ifdef CV_COLLECT_IMPL_DATA
param_collect_impl = args.get<bool>("perf_collect_impl");
#endif
#ifdef ENABLE_INSTRUMENTATION
param_instrument = args.get<bool>("perf_instrument");
#endif
#ifdef ANDROID
param_affinity_mask = args.get<int>("perf_affinity_mask");
log_power_checkpoints = args.has("perf_log_power_checkpoints");
@ -856,6 +1077,12 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
else
cv::setUseCollection(0);
#endif
#ifdef ENABLE_INSTRUMENTATION
if(param_instrument)
cv::instr::setUseInstrumentation(true);
else
cv::instr::setUseInstrumentation(false);
#endif
#ifdef HAVE_CUDA
@ -969,13 +1196,13 @@ int64 TestBase::_calibrate()
cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2));
declare.time(30);
double s = 0;
for(declare.iterations(20); startTimer(), next(); stopTimer())
for(declare.iterations(20); next() && startTimer(); stopTimer())
s+=a.dot(b);
declare.time(s);
//self calibration
SetUp();
for(declare.iterations(1000); startTimer(), next(); stopTimer()){}
for(declare.iterations(1000); next() && startTimer(); stopTimer()){}
}
};
@ -1242,8 +1469,6 @@ bool TestBase::next()
}
#endif
if (has_next)
startTimer(); // really we should measure activity from this moment, so reset start time
return has_next;
}
@ -1281,9 +1506,17 @@ unsigned int TestBase::getTotalOutputSize() const
return res;
}
void TestBase::startTimer()
bool TestBase::startTimer()
{
#ifdef ENABLE_INSTRUMENTATION
if(currentIter == 0)
{
cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_MAPPING); // enable mapping for the first run
cv::instr::resetTrace();
}
#endif
lastTime = cv::getTickCount();
return true; // dummy true for conditional loop
}
void TestBase::stopTimer()
@ -1297,6 +1530,10 @@ void TestBase::stopTimer()
if (lastTime < 0) lastTime = 0;
times.push_back(lastTime);
lastTime = 0;
#ifdef ENABLE_INSTRUMENTATION
cv::instr::setFlags(cv::instr::getFlags()&~cv::instr::FLAGS_MAPPING); // disable mapping to decrease overhead for +1 run
#endif
}
performance_metrics& TestBase::calcMetrics()
@ -1469,6 +1706,16 @@ void TestBase::reportMetrics(bool toJUnitXML)
RecordProperty("gstddev", cv::format("%.6f", m.gstddev).c_str());
RecordProperty("mean", cv::format("%.0f", m.mean).c_str());
RecordProperty("stddev", cv::format("%.0f", m.stddev).c_str());
#ifdef ENABLE_INSTRUMENTATION
if(cv::instr::useInstrumentation())
{
cv::String tree = InstumentData::treeToString();
RecordProperty("functions_hierarchy", tree.c_str());
RecordProperty("total_ipp_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())));
RecordProperty("total_opencl_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())));
cv::instr::resetTrace();
}
#endif
#ifdef CV_COLLECT_IMPL_DATA
if(param_collect_impl)
{
@ -1639,6 +1886,12 @@ void TestBase::TearDown()
fflush(stdout);
}
#endif
#ifdef ENABLE_INSTRUMENTATION
if(cv::instr::useInstrumentation())
InstumentData::printTree();
#endif
reportMetrics(true);
}