mirror of
https://github.com/opencv/opencv.git
synced 2024-12-04 00:39:11 +08:00
Implementations instrumentation framework for OpenCV performance system;
This commit is contained in:
parent
cd943cc60d
commit
a462d76a95
@ -286,6 +286,7 @@ OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too no
|
||||
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
|
||||
OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF IF ANDROID )
|
||||
OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
|
||||
OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
|
||||
OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
|
||||
|
||||
OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
|
||||
|
@ -212,3 +212,6 @@
|
||||
|
||||
/* FP16 */
|
||||
#cmakedefine HAVE_FP16
|
||||
|
||||
/* Library was compiled with functions instrumentation */
|
||||
#cmakedefine ENABLE_INSTRUMENTATION
|
||||
|
@ -344,7 +344,7 @@ static struct __IppInitializer__ __ipp_initializer__;
|
||||
#ifdef CV_IPP_RUN_VERBOSE
|
||||
#define CV_IPP_RUN_(condition, func, ...) \
|
||||
{ \
|
||||
if (cv::ipp::useIPP() && (condition) && func) \
|
||||
if (cv::ipp::useIPP() && (condition) && (func)) \
|
||||
{ \
|
||||
printf("%s: IPP implementation is running\n", CV_Func); \
|
||||
fflush(stdout); \
|
||||
@ -376,7 +376,7 @@ static struct __IppInitializer__ __ipp_initializer__;
|
||||
}
|
||||
#else
|
||||
#define CV_IPP_RUN_(condition, func, ...) \
|
||||
if (cv::ipp::useIPP() && (condition) && func) \
|
||||
if (cv::ipp::useIPP() && (condition) && (func)) \
|
||||
{ \
|
||||
CV_IMPL_ADD(CV_IMPL_IPP); \
|
||||
return __VA_ARGS__; \
|
||||
@ -393,7 +393,7 @@ static struct __IppInitializer__ __ipp_initializer__;
|
||||
#define CV_IPP_RUN_FAST(func, ...)
|
||||
#endif
|
||||
|
||||
#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__)
|
||||
#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__)
|
||||
|
||||
|
||||
#ifndef IPPI_CALL
|
||||
@ -449,6 +449,135 @@ CV_EXPORTS void setUseTegra(bool flag);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
namespace cv
|
||||
{
|
||||
namespace instr
|
||||
{
|
||||
struct InstrTLSStruct
|
||||
{
|
||||
InstrTLSStruct()
|
||||
{
|
||||
pCurrentNode = NULL;
|
||||
}
|
||||
InstrNode* pCurrentNode;
|
||||
};
|
||||
|
||||
class InstrStruct
|
||||
{
|
||||
public:
|
||||
InstrStruct()
|
||||
{
|
||||
useInstr = false;
|
||||
enableMapping = true;
|
||||
|
||||
rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN);
|
||||
tlsStruct.get()->pCurrentNode = &rootNode;
|
||||
}
|
||||
|
||||
Mutex mutexCreate;
|
||||
Mutex mutexCount;
|
||||
|
||||
bool useInstr;
|
||||
bool enableMapping;
|
||||
InstrNode rootNode;
|
||||
TLSData<InstrTLSStruct> tlsStruct;
|
||||
};
|
||||
|
||||
class CV_EXPORTS IntrumentationRegion
|
||||
{
|
||||
public:
|
||||
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
|
||||
~IntrumentationRegion();
|
||||
|
||||
private:
|
||||
bool m_disabled; // region status
|
||||
uint64 m_regionTicks;
|
||||
};
|
||||
|
||||
InstrStruct& getInstrumentStruct();
|
||||
InstrTLSStruct& getInstrumentTLSStruct();
|
||||
CV_EXPORTS InstrNode* getCurrentNode();
|
||||
}
|
||||
}
|
||||
|
||||
///// General instrumentation
|
||||
// Instrument region
|
||||
#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...) ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL);
|
||||
// Instrument functions with non-void return type
|
||||
#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
|
||||
{\
|
||||
if(::cv::instr::useInstrumentation()){\
|
||||
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
|
||||
try{\
|
||||
auto status = ((FUN)(__VA_ARGS__));\
|
||||
if(ERROR_COND){\
|
||||
::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
|
||||
CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
|
||||
}\
|
||||
return status;\
|
||||
}catch(...){\
|
||||
::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
|
||||
CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
|
||||
throw;\
|
||||
}\
|
||||
}else{\
|
||||
return ((FUN)(__VA_ARGS__));\
|
||||
}\
|
||||
}())
|
||||
// Instrument functions with void return type
|
||||
#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
|
||||
{\
|
||||
if(::cv::instr::useInstrumentation()){\
|
||||
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
|
||||
try{\
|
||||
(FUN)(__VA_ARGS__);\
|
||||
}catch(...){\
|
||||
::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
|
||||
CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
|
||||
throw;\
|
||||
}\
|
||||
}else{\
|
||||
(FUN)(__VA_ARGS__);\
|
||||
}\
|
||||
}())
|
||||
// Instrumentation information marker
|
||||
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(#NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);}
|
||||
|
||||
///// General instrumentation
|
||||
// General OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
|
||||
// Parallel OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
|
||||
|
||||
///// IPP instrumentation
|
||||
// Wrapper region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
|
||||
// Function instrumentation macro
|
||||
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
|
||||
// Diagnostic markers
|
||||
#define CV_INSTRUMENT_MARK_IPP(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME)
|
||||
|
||||
///// OpenCL instrumentation
|
||||
// Wrapper region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
|
||||
// Function instrumentation macro
|
||||
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
|
||||
// Diagnostic markers
|
||||
#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
|
||||
#else
|
||||
#define CV_INSTRUMENT_REGION()
|
||||
#define CV_INSTRUMENT_REGION_MT()
|
||||
|
||||
#define CV_INSTRUMENT_REGION_IPP()
|
||||
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
|
||||
#define CV_INSTRUMENT_MARK_IPP(NAME)
|
||||
|
||||
#define CV_INSTRUMENT_REGION_OPENCL()
|
||||
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
|
||||
#define CV_INSTRUMENT_MARK_OPENCL(NAME)
|
||||
#endif
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CORE_PRIVATE_HPP__
|
||||
|
@ -1002,6 +1002,137 @@ template<> inline std::string CommandLineParser::get<std::string>(const String&
|
||||
|
||||
//! @endcond
|
||||
|
||||
|
||||
// Basic Node class for tree building
|
||||
template<class OBJECT>
|
||||
class CV_EXPORTS Node
|
||||
{
|
||||
public:
|
||||
Node()
|
||||
{
|
||||
m_pParent = 0;
|
||||
}
|
||||
Node(OBJECT& payload) : m_payload(payload)
|
||||
{
|
||||
m_pParent = 0;
|
||||
}
|
||||
~Node()
|
||||
{
|
||||
removeChilds();
|
||||
if (m_pParent)
|
||||
{
|
||||
int idx = m_pParent->findChild(this);
|
||||
if (idx >= 0)
|
||||
m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
|
||||
}
|
||||
}
|
||||
|
||||
Node<OBJECT>* findChild(OBJECT& payload) const
|
||||
{
|
||||
for(int i = 0; i < this->m_childs.size(); i++)
|
||||
{
|
||||
if(this->m_childs[i]->m_payload == payload)
|
||||
return this->m_childs[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int findChild(Node<OBJECT> *pNode) const
|
||||
{
|
||||
for (int i = 0; i < this->m_childs.size(); i++)
|
||||
{
|
||||
if(this->m_childs[i] == pNode)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void addChild(Node<OBJECT> *pNode)
|
||||
{
|
||||
if(!pNode)
|
||||
return;
|
||||
|
||||
CV_Assert(pNode->m_pParent == 0);
|
||||
pNode->m_pParent = this;
|
||||
this->m_childs.push_back(pNode);
|
||||
}
|
||||
|
||||
void removeChilds()
|
||||
{
|
||||
for(int i = 0; i < m_childs.size(); i++)
|
||||
{
|
||||
m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
|
||||
delete m_childs[i];
|
||||
}
|
||||
m_childs.clear();
|
||||
}
|
||||
|
||||
public:
|
||||
OBJECT m_payload;
|
||||
Node<OBJECT>* m_pParent;
|
||||
std::vector<Node<OBJECT>*> m_childs;
|
||||
};
|
||||
|
||||
// Instrumentation external interface
|
||||
namespace instr
|
||||
{
|
||||
enum
|
||||
{
|
||||
TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function
|
||||
TYPE_MARKER, // Information marker
|
||||
TYPE_WRAPPER, // Wrapper function for implementation
|
||||
TYPE_FUN, // Simple function call
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
IMPL_PLAIN = 0,
|
||||
IMPL_IPP,
|
||||
IMPL_OPENCL,
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
FLAGS_MAPPING = 0x01,
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
COMMAND_RESET = 0x01,
|
||||
};
|
||||
|
||||
class CV_EXPORTS NodeData
|
||||
{
|
||||
public:
|
||||
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
|
||||
NodeData(NodeData &ref);
|
||||
~NodeData();
|
||||
NodeData& operator=(const NodeData&);
|
||||
|
||||
cv::String m_funName;
|
||||
int m_instrType;
|
||||
int m_implType;
|
||||
cv::String m_fileName;
|
||||
int m_lineNum;
|
||||
|
||||
bool m_funError;
|
||||
volatile int m_counter;
|
||||
bool m_stopPoint;
|
||||
uint64 m_ticksMean;
|
||||
|
||||
};
|
||||
bool operator==(const NodeData& lhs, const NodeData& rhs);
|
||||
|
||||
typedef Node<NodeData> InstrNode;
|
||||
|
||||
CV_EXPORTS bool useInstrumentation();
|
||||
CV_EXPORTS void setUseInstrumentation(bool flag);
|
||||
CV_EXPORTS InstrNode* getTrace();
|
||||
CV_EXPORTS void resetTrace();
|
||||
CV_EXPORTS void setFlags(int modeFlags);
|
||||
CV_EXPORTS int getFlags();
|
||||
};
|
||||
|
||||
} //namespace cv
|
||||
|
||||
#ifndef DISABLE_OPENCV_24_COMPATIBILITY
|
||||
|
@ -149,13 +149,25 @@ namespace
|
||||
public:
|
||||
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
|
||||
{
|
||||
|
||||
body = &_body;
|
||||
wholeRange = _r;
|
||||
double len = wholeRange.end - wholeRange.start;
|
||||
nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
|
||||
#endif
|
||||
}
|
||||
void operator()(const cv::Range& sr) const
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
{
|
||||
cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct();
|
||||
pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
|
||||
}
|
||||
#endif
|
||||
|
||||
cv::Range r;
|
||||
r.start = (int)(wholeRange.start +
|
||||
((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
||||
@ -169,6 +181,9 @@ namespace
|
||||
const cv::ParallelLoopBody* body;
|
||||
cv::Range wholeRange;
|
||||
int nstripes;
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
cv::instr::InstrNode *pThreadRoot;
|
||||
#endif
|
||||
};
|
||||
|
||||
#if defined HAVE_TBB
|
||||
@ -252,6 +267,8 @@ static SchedPtr pplScheduler;
|
||||
|
||||
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
#ifdef CV_PARALLEL_FRAMEWORK
|
||||
|
||||
if(numThreads != 0)
|
||||
|
@ -1522,6 +1522,8 @@ namespace cv {
|
||||
|
||||
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
|
||||
{
|
||||
CV_INSTRUMENT_REGION_OPENCL()
|
||||
|
||||
bool haveMask = _mask.kind() != _InputArray::NONE;
|
||||
int nz = haveMask ? -1 : (int)_src.total();
|
||||
Scalar mean, stddev;
|
||||
@ -1585,7 +1587,8 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
|
||||
k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
|
||||
|
||||
size_t globalsize = groups * wgs;
|
||||
if (!k.run(1, &globalsize, &wgs, false))
|
||||
|
||||
if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false))
|
||||
return false;
|
||||
|
||||
typedef Scalar (* part_sum)(Mat m);
|
||||
@ -1641,6 +1644,8 @@ namespace cv
|
||||
{
|
||||
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 >= 700
|
||||
int cn = src.channels();
|
||||
size_t total_size = src.total();
|
||||
@ -1679,29 +1684,29 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
|
||||
if( !mask.empty() )
|
||||
{
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
|
||||
ippiMaskMeanStdDevFuncC1 ippFuncC1 =
|
||||
ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
|
||||
type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
|
||||
type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
|
||||
type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
|
||||
0;
|
||||
if( ippFuncC1 )
|
||||
if( ippiMean_StdDev_C1MR )
|
||||
{
|
||||
if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
|
||||
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
|
||||
ippiMaskMeanStdDevFuncC3 ippFuncC3 =
|
||||
ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
|
||||
type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
|
||||
type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
|
||||
type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
|
||||
0;
|
||||
if( ippFuncC3 )
|
||||
if( ippiMean_StdDev_C3CMR )
|
||||
{
|
||||
if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
|
||||
ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
|
||||
ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
|
||||
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
|
||||
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
|
||||
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@ -1710,31 +1715,31 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
|
||||
else
|
||||
{
|
||||
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
|
||||
ippiMeanStdDevFuncC1 ippFuncC1 =
|
||||
ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
|
||||
type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
|
||||
type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
|
||||
#if (IPP_VERSION_X100 >= 810)
|
||||
type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
|
||||
#endif
|
||||
0;
|
||||
if( ippFuncC1 )
|
||||
if( ippiMean_StdDev_C1R )
|
||||
{
|
||||
if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
|
||||
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
|
||||
ippiMeanStdDevFuncC3 ippFuncC3 =
|
||||
ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
|
||||
type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
|
||||
type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
|
||||
type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
|
||||
0;
|
||||
if( ippFuncC3 )
|
||||
if( ippiMean_StdDev_C3CR )
|
||||
{
|
||||
if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
|
||||
ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
|
||||
ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
|
||||
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
|
||||
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
|
||||
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@ -1751,6 +1756,8 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
|
||||
|
||||
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||
ocl_meanStdDev(_src, _mean, _sdv, _mask))
|
||||
|
||||
|
@ -1300,6 +1300,210 @@ void setUseCollection(bool flag)
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace instr
|
||||
{
|
||||
bool useInstrumentation()
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
return getInstrumentStruct().useInstr;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void setUseInstrumentation(bool flag)
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
getInstrumentStruct().useInstr = flag;
|
||||
#else
|
||||
CV_UNUSED(flag);
|
||||
#endif
|
||||
}
|
||||
|
||||
InstrNode* getTrace()
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
return &getInstrumentStruct().rootNode;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
void resetTrace()
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
getInstrumentStruct().rootNode.removeChilds();
|
||||
getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode;
|
||||
#endif
|
||||
}
|
||||
|
||||
void setFlags(int modeFlags)
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
|
||||
#else
|
||||
CV_UNUSED(modeFlags);
|
||||
#endif
|
||||
}
|
||||
int getFlags()
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
int flags = 0;
|
||||
if(getInstrumentStruct().enableMapping)
|
||||
flags |= FLAGS_MAPPING;
|
||||
return flags;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
|
||||
{
|
||||
m_instrType = TYPE_GENERAL;
|
||||
m_implType = IMPL_PLAIN;
|
||||
|
||||
m_funError = false;
|
||||
m_counter = 0;
|
||||
m_stopPoint = false;
|
||||
m_ticksMean = 0;
|
||||
|
||||
m_funName = funName;
|
||||
m_instrType = instrType;
|
||||
m_implType = implType;
|
||||
m_fileName = fileName;
|
||||
m_lineNum = lineNum;
|
||||
}
|
||||
NodeData::NodeData(NodeData &ref)
|
||||
{
|
||||
*this = ref;
|
||||
}
|
||||
NodeData& NodeData::operator=(const NodeData &right)
|
||||
{
|
||||
this->m_funName = right.m_funName;
|
||||
this->m_instrType = right.m_instrType;
|
||||
this->m_implType = right.m_implType;
|
||||
this->m_funError = right.m_funError;
|
||||
this->m_counter = right.m_counter;
|
||||
this->m_stopPoint = right.m_stopPoint;
|
||||
this->m_ticksMean = right.m_ticksMean;
|
||||
this->m_fileName = right.m_fileName;
|
||||
this->m_lineNum = right.m_lineNum;
|
||||
|
||||
return *this;
|
||||
}
|
||||
NodeData::~NodeData()
|
||||
{
|
||||
}
|
||||
bool operator==(const NodeData& left, const NodeData& right)
|
||||
{
|
||||
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
InstrStruct& getInstrumentStruct()
|
||||
{
|
||||
static InstrStruct instr;
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstrTLSStruct& getInstrumentTLSStruct()
|
||||
{
|
||||
return *getInstrumentStruct().tlsStruct.get();
|
||||
}
|
||||
|
||||
InstrNode* getCurrentNode()
|
||||
{
|
||||
return getInstrumentTLSStruct().pCurrentNode;
|
||||
}
|
||||
|
||||
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
|
||||
{
|
||||
m_disabled = false;
|
||||
m_regionTicks = 0;
|
||||
|
||||
InstrStruct *pStruct = &getInstrumentStruct();
|
||||
if(pStruct->useInstr)
|
||||
{
|
||||
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
|
||||
|
||||
// Disable in case of failure
|
||||
if(!pTLS->pCurrentNode)
|
||||
{
|
||||
m_disabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint;
|
||||
if(m_disabled)
|
||||
return;
|
||||
|
||||
NodeData payload(funName, fileName, lineNum, instrType, implType);
|
||||
Node<NodeData>* pChild = NULL;
|
||||
|
||||
if(pStruct->enableMapping)
|
||||
{
|
||||
// Critical section
|
||||
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
|
||||
pChild = pTLS->pCurrentNode->findChild(payload);
|
||||
if(!pChild)
|
||||
{
|
||||
pChild = new Node<NodeData>(payload);
|
||||
pTLS->pCurrentNode->addChild(pChild);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pChild = pTLS->pCurrentNode->findChild(payload);
|
||||
if(!pChild)
|
||||
{
|
||||
pTLS->pCurrentNode->m_payload.m_stopPoint = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
pTLS->pCurrentNode = pChild;
|
||||
|
||||
m_regionTicks = getTickCount();
|
||||
}
|
||||
}
|
||||
|
||||
IntrumentationRegion::~IntrumentationRegion()
|
||||
{
|
||||
InstrStruct *pStruct = &getInstrumentStruct();
|
||||
if(pStruct->useInstr)
|
||||
{
|
||||
if(!m_disabled)
|
||||
{
|
||||
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
|
||||
if(pTLS->pCurrentNode->m_payload.m_stopPoint)
|
||||
{
|
||||
pTLS->pCurrentNode->m_payload.m_stopPoint = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
|
||||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
|
||||
cv::ocl::finish();
|
||||
|
||||
uint64 ticks = (getTickCount() - m_regionTicks);
|
||||
{
|
||||
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
|
||||
pTLS->pCurrentNode->m_payload.m_counter++;
|
||||
if(pTLS->pCurrentNode->m_payload.m_counter <= 1)
|
||||
pTLS->pCurrentNode->m_payload.m_ticksMean = ticks;
|
||||
else
|
||||
pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter;
|
||||
}
|
||||
|
||||
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace ipp
|
||||
{
|
||||
|
||||
|
@ -97,13 +97,13 @@ using std::tr1::tuple;
|
||||
|
||||
// TODO Replace finish call to dstUMat.wait()
|
||||
#define OCL_TEST_CYCLE() \
|
||||
for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
|
||||
for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
|
||||
|
||||
#define OCL_TEST_CYCLE_N(n) \
|
||||
for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
|
||||
for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
|
||||
|
||||
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) \
|
||||
for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
|
||||
for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
|
||||
for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r)
|
||||
|
||||
|
||||
|
@ -354,6 +354,15 @@ typedef struct ImplData
|
||||
} ImplData;
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
class InstumentData
|
||||
{
|
||||
public:
|
||||
static ::cv::String treeToString();
|
||||
static void printTree();
|
||||
};
|
||||
#endif
|
||||
|
||||
class CV_EXPORTS TestBase: public ::testing::Test
|
||||
{
|
||||
public:
|
||||
@ -382,7 +391,7 @@ protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
void startTimer();
|
||||
bool startTimer(); // bool is dummy for conditional loop
|
||||
void stopTimer();
|
||||
bool next();
|
||||
|
||||
@ -406,6 +415,10 @@ protected:
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
ImplData implConf;
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
InstumentData instrConf;
|
||||
#endif
|
||||
|
||||
private:
|
||||
typedef std::vector<std::pair<int, cv::Size> > SizeVector;
|
||||
typedef std::vector<int64> TimeVector;
|
||||
@ -645,9 +658,9 @@ int main(int argc, char **argv)\
|
||||
CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\
|
||||
}
|
||||
|
||||
#define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer())
|
||||
#define TEST_CYCLE() for(; startTimer(), next(); stopTimer())
|
||||
#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ++r)
|
||||
#define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer())
|
||||
#define TEST_CYCLE() for(; next() && startTimer(); stopTimer())
|
||||
#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r)
|
||||
|
||||
namespace perf
|
||||
{
|
||||
|
@ -45,6 +45,9 @@ static bool param_verify_sanity;
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
static bool param_collect_impl;
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
static bool param_instrument;
|
||||
#endif
|
||||
extern bool test_ipp_check;
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
@ -727,7 +730,219 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
|
||||
{
|
||||
// Print empty line for a big tree nodes
|
||||
if(pNode->m_pParent)
|
||||
{
|
||||
int parendIdx = pNode->m_pParent->findChild(pNode);
|
||||
if(parendIdx > 0 && pNode->m_pParent->m_childs[parendIdx-1]->m_childs.size())
|
||||
{
|
||||
printShift(pNode->m_pParent->m_childs[parendIdx-1]->m_childs[0], pRoot);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Check if parents have more childs
|
||||
std::vector<cv::instr::InstrNode*> cache;
|
||||
cv::instr::InstrNode *pTmpNode = pNode;
|
||||
while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
|
||||
{
|
||||
cache.push_back(pTmpNode->m_pParent);
|
||||
pTmpNode = pTmpNode->m_pParent;
|
||||
}
|
||||
for(int i = (int)cache.size()-1; i >= 0; i--)
|
||||
{
|
||||
if(cache[i]->m_pParent)
|
||||
{
|
||||
if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1)
|
||||
printf(" ");
|
||||
else
|
||||
printf("| ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static double calcLocalWeight(cv::instr::InstrNode *pNode)
|
||||
{
|
||||
if(pNode->m_pParent && pNode->m_pParent->m_pParent)
|
||||
return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean);
|
||||
else
|
||||
return 100;
|
||||
}
|
||||
|
||||
static double calcGlobalWeight(cv::instr::InstrNode *pNode)
|
||||
{
|
||||
cv::instr::InstrNode* globNode = pNode;
|
||||
|
||||
while(globNode->m_pParent && globNode->m_pParent->m_pParent)
|
||||
globNode = globNode->m_pParent;
|
||||
|
||||
return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean);
|
||||
}
|
||||
|
||||
static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot)
|
||||
{
|
||||
printf("%s", (pNode->m_payload.m_funName.substr(0, 40) + ((pNode->m_payload.m_funName.size()>40)?"...":"")).c_str());
|
||||
|
||||
// Write instrumentation flags
|
||||
if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL || pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
|
||||
{
|
||||
printf("<");
|
||||
if(pNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)
|
||||
printf("W");
|
||||
else if(pNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
|
||||
printf("F");
|
||||
else if(pNode->m_payload.m_instrType == cv::instr::TYPE_MARKER)
|
||||
printf("MARK");
|
||||
|
||||
if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL && pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
|
||||
printf("_");
|
||||
|
||||
if(pNode->m_payload.m_implType == cv::instr::IMPL_IPP)
|
||||
printf("IPP");
|
||||
else if(pNode->m_payload.m_implType == cv::instr::IMPL_OPENCL)
|
||||
printf("OCL");
|
||||
|
||||
printf(">");
|
||||
}
|
||||
|
||||
if(pNode->m_pParent)
|
||||
{
|
||||
printf(" - C:%d", pNode->m_payload.m_counter);
|
||||
printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000);
|
||||
if(pNode->m_pParent->m_pParent)
|
||||
printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Group childes
|
||||
std::vector<cv::String> groups;
|
||||
{
|
||||
bool bFound = false;
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
{
|
||||
bFound = false;
|
||||
for(size_t j = 0; j < groups.size(); j++)
|
||||
{
|
||||
if(groups[j] == pNode->m_childs[i]->m_payload.m_funName)
|
||||
{
|
||||
bFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!bFound)
|
||||
groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
|
||||
}
|
||||
}
|
||||
|
||||
for(size_t g = 0; g < groups.size(); g++)
|
||||
{
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
{
|
||||
if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
|
||||
{
|
||||
printShift(pNode->m_childs[i], pRoot);
|
||||
|
||||
if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i]))
|
||||
printf("\\---");
|
||||
else
|
||||
printf("|---");
|
||||
printNodeRec(pNode->m_childs[i], pRoot);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string to_string_with_precision(const T value, const int p = 3)
|
||||
{
|
||||
std::ostringstream out;
|
||||
out << std::fixed << std::setprecision(p) << value;
|
||||
return out.str();
|
||||
}
|
||||
|
||||
static cv::String nodeToString(cv::instr::InstrNode *pNode)
|
||||
{
|
||||
cv::String string;
|
||||
if (pNode->m_payload.m_funName == "ROOT")
|
||||
string = pNode->m_payload.m_funName;
|
||||
else
|
||||
{
|
||||
string = "#";
|
||||
string += std::to_string(pNode->m_payload.m_instrType);
|
||||
string += pNode->m_payload.m_funName;
|
||||
string += " - L:";
|
||||
string += to_string_with_precision(calcLocalWeight(pNode));
|
||||
string += ", G:";
|
||||
string += to_string_with_precision(calcGlobalWeight(pNode));
|
||||
}
|
||||
string += "(";
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
string += nodeToString(pNode->m_childs[i]);
|
||||
string += ")";
|
||||
|
||||
return string;
|
||||
}
|
||||
|
||||
static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
|
||||
{
|
||||
uint64 ticks = 0;
|
||||
|
||||
if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl)
|
||||
ticks = pNode->m_payload.m_ticksMean;
|
||||
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
ticks += getNodeTimeRec(pNode->m_childs[i], type, impl);
|
||||
|
||||
return ticks;
|
||||
}
|
||||
|
||||
static uint64 getImplTime(int impl)
|
||||
{
|
||||
uint64 ticks = 0;
|
||||
cv::instr::InstrNode *pRoot = cv::instr::getTrace();
|
||||
|
||||
ticks = getNodeTimeRec(pRoot, cv::instr::TYPE_FUN, impl);
|
||||
|
||||
return ticks;
|
||||
}
|
||||
|
||||
static uint64 getTotalTime()
|
||||
{
|
||||
uint64 ticks = 0;
|
||||
cv::instr::InstrNode *pRoot = cv::instr::getTrace();
|
||||
|
||||
for(size_t i = 0; i < pRoot->m_childs.size(); i++)
|
||||
ticks += pRoot->m_childs[i]->m_payload.m_ticksMean;
|
||||
|
||||
return ticks;
|
||||
}
|
||||
|
||||
::cv::String InstumentData::treeToString()
|
||||
{
|
||||
cv::String string = nodeToString(cv::instr::getTrace());
|
||||
return string;
|
||||
}
|
||||
|
||||
void InstumentData::printTree()
|
||||
{
|
||||
if(cv::instr::getTrace()->m_childs.size())
|
||||
{
|
||||
printf("[ TRACE ]\n");
|
||||
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
|
||||
#ifdef HAVE_IPP
|
||||
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
|
||||
#endif
|
||||
#ifdef HAVE_OPENCL
|
||||
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
|
||||
#endif
|
||||
printf("\n[/TRACE ]\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*****************************************************************************************\
|
||||
* ::perf::TestBase
|
||||
@ -774,6 +989,9 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
|
||||
#endif
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
"{ perf_collect_impl |false |collect info about executed implementations}"
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
"{ perf_instrument |false |instrument code to collect implementations trace}"
|
||||
#endif
|
||||
"{ help h |false |print help info}"
|
||||
#ifdef HAVE_CUDA
|
||||
@ -826,6 +1044,9 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
param_collect_impl = args.get<bool>("perf_collect_impl");
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
param_instrument = args.get<bool>("perf_instrument");
|
||||
#endif
|
||||
#ifdef ANDROID
|
||||
param_affinity_mask = args.get<int>("perf_affinity_mask");
|
||||
log_power_checkpoints = args.has("perf_log_power_checkpoints");
|
||||
@ -856,6 +1077,12 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
|
||||
else
|
||||
cv::setUseCollection(0);
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
if(param_instrument)
|
||||
cv::instr::setUseInstrumentation(true);
|
||||
else
|
||||
cv::instr::setUseInstrumentation(false);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
|
||||
@ -969,13 +1196,13 @@ int64 TestBase::_calibrate()
|
||||
cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2));
|
||||
declare.time(30);
|
||||
double s = 0;
|
||||
for(declare.iterations(20); startTimer(), next(); stopTimer())
|
||||
for(declare.iterations(20); next() && startTimer(); stopTimer())
|
||||
s+=a.dot(b);
|
||||
declare.time(s);
|
||||
|
||||
//self calibration
|
||||
SetUp();
|
||||
for(declare.iterations(1000); startTimer(), next(); stopTimer()){}
|
||||
for(declare.iterations(1000); next() && startTimer(); stopTimer()){}
|
||||
}
|
||||
};
|
||||
|
||||
@ -1242,8 +1469,6 @@ bool TestBase::next()
|
||||
}
|
||||
#endif
|
||||
|
||||
if (has_next)
|
||||
startTimer(); // really we should measure activity from this moment, so reset start time
|
||||
return has_next;
|
||||
}
|
||||
|
||||
@ -1281,9 +1506,17 @@ unsigned int TestBase::getTotalOutputSize() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void TestBase::startTimer()
|
||||
bool TestBase::startTimer()
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
if(currentIter == 0)
|
||||
{
|
||||
cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_MAPPING); // enable mapping for the first run
|
||||
cv::instr::resetTrace();
|
||||
}
|
||||
#endif
|
||||
lastTime = cv::getTickCount();
|
||||
return true; // dummy true for conditional loop
|
||||
}
|
||||
|
||||
void TestBase::stopTimer()
|
||||
@ -1297,6 +1530,10 @@ void TestBase::stopTimer()
|
||||
if (lastTime < 0) lastTime = 0;
|
||||
times.push_back(lastTime);
|
||||
lastTime = 0;
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
cv::instr::setFlags(cv::instr::getFlags()&~cv::instr::FLAGS_MAPPING); // disable mapping to decrease overhead for +1 run
|
||||
#endif
|
||||
}
|
||||
|
||||
performance_metrics& TestBase::calcMetrics()
|
||||
@ -1469,6 +1706,16 @@ void TestBase::reportMetrics(bool toJUnitXML)
|
||||
RecordProperty("gstddev", cv::format("%.6f", m.gstddev).c_str());
|
||||
RecordProperty("mean", cv::format("%.0f", m.mean).c_str());
|
||||
RecordProperty("stddev", cv::format("%.0f", m.stddev).c_str());
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
if(cv::instr::useInstrumentation())
|
||||
{
|
||||
cv::String tree = InstumentData::treeToString();
|
||||
RecordProperty("functions_hierarchy", tree.c_str());
|
||||
RecordProperty("total_ipp_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())));
|
||||
RecordProperty("total_opencl_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())));
|
||||
cv::instr::resetTrace();
|
||||
}
|
||||
#endif
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
if(param_collect_impl)
|
||||
{
|
||||
@ -1639,6 +1886,12 @@ void TestBase::TearDown()
|
||||
fflush(stdout);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
if(cv::instr::useInstrumentation())
|
||||
InstumentData::printTree();
|
||||
#endif
|
||||
|
||||
reportMetrics(true);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user