mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 11:10:21 +08:00
The --perf_instrument parameter now has int type and modes 0, 1 and 2 (1 - simple trees, 2 - expanded trees for functions with the same name but different calling addresses);
A maximum depth limit variable was added to the instrumentation structure; fixed the console output of trace names, where improper tree formatting could happen; output in case of error was added; custom regions improvements; improved timing and weight calculation for parallel regions; a new TC (threads counter) value indicates how many different threads accessed a particular node; parallel_for, warning fixes and ReturnAddress code from Alexander Alekhin;
This commit is contained in:
parent
442380bfac
commit
349d5ba012
@ -457,10 +457,11 @@ class InstrStruct
|
||||
public:
|
||||
InstrStruct()
|
||||
{
|
||||
useInstr = false;
|
||||
enableMapping = true;
|
||||
useInstr = false;
|
||||
flags = FLAGS_MAPPING;
|
||||
maxDepth = 0;
|
||||
|
||||
rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN);
|
||||
rootNode.m_payload = NodeData("ROOT", NULL, 0, NULL, false, TYPE_GENERAL, IMPL_PLAIN);
|
||||
tlsStruct.get()->pCurrentNode = &rootNode;
|
||||
}
|
||||
|
||||
@ -468,7 +469,8 @@ public:
|
||||
Mutex mutexCount;
|
||||
|
||||
bool useInstr;
|
||||
bool enableMapping;
|
||||
int flags;
|
||||
int maxDepth;
|
||||
InstrNode rootNode;
|
||||
TLSData<InstrTLSStruct> tlsStruct;
|
||||
};
|
||||
@ -476,7 +478,7 @@ public:
|
||||
class CV_EXPORTS IntrumentationRegion
|
||||
{
|
||||
public:
|
||||
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
|
||||
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
|
||||
~IntrumentationRegion();
|
||||
|
||||
private:
|
||||
@ -484,20 +486,28 @@ private:
|
||||
uint64 m_regionTicks;
|
||||
};
|
||||
|
||||
InstrStruct& getInstrumentStruct();
|
||||
InstrTLSStruct& getInstrumentTLSStruct();
|
||||
CV_EXPORTS InstrNode* getCurrentNode();
|
||||
CV_EXPORTS InstrStruct& getInstrumentStruct();
|
||||
InstrTLSStruct& getInstrumentTLSStruct();
|
||||
CV_EXPORTS InstrNode* getCurrentNode();
|
||||
}
|
||||
}
|
||||
|
||||
///// General instrumentation
|
||||
// Expands to an expression yielding the return address of the function in
// which it is used.
// _ReturnAddress() is a Microsoft compiler intrinsic, so it is selected by the
// compiler (_MSC_VER) rather than by the target OS (_WIN32): GCC-based Windows
// toolchains such as MinGW take the builtin branch instead.
#if defined(_MSC_VER)
#define CV_INSTRUMENT_GET_RETURN_ADDRESS _ReturnAddress()
#else
#define CV_INSTRUMENT_GET_RETURN_ADDRESS __builtin_extract_return_addr(__builtin_return_address(0))
#endif
|
||||
|
||||
// Instrument region
|
||||
#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL);
|
||||
#define CV_INSTRUMENT_REGION_META(NAME, ALWAYS_EXPAND, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, CV_INSTRUMENT_GET_RETURN_ADDRESS, ALWAYS_EXPAND, TYPE, IMPL);
|
||||
// Declares a custom (user-named) instrumentation region in the current scope.
//   NAME          - region name passed to IntrumentationRegion
//   ALWAYS_EXPAND - forwarded as the region's alwaysExpand flag
//   TYPE, IMPL    - forwarded as the region's instrumentation and implementation type
// The address is taken inside an immediately-invoked lambda; presumably this
// makes it point into the enclosing function (i.e. at the macro's use site)
// rather than at that function's caller - TODO confirm.
// The arguments are forwarded instead of being replaced by the fixed values
// false / TYPE_GENERAL / IMPL_PLAIN, so the macro honours what its caller asked for.
#define CV_INSTRUMENT_REGION_CUSTOM_META(NAME, ALWAYS_EXPAND, TYPE, IMPL)\
    void *__curr_address__ = [&]() {return CV_INSTRUMENT_GET_RETURN_ADDRESS;}();\
    ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, __curr_address__, ALWAYS_EXPAND, TYPE, IMPL);
|
||||
// Instrument functions with non-void return type
|
||||
#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
|
||||
{\
|
||||
if(::cv::instr::useInstrumentation()){\
|
||||
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
|
||||
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
|
||||
try{\
|
||||
auto status = ((FUN)(__VA_ARGS__));\
|
||||
if(ERROR_COND){\
|
||||
@ -518,7 +528,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
|
||||
#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
|
||||
{\
|
||||
if(::cv::instr::useInstrumentation()){\
|
||||
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
|
||||
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
|
||||
try{\
|
||||
(FUN)(__VA_ARGS__);\
|
||||
}catch(...){\
|
||||
@ -531,17 +541,19 @@ CV_EXPORTS InstrNode* getCurrentNode();
|
||||
}\
|
||||
}())
|
||||
// Instrumentation information marker
|
||||
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);}
|
||||
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, NULL, false, ::cv::instr::TYPE_MARKER, IMPL);}
|
||||
|
||||
///// General instrumentation
|
||||
// General OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
|
||||
// Parallel OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
|
||||
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
|
||||
// Custom OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
|
||||
// Instrumentation for parallel_for_ or other regions which forks and gathers threads
|
||||
#define CV_INSTRUMENT_REGION_MT_FORK() CV_INSTRUMENT_REGION_META(__FUNCTION__, true, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN);
|
||||
|
||||
///// IPP instrumentation
|
||||
// Wrapper region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
|
||||
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
|
||||
// Function instrumentation macro
|
||||
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
|
||||
// Diagnostic markers
|
||||
@ -549,26 +561,28 @@ CV_EXPORTS InstrNode* getCurrentNode();
|
||||
|
||||
///// OpenCL instrumentation
|
||||
// Wrapper region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
|
||||
#define CV_INSTRUMENT_REGION_OPENCL_(NAME) CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
|
||||
// Function instrumentation macro
|
||||
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
|
||||
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
|
||||
// OpenCL kernel compilation wrapper
|
||||
#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
|
||||
// OpenCL kernel run wrapper
|
||||
#define CV_INSTRUMENT_REGION_OPENCL_RUN(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL)
|
||||
// Diagnostic markers
|
||||
#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
|
||||
#else
|
||||
#define CV_INSTRUMENT_REGION_META(...)
|
||||
|
||||
#define CV_INSTRUMENT_REGION()
|
||||
#define CV_INSTRUMENT_REGION_MT()
|
||||
#define CV_INSTRUMENT_REGION_NAME(...)
|
||||
#define CV_INSTRUMENT_REGION_MT_FORK()
|
||||
|
||||
#define CV_INSTRUMENT_REGION_IPP()
|
||||
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
|
||||
#define CV_INSTRUMENT_MARK_IPP(NAME)
|
||||
#define CV_INSTRUMENT_MARK_IPP(...)
|
||||
|
||||
#define CV_INSTRUMENT_REGION_OPENCL()
|
||||
#define CV_INSTRUMENT_REGION_OPENCL_(...)
|
||||
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
|
||||
#define CV_INSTRUMENT_MARK_OPENCL(NAME)
|
||||
#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...)
|
||||
#define CV_INSTRUMENT_REGION_OPENCL_RUN(...)
|
||||
#define CV_INSTRUMENT_MARK_OPENCL(...)
|
||||
#endif
|
||||
|
||||
//! @endcond
|
||||
|
@ -1029,7 +1029,7 @@ public:
|
||||
|
||||
Node<OBJECT>* findChild(OBJECT& payload) const
|
||||
{
|
||||
for(int i = 0; i < this->m_childs.size(); i++)
|
||||
for(size_t i = 0; i < this->m_childs.size(); i++)
|
||||
{
|
||||
if(this->m_childs[i]->m_payload == payload)
|
||||
return this->m_childs[i];
|
||||
@ -1039,10 +1039,10 @@ public:
|
||||
|
||||
int findChild(Node<OBJECT> *pNode) const
|
||||
{
|
||||
for (int i = 0; i < this->m_childs.size(); i++)
|
||||
for (size_t i = 0; i < this->m_childs.size(); i++)
|
||||
{
|
||||
if(this->m_childs[i] == pNode)
|
||||
return i;
|
||||
return (int)i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@ -1059,7 +1059,7 @@ public:
|
||||
|
||||
void removeChilds()
|
||||
{
|
||||
for(int i = 0; i < m_childs.size(); i++)
|
||||
for(size_t i = 0; i < m_childs.size(); i++)
|
||||
{
|
||||
m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
|
||||
delete m_childs[i];
|
||||
@ -1067,6 +1067,14 @@ public:
|
||||
m_childs.clear();
|
||||
}
|
||||
|
||||
int getDepth()
|
||||
{
|
||||
int count = 0;
|
||||
Node *pParent = m_pParent;
|
||||
while(pParent) count++, pParent = pParent->m_pParent;
|
||||
return count;
|
||||
}
|
||||
|
||||
public:
|
||||
OBJECT m_payload;
|
||||
Node<OBJECT>* m_pParent;
|
||||
@ -1094,10 +1102,19 @@ enum IMPL
|
||||
IMPL_OPENCL,
|
||||
};
|
||||
|
||||
struct NodeDataTls
|
||||
{
|
||||
NodeDataTls()
|
||||
{
|
||||
m_ticksTotal = 0;
|
||||
}
|
||||
uint64 m_ticksTotal;
|
||||
};
|
||||
|
||||
class CV_EXPORTS NodeData
|
||||
{
|
||||
public:
|
||||
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
|
||||
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
|
||||
NodeData(NodeData &ref);
|
||||
~NodeData();
|
||||
NodeData& operator=(const NodeData&);
|
||||
@ -1107,17 +1124,18 @@ public:
|
||||
cv::instr::IMPL m_implType;
|
||||
const char* m_fileName;
|
||||
int m_lineNum;
|
||||
|
||||
volatile int m_counter;
|
||||
volatile uint64 m_ticksTotal;
|
||||
|
||||
// No synchronization
|
||||
double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); }
|
||||
// No synchronization
|
||||
double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); }
|
||||
|
||||
void* m_retAddress;
|
||||
bool m_alwaysExpand;
|
||||
bool m_funError;
|
||||
bool m_stopPoint;
|
||||
|
||||
volatile int m_counter;
|
||||
volatile uint64 m_ticksTotal;
|
||||
TLSData<NodeDataTls> m_tls;
|
||||
int m_threads;
|
||||
|
||||
// No synchronization
|
||||
double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
|
||||
double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
|
||||
};
|
||||
bool operator==(const NodeData& lhs, const NodeData& rhs);
|
||||
|
||||
@ -1134,8 +1152,9 @@ CV_EXPORTS void resetTrace();
|
||||
|
||||
enum FLAGS
|
||||
{
|
||||
FLAGS_NONE = 0,
|
||||
FLAGS_MAPPING = 1 << 0,
|
||||
FLAGS_NONE = 0,
|
||||
FLAGS_MAPPING = 0x01,
|
||||
FLAGS_EXPAND_SAME_NAMES = 0x02,
|
||||
};
|
||||
|
||||
CV_EXPORTS void setFlags(FLAGS modeFlags);
|
||||
|
@ -1564,8 +1564,6 @@ public:
|
||||
|
||||
virtual void operator()(const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
|
||||
IppStatus status;
|
||||
Ipp8u* pBuffer = 0;
|
||||
Ipp8u* pMemInit= 0;
|
||||
@ -1647,8 +1645,6 @@ public:
|
||||
|
||||
virtual void operator()(const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
|
||||
IppStatus status;
|
||||
Ipp8u* pBuffer = 0;
|
||||
Ipp8u* pMemInit= 0;
|
||||
@ -3809,8 +3805,6 @@ public:
|
||||
|
||||
virtual void operator()(const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
if(*ok == false)
|
||||
return;
|
||||
|
||||
|
@ -3450,7 +3450,7 @@ int Kernel::set(int i, const KernelArg& arg)
|
||||
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||
bool sync, const Queue& q)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL);
|
||||
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
|
||||
|
||||
if(!p || !p->handle || p->e != 0)
|
||||
return false;
|
||||
@ -3563,7 +3563,7 @@ struct Program::Impl
|
||||
Impl(const ProgramSource& _src,
|
||||
const String& _buildflags, String& errmsg)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
|
||||
CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
|
||||
refcount = 1;
|
||||
const Context& ctx = Context::getDefault();
|
||||
src = _src;
|
||||
|
@ -144,7 +144,33 @@ namespace cv
|
||||
namespace
|
||||
{
|
||||
#ifdef CV_PARALLEL_FRAMEWORK
|
||||
class ParallelLoopBodyWrapper
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
// Reconciles the tick statistics of pNode and, recursively, of its whole subtree.
// Every gathered per-thread record with a non-zero tick count is subtracted
// from the node total and reset to zero; afterwards only the largest of those
// per-thread values is added back. m_threads is raised to the number of
// records that contributed, if that exceeds its current value.
static void SyncNodes(cv::instr::InstrNode *pNode)
{
    std::vector<cv::instr::NodeDataTls*> perThread;
    pNode->m_payload.m_tls.gather(perThread);

    uint64 longest = 0;
    int activeThreads = 0;
    for(size_t t = 0; t < perThread.size(); t++)
    {
        cv::instr::NodeDataTls *pSlot = perThread[t];
        if(!pSlot || !pSlot->m_ticksTotal)
            continue; // nothing recorded in this slot

        longest = MAX(longest, pSlot->m_ticksTotal);
        pNode->m_payload.m_ticksTotal -= pSlot->m_ticksTotal;
        pSlot->m_ticksTotal = 0;
        activeThreads++;
    }
    pNode->m_payload.m_ticksTotal += longest;
    pNode->m_payload.m_threads = MAX(pNode->m_payload.m_threads, activeThreads);

    // Apply the same reconciliation to every child.
    for(size_t c = 0; c < pNode->m_childs.size(); c++)
        SyncNodes(pNode->m_childs[c]);
}
|
||||
#endif
|
||||
|
||||
class ParallelLoopBodyWrapper : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
|
||||
@ -159,6 +185,13 @@ namespace
|
||||
pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
|
||||
#endif
|
||||
}
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
// On destruction, runs SyncNodes on every direct child of pThreadRoot.
~ParallelLoopBodyWrapper()
{
    size_t idx = 0;
    while(idx < pThreadRoot->m_childs.size())
    {
        SyncNodes(pThreadRoot->m_childs[idx]);
        ++idx;
    }
}
|
||||
#endif
|
||||
void operator()(const cv::Range& sr) const
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
@ -167,6 +200,7 @@ namespace
|
||||
pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
|
||||
}
|
||||
#endif
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
cv::Range r;
|
||||
r.start = (int)(wholeRange.start +
|
||||
@ -267,7 +301,9 @@ static SchedPtr pplScheduler;
|
||||
|
||||
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
CV_INSTRUMENT_REGION_MT_FORK()
|
||||
if (range.empty())
|
||||
return;
|
||||
|
||||
#ifdef CV_PARALLEL_FRAMEWORK
|
||||
|
||||
@ -326,7 +362,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
|
||||
|
||||
#elif defined HAVE_PTHREADS_PF
|
||||
|
||||
parallel_for_pthreads(range, body, nstripes);
|
||||
parallel_for_pthreads(pbody.stripeRange(), pbody, pbody.stripeRange().size());
|
||||
|
||||
#else
|
||||
|
||||
|
@ -1597,7 +1597,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
|
||||
|
||||
size_t globalsize = groups * wgs;
|
||||
|
||||
if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false))
|
||||
if(!k.run(1, &globalsize, &wgs, false))
|
||||
return false;
|
||||
|
||||
typedef Scalar (* part_sum)(Mat m);
|
||||
|
@ -1340,7 +1340,7 @@ void resetTrace()
|
||||
void setFlags(FLAGS modeFlags)
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
|
||||
getInstrumentStruct().flags = modeFlags;
|
||||
#else
|
||||
CV_UNUSED(modeFlags);
|
||||
#endif
|
||||
@ -1348,31 +1348,27 @@ void setFlags(FLAGS modeFlags)
|
||||
FLAGS getFlags()
|
||||
{
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
int flags = 0;
|
||||
if(getInstrumentStruct().enableMapping)
|
||||
flags |= FLAGS_MAPPING;
|
||||
return (FLAGS)flags;
|
||||
return (FLAGS)getInstrumentStruct().flags;
|
||||
#else
|
||||
return (FLAGS)0;
|
||||
#endif
|
||||
}
|
||||
|
||||
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType)
|
||||
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType)
|
||||
{
|
||||
m_instrType = TYPE_GENERAL;
|
||||
m_implType = IMPL_PLAIN;
|
||||
m_funName = funName;
|
||||
m_instrType = instrType;
|
||||
m_implType = implType;
|
||||
m_fileName = fileName;
|
||||
m_lineNum = lineNum;
|
||||
m_retAddress = retAddress;
|
||||
m_alwaysExpand = alwaysExpand;
|
||||
|
||||
m_funName = funName;
|
||||
m_instrType = instrType;
|
||||
m_implType = implType;
|
||||
m_fileName = fileName;
|
||||
m_lineNum = lineNum;
|
||||
|
||||
m_counter = 0;
|
||||
m_threads = 1;
|
||||
m_counter = 0;
|
||||
m_ticksTotal = 0;
|
||||
|
||||
m_funError = false;
|
||||
m_stopPoint = false;
|
||||
m_funError = false;
|
||||
}
|
||||
NodeData::NodeData(NodeData &ref)
|
||||
{
|
||||
@ -1380,15 +1376,20 @@ NodeData::NodeData(NodeData &ref)
|
||||
}
|
||||
NodeData& NodeData::operator=(const NodeData &right)
|
||||
{
|
||||
this->m_funName = right.m_funName;
|
||||
this->m_instrType = right.m_instrType;
|
||||
this->m_implType = right.m_implType;
|
||||
this->m_fileName = right.m_fileName;
|
||||
this->m_lineNum = right.m_lineNum;
|
||||
this->m_funName = right.m_funName;
|
||||
this->m_instrType = right.m_instrType;
|
||||
this->m_implType = right.m_implType;
|
||||
this->m_fileName = right.m_fileName;
|
||||
this->m_lineNum = right.m_lineNum;
|
||||
this->m_retAddress = right.m_retAddress;
|
||||
this->m_alwaysExpand = right.m_alwaysExpand;
|
||||
|
||||
this->m_threads = right.m_threads;
|
||||
this->m_counter = right.m_counter;
|
||||
this->m_ticksTotal = right.m_ticksTotal;
|
||||
|
||||
this->m_funError = right.m_funError;
|
||||
this->m_stopPoint = right.m_stopPoint;
|
||||
|
||||
return *this;
|
||||
}
|
||||
NodeData::~NodeData()
|
||||
@ -1397,7 +1398,10 @@ NodeData::~NodeData()
|
||||
bool operator==(const NodeData& left, const NodeData& right)
|
||||
{
|
||||
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
|
||||
return true;
|
||||
{
|
||||
if(left.m_retAddress == right.m_retAddress || !(cv::instr::getFlags()&cv::instr::FLAGS_EXPAND_SAME_NAMES || left.m_alwaysExpand))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1418,7 +1422,7 @@ InstrNode* getCurrentNode()
|
||||
return getInstrumentTLSStruct().pCurrentNode;
|
||||
}
|
||||
|
||||
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType)
|
||||
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType, IMPL implType)
|
||||
{
|
||||
m_disabled = false;
|
||||
m_regionTicks = 0;
|
||||
@ -1435,14 +1439,17 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file
|
||||
return;
|
||||
}
|
||||
|
||||
m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint;
|
||||
if(m_disabled)
|
||||
int depth = pTLS->pCurrentNode->getDepth();
|
||||
if(pStruct->maxDepth && pStruct->maxDepth <= depth)
|
||||
{
|
||||
m_disabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
NodeData payload(funName, fileName, lineNum, instrType, implType);
|
||||
NodeData payload(funName, fileName, lineNum, retAddress, alwaysExpand, instrType, implType);
|
||||
Node<NodeData>* pChild = NULL;
|
||||
|
||||
if(pStruct->enableMapping)
|
||||
if(pStruct->flags&FLAGS_MAPPING)
|
||||
{
|
||||
// Critical section
|
||||
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
|
||||
@ -1458,7 +1465,7 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file
|
||||
pChild = pTLS->pCurrentNode->findChild(payload);
|
||||
if(!pChild)
|
||||
{
|
||||
pTLS->pCurrentNode->m_payload.m_stopPoint = true;
|
||||
m_disabled = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -1476,28 +1483,23 @@ IntrumentationRegion::~IntrumentationRegion()
|
||||
if(!m_disabled)
|
||||
{
|
||||
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
|
||||
if(pTLS->pCurrentNode->m_payload.m_stopPoint)
|
||||
{
|
||||
pTLS->pCurrentNode->m_payload.m_stopPoint = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
|
||||
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
|
||||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
|
||||
{
|
||||
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
|
||||
}
|
||||
|
||||
uint64 ticks = (getTickCount() - m_regionTicks);
|
||||
{
|
||||
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
|
||||
pTLS->pCurrentNode->m_payload.m_counter++;
|
||||
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
|
||||
}
|
||||
|
||||
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
|
||||
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
|
||||
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
|
||||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
|
||||
{
|
||||
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
|
||||
}
|
||||
|
||||
uint64 ticks = (getTickCount() - m_regionTicks);
|
||||
{
|
||||
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
|
||||
pTLS->pCurrentNode->m_payload.m_counter++;
|
||||
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
|
||||
pTLS->pCurrentNode->m_payload.m_tls.get()->m_ticksTotal += ticks;
|
||||
}
|
||||
|
||||
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -142,6 +142,8 @@ template <bool useCustomDeriv>
|
||||
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
|
||||
int aperture_size, bool L2gradient, int cn, const Size & size)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_OPENCL()
|
||||
|
||||
UMat map;
|
||||
|
||||
const ocl::Device &dev = ocl::Device::getDefault();
|
||||
|
@ -259,8 +259,6 @@ public:
|
||||
|
||||
virtual void operator()(const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
|
||||
const void *yS = src_data + src_step * range.start;
|
||||
void *yD = dst_data + dst_step * range.start;
|
||||
if( !cvt(yS, static_cast<int>(src_step), yD, static_cast<int>(dst_step), width, range.end - range.start) )
|
||||
|
@ -1188,8 +1188,6 @@ public:
|
||||
|
||||
virtual void operator() (const Range & range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
Ipp32s levelNum = histSize + 1;
|
||||
Mat phist(hist->size(), hist->type(), Scalar::all(0));
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
|
@ -2795,8 +2795,6 @@ public:
|
||||
|
||||
virtual void operator() (const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
if (*ok == false)
|
||||
return;
|
||||
|
||||
@ -4772,8 +4770,6 @@ public:
|
||||
|
||||
virtual void operator() (const Range & range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
IppiRect srcRoiRect = { 0, 0, src.cols, src.rows };
|
||||
Mat dstRoi = dst.rowRange(range);
|
||||
IppiSize dstRoiSize = ippiSize(dstRoi.size());
|
||||
@ -5609,8 +5605,6 @@ public:
|
||||
|
||||
virtual void operator() (const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
IppiSize srcsize = { src.cols, src.rows };
|
||||
IppiRect srcroi = { 0, 0, src.cols, src.rows };
|
||||
IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
|
||||
@ -6254,8 +6248,6 @@ public:
|
||||
|
||||
virtual void operator() (const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
IppiSize srcsize = {src.cols, src.rows};
|
||||
IppiRect srcroi = {0, 0, src.cols, src.rows};
|
||||
IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
|
||||
|
@ -3368,8 +3368,6 @@ public:
|
||||
|
||||
virtual void operator() (const Range& range) const
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
int d = radius * 2 + 1;
|
||||
IppiSize kernel = {d, d};
|
||||
IppiSize roi={dst.cols, range.end - range.start};
|
||||
|
@ -46,7 +46,7 @@ static bool param_verify_sanity;
|
||||
static bool param_collect_impl;
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
static bool param_instrument;
|
||||
static int param_instrument;
|
||||
#endif
|
||||
extern bool test_ipp_check;
|
||||
|
||||
@ -744,7 +744,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if parents have more childs
|
||||
// Check if parents have more childes
|
||||
std::vector<cv::instr::InstrNode*> cache;
|
||||
cv::instr::InstrNode *pTmpNode = pNode;
|
||||
while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
|
||||
@ -756,7 +756,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
|
||||
{
|
||||
if(cache[i]->m_pParent)
|
||||
{
|
||||
if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1)
|
||||
if(cache[i]->m_pParent->findChild(cache[i]) == (int)cache[i]->m_pParent->m_childs.size()-1)
|
||||
printf(" ");
|
||||
else
|
||||
printf("| ");
|
||||
@ -810,48 +810,39 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo
|
||||
|
||||
if(pNode->m_pParent)
|
||||
{
|
||||
printf(" - C:%d", pNode->m_payload.m_counter);
|
||||
printf(" T:%.4fms", pNode->m_payload.getMeanMs());
|
||||
printf(" - TC:%d C:%d", pNode->m_payload.m_threads, pNode->m_payload.m_counter);
|
||||
printf(" T:%.2fms", pNode->m_payload.getTotalMs());
|
||||
if(pNode->m_pParent->m_pParent)
|
||||
printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Group childes
|
||||
std::vector<cv::String> groups;
|
||||
{
|
||||
bool bFound = false;
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
// Group childes by name
|
||||
for(size_t i = 1; i < pNode->m_childs.size(); i++)
|
||||
{
|
||||
bFound = false;
|
||||
for(size_t j = 0; j < groups.size(); j++)
|
||||
if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[i]->m_payload.m_funName )
|
||||
continue;
|
||||
for(size_t j = i+1; j < pNode->m_childs.size(); j++)
|
||||
{
|
||||
if(groups[j] == pNode->m_childs[i]->m_payload.m_funName)
|
||||
if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[j]->m_payload.m_funName )
|
||||
{
|
||||
bFound = true;
|
||||
break;
|
||||
cv::swap(pNode->m_childs[i], pNode->m_childs[j]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if(!bFound)
|
||||
groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
|
||||
}
|
||||
}
|
||||
|
||||
for(size_t g = 0; g < groups.size(); g++)
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
{
|
||||
for(size_t i = 0; i < pNode->m_childs.size(); i++)
|
||||
{
|
||||
if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
|
||||
{
|
||||
printShift(pNode->m_childs[i], pRoot);
|
||||
printShift(pNode->m_childs[i], pRoot);
|
||||
|
||||
if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i]))
|
||||
printf("\\---");
|
||||
else
|
||||
printf("|---");
|
||||
printNodeRec(pNode->m_childs[i], pRoot);
|
||||
}
|
||||
}
|
||||
if(i == pNode->m_childs.size()-1)
|
||||
printf("\\---");
|
||||
else
|
||||
printf("|---");
|
||||
printNodeRec(pNode->m_childs[i], pRoot);
|
||||
}
|
||||
}
|
||||
|
||||
@ -871,7 +862,7 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode)
|
||||
else
|
||||
{
|
||||
string = "#";
|
||||
string += std::to_string(pNode->m_payload.m_instrType);
|
||||
string += std::to_string((int)pNode->m_payload.m_instrType);
|
||||
string += pNode->m_payload.m_funName;
|
||||
string += " - L:";
|
||||
string += to_string_with_precision(calcLocalWeight(pNode));
|
||||
@ -931,19 +922,16 @@ static uint64 getTotalTime()
|
||||
|
||||
void InstumentData::printTree()
|
||||
{
|
||||
if(cv::instr::getTrace()->m_childs.size())
|
||||
{
|
||||
printf("[ TRACE ]\n");
|
||||
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
|
||||
printf("[ TRACE ]\n");
|
||||
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
|
||||
#ifdef HAVE_IPP
|
||||
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
|
||||
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
|
||||
#endif
|
||||
#ifdef HAVE_OPENCL
|
||||
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
|
||||
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
|
||||
#endif
|
||||
printf("\n[/TRACE ]\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
printf("\n[/TRACE ]\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -994,7 +982,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
|
||||
"{ perf_collect_impl |false |collect info about executed implementations}"
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
"{ perf_instrument |false |instrument code to collect implementations trace}"
|
||||
"{ perf_instrument |0 |instrument code to collect implementations trace: 1 - perform instrumentation; 2 - separate functions with the same name }"
|
||||
#endif
|
||||
"{ help h |false |print help info}"
|
||||
#ifdef HAVE_CUDA
|
||||
@ -1048,7 +1036,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
|
||||
param_collect_impl = args.get<bool>("perf_collect_impl");
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
param_instrument = args.get<bool>("perf_instrument");
|
||||
param_instrument = args.get<int>("perf_instrument");
|
||||
#endif
|
||||
#ifdef ANDROID
|
||||
param_affinity_mask = args.get<int>("perf_affinity_mask");
|
||||
@ -1081,8 +1069,12 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
|
||||
cv::setUseCollection(0);
|
||||
#endif
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
if(param_instrument)
|
||||
if(param_instrument > 0)
|
||||
{
|
||||
if(param_instrument == 2)
|
||||
cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_EXPAND_SAME_NAMES);
|
||||
cv::instr::setUseInstrumentation(true);
|
||||
}
|
||||
else
|
||||
cv::instr::setUseInstrumentation(false);
|
||||
#endif
|
||||
@ -1856,6 +1848,11 @@ void TestBase::TearDown()
|
||||
if (HasFailure())
|
||||
{
|
||||
reportMetrics(false);
|
||||
|
||||
#ifdef ENABLE_INSTRUMENTATION
|
||||
if(cv::instr::useInstrumentation())
|
||||
InstumentData::printTree();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user