--perf_instrument parameter now has int type and 0, 1, 2 modes (1 - simple trees, 2 - expanded trees for functions with same name but different calling address);

Maximum depth limit var was added to the instrumentation structure;

Fixed console output of trace names: improper tree formatting could happen;
Output in case of error was added;

Custom regions improvements;

Improved timing and weight calculation for parallel regions; new TC (threads counter) value indicates how many different threads accessed a particular node;

parallel_for, warnings fixes and ReturnAddress code from Alexander Alekhin;
This commit is contained in:
Pavel Vlasov 2016-11-07 12:15:51 +03:00
parent 442380bfac
commit 349d5ba012
13 changed files with 211 additions and 161 deletions

View File

@ -457,10 +457,11 @@ class InstrStruct
public:
InstrStruct()
{
useInstr = false;
enableMapping = true;
useInstr = false;
flags = FLAGS_MAPPING;
maxDepth = 0;
rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN);
rootNode.m_payload = NodeData("ROOT", NULL, 0, NULL, false, TYPE_GENERAL, IMPL_PLAIN);
tlsStruct.get()->pCurrentNode = &rootNode;
}
@ -468,7 +469,8 @@ public:
Mutex mutexCount;
bool useInstr;
bool enableMapping;
int flags;
int maxDepth;
InstrNode rootNode;
TLSData<InstrTLSStruct> tlsStruct;
};
@ -476,7 +478,7 @@ public:
class CV_EXPORTS IntrumentationRegion
{
public:
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
~IntrumentationRegion();
private:
@ -484,20 +486,28 @@ private:
uint64 m_regionTicks;
};
InstrStruct& getInstrumentStruct();
InstrTLSStruct& getInstrumentTLSStruct();
CV_EXPORTS InstrNode* getCurrentNode();
CV_EXPORTS InstrStruct& getInstrumentStruct();
InstrTLSStruct& getInstrumentTLSStruct();
CV_EXPORTS InstrNode* getCurrentNode();
}
}
///// General instrumentation
#ifdef _WIN32
#define CV_INSTRUMENT_GET_RETURN_ADDRESS _ReturnAddress()
#else
#define CV_INSTRUMENT_GET_RETURN_ADDRESS __builtin_extract_return_addr(__builtin_return_address(0))
#endif
// Instrument region
#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL);
#define CV_INSTRUMENT_REGION_META(NAME, ALWAYS_EXPAND, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, CV_INSTRUMENT_GET_RETURN_ADDRESS, ALWAYS_EXPAND, TYPE, IMPL);
// Custom (explicitly named) instrumentation region.
// The region's address key is obtained by evaluating CV_INSTRUMENT_GET_RETURN_ADDRESS
// inside an immediately-invoked lambda (presumably this yields a location at the
// macro's expansion site rather than the caller of the enclosing function - confirm).
// ALWAYS_EXPAND, TYPE and IMPL are forwarded to the region, so the macro honours
// its own parameters instead of silently ignoring them.
#define CV_INSTRUMENT_REGION_CUSTOM_META(NAME, ALWAYS_EXPAND, TYPE, IMPL)\
void *__curr_address__ = [&]() {return CV_INSTRUMENT_GET_RETURN_ADDRESS;}();\
::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, __curr_address__, ALWAYS_EXPAND, TYPE, IMPL);
// Instrument functions with non-void return type
#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
{\
if(::cv::instr::useInstrumentation()){\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
try{\
auto status = ((FUN)(__VA_ARGS__));\
if(ERROR_COND){\
@ -518,7 +528,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
{\
if(::cv::instr::useInstrumentation()){\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
try{\
(FUN)(__VA_ARGS__);\
}catch(...){\
@ -531,17 +541,19 @@ CV_EXPORTS InstrNode* getCurrentNode();
}\
}())
// Instrumentation information marker
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);}
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, NULL, false, ::cv::instr::TYPE_MARKER, IMPL);}
///// General instrumentation
// General OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
// Parallel OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Custom OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Instrumentation for parallel_for_ or other regions which fork and gather threads.
// Passes ALWAYS_EXPAND = true to the region. No trailing semicolon is added here:
// CV_INSTRUMENT_REGION_META already terminates its statement.
#define CV_INSTRUMENT_REGION_MT_FORK() CV_INSTRUMENT_REGION_META(__FUNCTION__, true, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
///// IPP instrumentation
// Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
// Function instrumentation macro
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
// Diagnostic markers
@ -549,26 +561,28 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// OpenCL instrumentation
// Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
#define CV_INSTRUMENT_REGION_OPENCL_(NAME) CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
// Function instrumentation macro
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
// OpenCL kernel compilation wrapper
#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
// OpenCL kernel run wrapper
#define CV_INSTRUMENT_REGION_OPENCL_RUN(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL)
// Diagnostic markers
#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
#else
#define CV_INSTRUMENT_REGION_META(...)
#define CV_INSTRUMENT_REGION()
#define CV_INSTRUMENT_REGION_MT()
#define CV_INSTRUMENT_REGION_NAME(...)
#define CV_INSTRUMENT_REGION_MT_FORK()
#define CV_INSTRUMENT_REGION_IPP()
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
#define CV_INSTRUMENT_MARK_IPP(NAME)
#define CV_INSTRUMENT_MARK_IPP(...)
#define CV_INSTRUMENT_REGION_OPENCL()
#define CV_INSTRUMENT_REGION_OPENCL_(...)
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
#define CV_INSTRUMENT_MARK_OPENCL(NAME)
#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...)
#define CV_INSTRUMENT_REGION_OPENCL_RUN(...)
#define CV_INSTRUMENT_MARK_OPENCL(...)
#endif
//! @endcond

View File

@ -1029,7 +1029,7 @@ public:
Node<OBJECT>* findChild(OBJECT& payload) const
{
for(int i = 0; i < this->m_childs.size(); i++)
for(size_t i = 0; i < this->m_childs.size(); i++)
{
if(this->m_childs[i]->m_payload == payload)
return this->m_childs[i];
@ -1039,10 +1039,10 @@ public:
int findChild(Node<OBJECT> *pNode) const
{
for (int i = 0; i < this->m_childs.size(); i++)
for (size_t i = 0; i < this->m_childs.size(); i++)
{
if(this->m_childs[i] == pNode)
return i;
return (int)i;
}
return -1;
}
@ -1059,7 +1059,7 @@ public:
void removeChilds()
{
for(int i = 0; i < m_childs.size(); i++)
for(size_t i = 0; i < m_childs.size(); i++)
{
m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
delete m_childs[i];
@ -1067,6 +1067,14 @@ public:
m_childs.clear();
}
int getDepth()
{
int count = 0;
Node *pParent = m_pParent;
while(pParent) count++, pParent = pParent->m_pParent;
return count;
}
public:
OBJECT m_payload;
Node<OBJECT>* m_pParent;
@ -1094,10 +1102,19 @@ enum IMPL
IMPL_OPENCL,
};
struct NodeDataTls
{
NodeDataTls()
{
m_ticksTotal = 0;
}
uint64 m_ticksTotal;
};
class CV_EXPORTS NodeData
{
public:
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
NodeData(NodeData &ref);
~NodeData();
NodeData& operator=(const NodeData&);
@ -1107,17 +1124,18 @@ public:
cv::instr::IMPL m_implType;
const char* m_fileName;
int m_lineNum;
volatile int m_counter;
volatile uint64 m_ticksTotal;
// No synchronization
double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); }
// No synchronization
double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); }
void* m_retAddress;
bool m_alwaysExpand;
bool m_funError;
bool m_stopPoint;
volatile int m_counter;
volatile uint64 m_ticksTotal;
TLSData<NodeDataTls> m_tls;
int m_threads;
// No synchronization
double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
};
bool operator==(const NodeData& lhs, const NodeData& rhs);
@ -1134,8 +1152,9 @@ CV_EXPORTS void resetTrace();
enum FLAGS
{
FLAGS_NONE = 0,
FLAGS_MAPPING = 1 << 0,
FLAGS_NONE = 0,
FLAGS_MAPPING = 0x01,
FLAGS_EXPAND_SAME_NAMES = 0x02,
};
CV_EXPORTS void setFlags(FLAGS modeFlags);

View File

@ -1564,8 +1564,6 @@ public:
virtual void operator()(const Range& range) const
{
CV_INSTRUMENT_REGION_IPP();
IppStatus status;
Ipp8u* pBuffer = 0;
Ipp8u* pMemInit= 0;
@ -1647,8 +1645,6 @@ public:
virtual void operator()(const Range& range) const
{
CV_INSTRUMENT_REGION_IPP();
IppStatus status;
Ipp8u* pBuffer = 0;
Ipp8u* pMemInit= 0;
@ -3809,8 +3805,6 @@ public:
virtual void operator()(const Range& range) const
{
CV_INSTRUMENT_REGION_IPP()
if(*ok == false)
return;

View File

@ -3450,7 +3450,7 @@ int Kernel::set(int i, const KernelArg& arg)
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
bool sync, const Queue& q)
{
CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL);
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
if(!p || !p->handle || p->e != 0)
return false;
@ -3563,7 +3563,7 @@ struct Program::Impl
Impl(const ProgramSource& _src,
const String& _buildflags, String& errmsg)
{
CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
refcount = 1;
const Context& ctx = Context::getDefault();
src = _src;

View File

@ -144,7 +144,33 @@ namespace cv
namespace
{
#ifdef CV_PARALLEL_FRAMEWORK
class ParallelLoopBodyWrapper
#ifdef ENABLE_INSTRUMENTATION
// Reconciles the per-thread tick counters of pNode and of its whole subtree with
// the nodes' shared totals. For each node, every non-zero per-thread amount is
// subtracted from m_ticksTotal and reset to zero, then only the largest of those
// amounts is added back. m_threads is raised to the number of threads that had
// a non-zero amount, if that number exceeds the stored value.
static void SyncNodes(cv::instr::InstrNode *pNode)
{
    std::vector<cv::instr::NodeDataTls*> tlsSlots;
    pNode->m_payload.m_tls.gather(tlsSlots);

    uint64 longestTicks  = 0;
    int    activeThreads = 0;
    for(size_t idx = 0; idx != tlsSlots.size(); ++idx)
    {
        cv::instr::NodeDataTls* pSlot = tlsSlots[idx];
        if(!pSlot || !pSlot->m_ticksTotal)
            continue; // missing slot or nothing accumulated in it

        if(longestTicks < pSlot->m_ticksTotal)
            longestTicks = pSlot->m_ticksTotal;
        pNode->m_payload.m_ticksTotal -= pSlot->m_ticksTotal;
        pSlot->m_ticksTotal = 0;
        ++activeThreads;
    }

    pNode->m_payload.m_ticksTotal += longestTicks;
    if(pNode->m_payload.m_threads < activeThreads)
        pNode->m_payload.m_threads = activeThreads;

    // Apply the same reconciliation to every descendant
    for(size_t idx = 0; idx != pNode->m_childs.size(); ++idx)
        SyncNodes(pNode->m_childs[idx]);
}
#endif
class ParallelLoopBodyWrapper : public cv::ParallelLoopBody
{
public:
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
@ -159,6 +185,13 @@ namespace
pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
#endif
}
#ifdef ENABLE_INSTRUMENTATION
// On destruction, run SyncNodes() on every child of the thread root node
// (SyncNodes itself descends into the children's subtrees).
~ParallelLoopBodyWrapper()
{
    size_t idx = 0;
    while(idx < pThreadRoot->m_childs.size())
    {
        SyncNodes(pThreadRoot->m_childs[idx]);
        ++idx;
    }
}
#endif
void operator()(const cv::Range& sr) const
{
#ifdef ENABLE_INSTRUMENTATION
@ -167,6 +200,7 @@ namespace
pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
}
#endif
CV_INSTRUMENT_REGION()
cv::Range r;
r.start = (int)(wholeRange.start +
@ -267,7 +301,9 @@ static SchedPtr pplScheduler;
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
CV_INSTRUMENT_REGION()
CV_INSTRUMENT_REGION_MT_FORK()
if (range.empty())
return;
#ifdef CV_PARALLEL_FRAMEWORK
@ -326,7 +362,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
#elif defined HAVE_PTHREADS_PF
parallel_for_pthreads(range, body, nstripes);
parallel_for_pthreads(pbody.stripeRange(), pbody, pbody.stripeRange().size());
#else

View File

@ -1597,7 +1597,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
size_t globalsize = groups * wgs;
if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false))
if(!k.run(1, &globalsize, &wgs, false))
return false;
typedef Scalar (* part_sum)(Mat m);

View File

@ -1340,7 +1340,7 @@ void resetTrace()
void setFlags(FLAGS modeFlags)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
getInstrumentStruct().flags = modeFlags;
#else
CV_UNUSED(modeFlags);
#endif
@ -1348,31 +1348,27 @@ void setFlags(FLAGS modeFlags)
FLAGS getFlags()
{
#ifdef ENABLE_INSTRUMENTATION
int flags = 0;
if(getInstrumentStruct().enableMapping)
flags |= FLAGS_MAPPING;
return (FLAGS)flags;
return (FLAGS)getInstrumentStruct().flags;
#else
return (FLAGS)0;
#endif
}
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType)
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType)
{
m_instrType = TYPE_GENERAL;
m_implType = IMPL_PLAIN;
m_funName = funName;
m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_retAddress = retAddress;
m_alwaysExpand = alwaysExpand;
m_funName = funName;
m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_counter = 0;
m_threads = 1;
m_counter = 0;
m_ticksTotal = 0;
m_funError = false;
m_stopPoint = false;
m_funError = false;
}
NodeData::NodeData(NodeData &ref)
{
@ -1380,15 +1376,20 @@ NodeData::NodeData(NodeData &ref)
}
NodeData& NodeData::operator=(const NodeData &right)
{
this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType;
this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum;
this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType;
this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum;
this->m_retAddress = right.m_retAddress;
this->m_alwaysExpand = right.m_alwaysExpand;
this->m_threads = right.m_threads;
this->m_counter = right.m_counter;
this->m_ticksTotal = right.m_ticksTotal;
this->m_funError = right.m_funError;
this->m_stopPoint = right.m_stopPoint;
return *this;
}
NodeData::~NodeData()
@ -1397,7 +1398,10 @@ NodeData::~NodeData()
bool operator==(const NodeData& left, const NodeData& right)
{
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
return true;
{
if(left.m_retAddress == right.m_retAddress || !(cv::instr::getFlags()&cv::instr::FLAGS_EXPAND_SAME_NAMES || left.m_alwaysExpand))
return true;
}
return false;
}
@ -1418,7 +1422,7 @@ InstrNode* getCurrentNode()
return getInstrumentTLSStruct().pCurrentNode;
}
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType)
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType, IMPL implType)
{
m_disabled = false;
m_regionTicks = 0;
@ -1435,14 +1439,17 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file
return;
}
m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint;
if(m_disabled)
int depth = pTLS->pCurrentNode->getDepth();
if(pStruct->maxDepth && pStruct->maxDepth <= depth)
{
m_disabled = true;
return;
}
NodeData payload(funName, fileName, lineNum, instrType, implType);
NodeData payload(funName, fileName, lineNum, retAddress, alwaysExpand, instrType, implType);
Node<NodeData>* pChild = NULL;
if(pStruct->enableMapping)
if(pStruct->flags&FLAGS_MAPPING)
{
// Critical section
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
@ -1458,7 +1465,7 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
pTLS->pCurrentNode->m_payload.m_stopPoint = true;
m_disabled = true;
return;
}
}
@ -1476,28 +1483,23 @@ IntrumentationRegion::~IntrumentationRegion()
if(!m_disabled)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
if(pTLS->pCurrentNode->m_payload.m_stopPoint)
{
pTLS->pCurrentNode->m_payload.m_stopPoint = false;
}
else
{
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
}
uint64 ticks = (getTickCount() - m_regionTicks);
{
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++;
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
}
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
}
uint64 ticks = (getTickCount() - m_regionTicks);
{
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++;
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
pTLS->pCurrentNode->m_payload.m_tls.get()->m_ticksTotal += ticks;
}
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
}
}
}

View File

@ -142,6 +142,8 @@ template <bool useCustomDeriv>
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
int aperture_size, bool L2gradient, int cn, const Size & size)
{
CV_INSTRUMENT_REGION_OPENCL()
UMat map;
const ocl::Device &dev = ocl::Device::getDefault();

View File

@ -259,8 +259,6 @@ public:
virtual void operator()(const Range& range) const
{
CV_INSTRUMENT_REGION_IPP();
const void *yS = src_data + src_step * range.start;
void *yD = dst_data + dst_step * range.start;
if( !cvt(yS, static_cast<int>(src_step), yD, static_cast<int>(dst_step), width, range.end - range.start) )

View File

@ -1188,8 +1188,6 @@ public:
virtual void operator() (const Range & range) const
{
CV_INSTRUMENT_REGION_IPP()
Ipp32s levelNum = histSize + 1;
Mat phist(hist->size(), hist->type(), Scalar::all(0));
#if IPP_VERSION_X100 >= 900

View File

@ -2795,8 +2795,6 @@ public:
virtual void operator() (const Range& range) const
{
CV_INSTRUMENT_REGION_IPP()
if (*ok == false)
return;
@ -4772,8 +4770,6 @@ public:
virtual void operator() (const Range & range) const
{
CV_INSTRUMENT_REGION_IPP()
IppiRect srcRoiRect = { 0, 0, src.cols, src.rows };
Mat dstRoi = dst.rowRange(range);
IppiSize dstRoiSize = ippiSize(dstRoi.size());
@ -5609,8 +5605,6 @@ public:
virtual void operator() (const Range& range) const
{
CV_INSTRUMENT_REGION_IPP()
IppiSize srcsize = { src.cols, src.rows };
IppiRect srcroi = { 0, 0, src.cols, src.rows };
IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
@ -6254,8 +6248,6 @@ public:
virtual void operator() (const Range& range) const
{
CV_INSTRUMENT_REGION_IPP()
IppiSize srcsize = {src.cols, src.rows};
IppiRect srcroi = {0, 0, src.cols, src.rows};
IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};

View File

@ -3368,8 +3368,6 @@ public:
virtual void operator() (const Range& range) const
{
CV_INSTRUMENT_REGION_IPP()
int d = radius * 2 + 1;
IppiSize kernel = {d, d};
IppiSize roi={dst.cols, range.end - range.start};

View File

@ -46,7 +46,7 @@ static bool param_verify_sanity;
static bool param_collect_impl;
#endif
#ifdef ENABLE_INSTRUMENTATION
static bool param_instrument;
static int param_instrument;
#endif
extern bool test_ipp_check;
@ -744,7 +744,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
}
}
// Check if parents have more childs
// Check if parents have more childes
std::vector<cv::instr::InstrNode*> cache;
cv::instr::InstrNode *pTmpNode = pNode;
while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
@ -756,7 +756,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
{
if(cache[i]->m_pParent)
{
if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1)
if(cache[i]->m_pParent->findChild(cache[i]) == (int)cache[i]->m_pParent->m_childs.size()-1)
printf(" ");
else
printf("| ");
@ -810,48 +810,39 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo
if(pNode->m_pParent)
{
printf(" - C:%d", pNode->m_payload.m_counter);
printf(" T:%.4fms", pNode->m_payload.getMeanMs());
printf(" - TC:%d C:%d", pNode->m_payload.m_threads, pNode->m_payload.m_counter);
printf(" T:%.2fms", pNode->m_payload.getTotalMs());
if(pNode->m_pParent->m_pParent)
printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
}
printf("\n");
// Group childes
std::vector<cv::String> groups;
{
bool bFound = false;
for(size_t i = 0; i < pNode->m_childs.size(); i++)
// Group childes by name
for(size_t i = 1; i < pNode->m_childs.size(); i++)
{
bFound = false;
for(size_t j = 0; j < groups.size(); j++)
if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[i]->m_payload.m_funName )
continue;
for(size_t j = i+1; j < pNode->m_childs.size(); j++)
{
if(groups[j] == pNode->m_childs[i]->m_payload.m_funName)
if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[j]->m_payload.m_funName )
{
bFound = true;
break;
cv::swap(pNode->m_childs[i], pNode->m_childs[j]);
i++;
}
}
if(!bFound)
groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
}
}
for(size_t g = 0; g < groups.size(); g++)
for(size_t i = 0; i < pNode->m_childs.size(); i++)
{
for(size_t i = 0; i < pNode->m_childs.size(); i++)
{
if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
{
printShift(pNode->m_childs[i], pRoot);
printShift(pNode->m_childs[i], pRoot);
if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i]))
printf("\\---");
else
printf("|---");
printNodeRec(pNode->m_childs[i], pRoot);
}
}
if(i == pNode->m_childs.size()-1)
printf("\\---");
else
printf("|---");
printNodeRec(pNode->m_childs[i], pRoot);
}
}
@ -871,7 +862,7 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode)
else
{
string = "#";
string += std::to_string(pNode->m_payload.m_instrType);
string += std::to_string((int)pNode->m_payload.m_instrType);
string += pNode->m_payload.m_funName;
string += " - L:";
string += to_string_with_precision(calcLocalWeight(pNode));
@ -931,19 +922,16 @@ static uint64 getTotalTime()
void InstumentData::printTree()
{
if(cv::instr::getTrace()->m_childs.size())
{
printf("[ TRACE ]\n");
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
printf("[ TRACE ]\n");
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
#ifdef HAVE_IPP
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
#endif
#ifdef HAVE_OPENCL
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
#endif
printf("\n[/TRACE ]\n");
fflush(stdout);
}
printf("\n[/TRACE ]\n");
fflush(stdout);
}
#endif
@ -994,7 +982,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
"{ perf_collect_impl |false |collect info about executed implementations}"
#endif
#ifdef ENABLE_INSTRUMENTATION
"{ perf_instrument |false |instrument code to collect implementations trace}"
"{ perf_instrument |0 |instrument code to collect implementations trace: 1 - perform instrumentation; 2 - separate functions with the same name }"
#endif
"{ help h |false |print help info}"
#ifdef HAVE_CUDA
@ -1048,7 +1036,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
param_collect_impl = args.get<bool>("perf_collect_impl");
#endif
#ifdef ENABLE_INSTRUMENTATION
param_instrument = args.get<bool>("perf_instrument");
param_instrument = args.get<int>("perf_instrument");
#endif
#ifdef ANDROID
param_affinity_mask = args.get<int>("perf_affinity_mask");
@ -1081,8 +1069,12 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
cv::setUseCollection(0);
#endif
#ifdef ENABLE_INSTRUMENTATION
if(param_instrument)
if(param_instrument > 0)
{
if(param_instrument == 2)
cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_EXPAND_SAME_NAMES);
cv::instr::setUseInstrumentation(true);
}
else
cv::instr::setUseInstrumentation(false);
#endif
@ -1856,6 +1848,11 @@ void TestBase::TearDown()
if (HasFailure())
{
reportMetrics(false);
#ifdef ENABLE_INSTRUMENTATION
if(cv::instr::useInstrumentation())
InstumentData::printTree();
#endif
return;
}
}