instrumentation: minor refactoring, wrap OpenCL

- calculate ticksTotal instead of ticksMean
- local / global width is based on ticksTotal value
- added instrumentation for OpenCL program compilation
- added instrumentation for OpenCL kernel execution
This commit is contained in:
Alexander Alekhin 2016-08-25 17:26:46 +03:00
parent 4013ce570f
commit 10d18a315c
6 changed files with 86 additions and 54 deletions

View File

@ -43,7 +43,7 @@ string(REPLACE ";" "\n " OPENCV_ABI_SKIP_HEADERS "${OPENCV_ABI_SKIP_HEADERS}"
string(REPLACE ";" "\n " OPENCV_ABI_SKIP_LIBRARIES "${OPENCV_ABI_SKIP_LIBRARIES}")
# Options
set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} -DOPENCV_ABI_CHECK=1")
string(REGEX REPLACE "([^ ]) +([^ ])" "\\1\\n \\2" OPENCV_ABI_GCC_OPTIONS "${OPENCV_ABI_GCC_OPTIONS}")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/opencv_abi.xml.in" "${path1}")

View File

@ -476,7 +476,7 @@ public:
class CV_EXPORTS IntrumentationRegion
{
public:
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
~IntrumentationRegion();
private:
@ -492,7 +492,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// General instrumentation
// Instrument region
#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...) ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL);
#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL);
// Instrument functions with non-void return type
#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
{\
@ -535,13 +535,13 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// General instrumentation
// General OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
// Parallel OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
///// IPP instrumentation
// Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
// Function instrumentation macro
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
// Diagnostic markers
@ -549,12 +549,15 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// OpenCL instrumentation
// Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
#define CV_INSTRUMENT_REGION_OPENCL_(NAME) CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
// Function instrumentation macro
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
// Diagnostic markers
#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
#else
#define CV_INSTRUMENT_REGION_META(...)
#define CV_INSTRUMENT_REGION()
#define CV_INSTRUMENT_REGION_MT()
@ -563,6 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
#define CV_INSTRUMENT_MARK_IPP(NAME)
#define CV_INSTRUMENT_REGION_OPENCL()
#define CV_INSTRUMENT_REGION_OPENCL_(...)
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
#define CV_INSTRUMENT_MARK_OPENCL(NAME)
#endif

View File

@ -1076,7 +1076,10 @@ public:
// Instrumentation external interface
namespace instr
{
enum
#if !defined OPENCV_ABI_CHECK
enum TYPE
{
TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function
TYPE_MARKER, // Information marker
@ -1084,48 +1087,60 @@ enum
TYPE_FUN, // Simple function call
};
enum
enum IMPL
{
IMPL_PLAIN = 0,
IMPL_IPP,
IMPL_OPENCL,
};
enum
{
FLAGS_MAPPING = 0x01,
};
class CV_EXPORTS NodeData
{
public:
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
NodeData(NodeData &ref);
~NodeData();
NodeData& operator=(const NodeData&);
cv::String m_funName;
int m_instrType;
int m_implType;
cv::String m_fileName;
cv::instr::TYPE m_instrType;
cv::instr::IMPL m_implType;
const char* m_fileName;
int m_lineNum;
bool m_funError;
volatile int m_counter;
bool m_stopPoint;
uint64 m_ticksMean;
volatile uint64 m_ticksTotal;
// No synchronization
double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); }
// No synchronization
double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); }
bool m_funError;
bool m_stopPoint;
};
bool operator==(const NodeData& lhs, const NodeData& rhs);
typedef Node<NodeData> InstrNode;
CV_EXPORTS InstrNode* getTrace();
#endif // !defined OPENCV_ABI_CHECK
CV_EXPORTS bool useInstrumentation();
CV_EXPORTS void setUseInstrumentation(bool flag);
CV_EXPORTS InstrNode* getTrace();
CV_EXPORTS void resetTrace();
CV_EXPORTS void setFlags(int modeFlags);
CV_EXPORTS int getFlags();
enum FLAGS
{
FLAGS_NONE = 0,
FLAGS_MAPPING = 1 << 0,
};
CV_EXPORTS void setFlags(FLAGS modeFlags);
static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
CV_EXPORTS FLAGS getFlags();
}
} //namespace cv

View File

@ -45,6 +45,7 @@
#include <string>
#include <sstream>
#include <iostream> // std::cerr
#include <inttypes.h>
#define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
#define CV_OPENCL_SHOW_RUN_ERRORS 0
@ -3169,6 +3170,9 @@ struct Kernel::Impl
{
cl_program ph = (cl_program)prog.ptr();
cl_int retval = 0;
#ifdef ENABLE_INSTRUMENTATION
name = kname;
#endif
handle = ph != 0 ?
clCreateKernel(ph, kname, &retval) : 0;
CV_OclDbgAssert(retval == CL_SUCCESS);
@ -3221,6 +3225,9 @@ struct Kernel::Impl
IMPLEMENT_REFCOUNTABLE();
#ifdef ENABLE_INSTRUMENTATION
cv::String name;
#endif
cl_kernel handle;
cl_event e;
enum { MAX_ARRS = 16 };
@ -3441,6 +3448,8 @@ int Kernel::set(int i, const KernelArg& arg)
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
bool sync, const Queue& q)
{
CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL);
if(!p || !p->handle || p->e != 0)
return false;
@ -3552,6 +3561,7 @@ struct Program::Impl
Impl(const ProgramSource& _src,
const String& _buildflags, String& errmsg)
{
CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
refcount = 1;
const Context& ctx = Context::getDefault();
src = _src;

View File

@ -1337,7 +1337,7 @@ void resetTrace()
#endif
}
void setFlags(int modeFlags)
void setFlags(FLAGS modeFlags)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
@ -1345,33 +1345,34 @@ void setFlags(int modeFlags)
CV_UNUSED(modeFlags);
#endif
}
int getFlags()
FLAGS getFlags()
{
#ifdef ENABLE_INSTRUMENTATION
int flags = 0;
if(getInstrumentStruct().enableMapping)
flags |= FLAGS_MAPPING;
return flags;
return (FLAGS)flags;
#else
return 0;
return (FLAGS)0;
#endif
}
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType)
{
m_instrType = TYPE_GENERAL;
m_implType = IMPL_PLAIN;
m_funError = false;
m_counter = 0;
m_stopPoint = false;
m_ticksMean = 0;
m_funName = funName;
m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_counter = 0;
m_ticksTotal = 0;
m_funError = false;
m_stopPoint = false;
}
NodeData::NodeData(NodeData &ref)
{
@ -1382,13 +1383,12 @@ NodeData& NodeData::operator=(const NodeData &right)
this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType;
this->m_funError = right.m_funError;
this->m_counter = right.m_counter;
this->m_stopPoint = right.m_stopPoint;
this->m_ticksMean = right.m_ticksMean;
this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum;
this->m_counter = right.m_counter;
this->m_ticksTotal = right.m_ticksTotal;
this->m_funError = right.m_funError;
this->m_stopPoint = right.m_stopPoint;
return *this;
}
NodeData::~NodeData()
@ -1418,7 +1418,7 @@ InstrNode* getCurrentNode()
return getInstrumentTLSStruct().pCurrentNode;
}
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType)
{
m_disabled = false;
m_regionTicks = 0;
@ -1482,18 +1482,18 @@ IntrumentationRegion::~IntrumentationRegion()
}
else
{
if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
cv::ocl::finish();
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
}
uint64 ticks = (getTickCount() - m_regionTicks);
{
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++;
if(pTLS->pCurrentNode->m_payload.m_counter <= 1)
pTLS->pCurrentNode->m_payload.m_ticksMean = ticks;
else
pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter;
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
}
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;

View File

@ -767,7 +767,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
static double calcLocalWeight(cv::instr::InstrNode *pNode)
{
if(pNode->m_pParent && pNode->m_pParent->m_pParent)
return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean);
return ((double)pNode->m_payload.m_ticksTotal*100/pNode->m_pParent->m_payload.m_ticksTotal);
else
return 100;
}
@ -779,7 +779,7 @@ static double calcGlobalWeight(cv::instr::InstrNode *pNode)
while(globNode->m_pParent && globNode->m_pParent->m_pParent)
globNode = globNode->m_pParent;
return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean);
return ((double)pNode->m_payload.m_ticksTotal*100/(double)globNode->m_payload.m_ticksTotal);
}
static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot)
@ -811,7 +811,7 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo
if(pNode->m_pParent)
{
printf(" - C:%d", pNode->m_payload.m_counter);
printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000);
printf(" T:%.4fms", pNode->m_payload.getMeanMs());
if(pNode->m_pParent->m_pParent)
printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
}
@ -886,12 +886,15 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode)
return string;
}
static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, cv::instr::TYPE type, cv::instr::IMPL impl)
{
uint64 ticks = 0;
if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl)
ticks = pNode->m_payload.m_ticksMean;
if (pNode->m_pParent && (type < 0 || pNode->m_payload.m_instrType == type) && pNode->m_payload.m_implType == impl)
{
ticks = pNode->m_payload.m_ticksTotal;
return ticks;
}
for(size_t i = 0; i < pNode->m_childs.size(); i++)
ticks += getNodeTimeRec(pNode->m_childs[i], type, impl);
@ -899,7 +902,7 @@ static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
return ticks;
}
static uint64 getImplTime(int impl)
static uint64 getImplTime(cv::instr::IMPL impl)
{
uint64 ticks = 0;
cv::instr::InstrNode *pRoot = cv::instr::getTrace();
@ -915,7 +918,7 @@ static uint64 getTotalTime()
cv::instr::InstrNode *pRoot = cv::instr::getTrace();
for(size_t i = 0; i < pRoot->m_childs.size(); i++)
ticks += pRoot->m_childs[i]->m_payload.m_ticksMean;
ticks += pRoot->m_childs[i]->m_payload.m_ticksTotal;
return ticks;
}