// opencv/modules/core/src/parallel.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/trace.private.hpp>
#include "opencv2/core/parallel/parallel_backend.hpp"
#include "parallel/parallel.hpp"
#if defined _WIN32 || defined WINCE
#include <windows.h>
#undef small
#undef min
#undef max
#undef abs
#endif
#if defined __unix__ || defined __APPLE__ || defined __GLIBC__ \
|| defined __HAIKU__ || defined __EMSCRIPTEN__ \
|| defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <fstream>
#if defined __ANDROID__
#include <sys/sysconf.h>
#include <sys/syscall.h>
#include <sched.h>
#elif defined __APPLE__
#include <sys/sysctl.h>
#endif
#endif
#ifndef OPENCV_DISABLE_THREAD_SUPPORT
#include <thread>
#endif
#ifdef _OPENMP
#define HAVE_OPENMP
#endif
#ifdef __APPLE__
#define HAVE_GCD
#endif
#if defined _MSC_VER && _MSC_VER >= 1600
#define HAVE_CONCURRENCY
#endif
/* IMPORTANT: always use the same order of defines
- HAVE_TBB - 3rdparty library, should be explicitly enabled
- HAVE_HPX - 3rdparty library, should be explicitly enabled
- HAVE_OPENMP - integrated to compiler, should be explicitly enabled
- HAVE_GCD - system wide, used automatically (APPLE only)
- WINRT - system wide, used automatically (Windows RT only)
- HAVE_CONCURRENCY - part of runtime, used automatically (Windows only - MSVS 10, MSVS 11)
- HAVE_PTHREADS_PF - pthreads if available
*/
#if defined HAVE_TBB
#ifndef TBB_SUPPRESS_DEPRECATED_MESSAGES // suppress warning
#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
#endif
#include "tbb/tbb.h"
#include "tbb/task.h"
#if TBB_INTERFACE_VERSION >= 8000
#include "tbb/task_arena.h"
#endif
#undef min
#undef max
#elif defined HAVE_HPX
#include <hpx/parallel/algorithms/for_loop.hpp>
#include <hpx/parallel/execution.hpp>
//
#include <hpx/hpx_start.hpp>
#include <hpx/hpx_suspend.hpp>
#include <hpx/include/apply.hpp>
#include <hpx/util/yield_while.hpp>
#include <hpx/include/threadmanager.hpp>
#elif defined HAVE_OPENMP
#include <omp.h>
#elif defined HAVE_GCD
#include <dispatch/dispatch.h>
#include <pthread.h>
#elif defined WINRT && _MSC_VER < 1900
#include <ppltasks.h>
#elif defined HAVE_CONCURRENCY
#include <ppl.h>
#elif defined HAVE_PTHREADS_PF
#include <pthread.h>
#endif
#if defined HAVE_TBB
# define CV_PARALLEL_FRAMEWORK "tbb"
#elif defined HAVE_HPX
# define CV_PARALLEL_FRAMEWORK "hpx"
#elif defined HAVE_OPENMP
# define CV_PARALLEL_FRAMEWORK "openmp"
#elif defined HAVE_GCD
# define CV_PARALLEL_FRAMEWORK "gcd"
#elif defined WINRT
# define CV_PARALLEL_FRAMEWORK "winrt-concurrency"
#elif defined HAVE_CONCURRENCY
# define CV_PARALLEL_FRAMEWORK "ms-concurrency"
#elif defined HAVE_PTHREADS_PF
# define CV_PARALLEL_FRAMEWORK "pthreads"
#endif
#include <atomic>
#include "parallel_impl.hpp"
#include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
#include <opencv2/core/utils/fp_control_utils.hpp>
#include <opencv2/core/utils/fp_control.private.hpp>
using namespace cv;
namespace cv {
ParallelLoopBody::~ParallelLoopBody() {}
using namespace cv::parallel;
namespace {
#ifdef ENABLE_INSTRUMENTATION
static void SyncNodes(cv::instr::InstrNode *pNode)
{
std::vector<cv::instr::NodeDataTls*> data;
pNode->m_payload.m_tls.gather(data);
uint64 ticksMax = 0;
int threads = 0;
for(size_t i = 0; i < data.size(); i++)
{
if(data[i] && data[i]->m_ticksTotal)
{
ticksMax = MAX(ticksMax, data[i]->m_ticksTotal);
pNode->m_payload.m_ticksTotal -= data[i]->m_ticksTotal;
data[i]->m_ticksTotal = 0;
threads++;
}
}
pNode->m_payload.m_ticksTotal += ticksMax;
pNode->m_payload.m_threads = MAX(pNode->m_payload.m_threads, threads);
for(size_t i = 0; i < pNode->m_childs.size(); i++)
SyncNodes(pNode->m_childs[i]);
}
#endif
class ParallelLoopBodyWrapperContext
{
public:
ParallelLoopBodyWrapperContext(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) :
is_rng_used(false), hasException(false)
{
body = &_body;
wholeRange = _r;
double len = wholeRange.end - wholeRange.start;
nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
// propagate main thread state
rng = cv::theRNG();
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
details::saveFPDenormalsState(fp_denormals_base_state);
#endif
#ifdef OPENCV_TRACE
traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
traceRootContext = CV_TRACE_NS::details::getTraceManager().tls.get();
#endif
#ifdef ENABLE_INSTRUMENTATION
pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
#endif
}
void finalize()
{
#ifdef ENABLE_INSTRUMENTATION
for(size_t i = 0; i < pThreadRoot->m_childs.size(); i++)
SyncNodes(pThreadRoot->m_childs[i]);
#endif
if (is_rng_used)
{
// Some parallel backends execute nested jobs in the main thread,
// so we need to restore initial RNG state here.
cv::theRNG() = rng;
// We can't properly update RNG state based on RNG usage in worker threads,
// so lets just change main thread RNG state to the next value.
// Note: this behaviour is not equal to single-threaded mode.
cv::theRNG().next();
}
#ifdef OPENCV_TRACE
if (traceRootRegion)
CV_TRACE_NS::details::parallelForFinalize(*traceRootRegion);
#endif
if (hasException)
{
#if CV__EXCEPTION_PTR
std::rethrow_exception(pException);
#else
CV_Error(Error::StsError, "Exception in parallel_for() body: " + exception_message);
#endif
}
}
~ParallelLoopBodyWrapperContext() {}
const cv::ParallelLoopBody* body;
cv::Range wholeRange;
int nstripes;
cv::RNG rng;
mutable bool is_rng_used;
#ifdef OPENCV_TRACE
CV_TRACE_NS::details::Region* traceRootRegion;
CV_TRACE_NS::details::TraceManagerThreadLocal* traceRootContext;
#endif
#ifdef ENABLE_INSTRUMENTATION
cv::instr::InstrNode *pThreadRoot;
#endif
bool hasException;
#if CV__EXCEPTION_PTR
std::exception_ptr pException;
#else
cv::String exception_message;
#endif
#if CV__EXCEPTION_PTR
void recordException()
#else
void recordException(const cv::String& msg)
#endif
{
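// Only the first exception is recorded. The unsynchronized pre-check below is a
// fast path (skipped under ThreadSanitizer to avoid a benign data-race report);
// the flag is re-checked under the initialization mutex before the exception is
// actually stored.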
#ifndef CV_THREAD_SANITIZER
if (!hasException)
#endif
{
cv::AutoLock lock(cv::getInitializationMutex());
if (!hasException)
{
hasException = true;
#if CV__EXCEPTION_PTR
pException = std::current_exception();
#else
exception_message = msg;
#endif
}
}
}
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
details::FPDenormalsModeState fp_denormals_base_state;
#endif
private:
ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
};
class ParallelLoopBodyWrapper : public cv::ParallelLoopBody
{
public:
ParallelLoopBodyWrapper(ParallelLoopBodyWrapperContext& ctx_) :
ctx(ctx_)
{
}
~ParallelLoopBodyWrapper()
{
}
void operator()(const cv::Range& sr) const CV_OVERRIDE
{
#ifdef OPENCV_TRACE
// TODO CV_TRACE_NS::details::setCurrentRegion(rootRegion);
if (ctx.traceRootRegion && ctx.traceRootContext)
CV_TRACE_NS::details::parallelForSetRootRegion(*ctx.traceRootRegion, *ctx.traceRootContext);
CV__TRACE_OPENCV_FUNCTION_NAME("parallel_for_body");
if (ctx.traceRootRegion)
CV_TRACE_NS::details::parallelForAttachNestedRegion(*ctx.traceRootRegion);
#endif
#ifdef ENABLE_INSTRUMENTATION
{
cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct();
pInstrTLS->pCurrentNode = ctx.pThreadRoot; // Initialize TLS node for thread
}
CV_INSTRUMENT_REGION();
#endif
// propagate main thread state
cv::theRNG() = ctx.rng;
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
#endif
cv::Range r;
cv::Range wholeRange = ctx.wholeRange;
int nstripes = ctx.nstripes;
r.start = (int)(wholeRange.start +
((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
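// Worked example (illustrative only): with wholeRange = [0, 100) and nstripes = 4,
// the worker stripe sr = [1, 2) maps to r = [25, 50):
//   r.start = 0 + (1*100 + 4/2)/4 = 25
//   r.end   = 0 + (2*100 + 4/2)/4 = 50
// so the stripes partition the whole range into near-equal, rounded chunks.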
#ifdef OPENCV_TRACE
CV_TRACE_ARG_VALUE(range_start, "range.start", (int64)r.start);
CV_TRACE_ARG_VALUE(range_end, "range.end", (int64)r.end);
#endif
try
{
(*ctx.body)(r);
}
#if CV__EXCEPTION_PTR
catch (...)
{
ctx.recordException();
}
#else
catch (const cv::Exception& e)
{
ctx.recordException(e.what());
}
catch (const std::exception& e)
{
ctx.recordException(e.what());
}
catch (...)
{
ctx.recordException("Unknown exception");
}
#endif
if (!ctx.is_rng_used && !(cv::theRNG() == ctx.rng))
ctx.is_rng_used = true;
}
cv::Range stripeRange() const { return cv::Range(0, ctx.nstripes); }
protected:
ParallelLoopBodyWrapperContext& ctx;
};
#if defined HAVE_TBB
class ProxyLoopBody : public ParallelLoopBodyWrapper
{
public:
ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx_)
: ParallelLoopBodyWrapper(ctx_)
{}
void operator ()(const tbb::blocked_range<int>& range) const
{
this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
}
void operator ()() const // run parallel job
{
cv::Range range = this->stripeRange();
tbb::parallel_for(tbb::blocked_range<int>(range.start, range.end), *this);
}
};
#elif defined HAVE_HPX
class ProxyLoopBody : public ParallelLoopBodyWrapper
{
public:
ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx_)
: ParallelLoopBodyWrapper(ctx_)
{}
void operator ()() const // run parallel job
{
cv::Range stripeRange = this->stripeRange();
hpx::parallel::for_loop(
hpx::parallel::execution::par,
stripeRange.start, stripeRange.end,
[&](const int &i) { ;
this->ParallelLoopBodyWrapper::operator()(
cv::Range(i, i + 1));
});
}
};
#elif defined HAVE_OPENMP
typedef ParallelLoopBodyWrapper ProxyLoopBody;
#elif defined HAVE_GCD
typedef ParallelLoopBodyWrapper ProxyLoopBody;
static void block_function(void* context, size_t index)
{
ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
(*ptr_body)(cv::Range((int)index, (int)index + 1));
}
#elif defined WINRT || defined HAVE_CONCURRENCY
class ProxyLoopBody : public ParallelLoopBodyWrapper
{
public:
ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx)
: ParallelLoopBodyWrapper(ctx)
{}
void operator ()(int i) const
{
this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
}
};
#else
typedef ParallelLoopBodyWrapper ProxyLoopBody;
#endif
#if defined HAVE_TBB
#if TBB_INTERFACE_VERSION >= 8000
static tbb::task_arena tbbArena(tbb::task_arena::automatic);
#else
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
#endif
#elif defined HAVE_HPX
// nothing for HPX
#elif defined HAVE_OPENMP
static inline int _initMaxThreads()
{
int maxThreads = omp_get_max_threads();
if (!utils::getConfigurationParameterBool("OPENCV_FOR_OPENMP_DYNAMIC_DISABLE", false))
{
omp_set_dynamic(1);
}
return maxThreads;
}
static int numThreadsMax = _initMaxThreads();
#elif defined HAVE_GCD
// nothing for GCD
#elif defined WINRT
// nothing for WINRT
#elif defined HAVE_CONCURRENCY
class SchedPtr
{
Concurrency::Scheduler* sched_;
public:
Concurrency::Scheduler* operator->() { return sched_; }
operator Concurrency::Scheduler*() { return sched_; }
void operator=(Concurrency::Scheduler* sched)
{
if (sched_) sched_->Release();
sched_ = sched;
}
SchedPtr() : sched_(0) {}
~SchedPtr() {}
};
static SchedPtr pplScheduler;
#endif
} // namespace anon
/* ================================ parallel_for_ ================================ */
static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes); // forward declaration
void parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
#ifdef OPENCV_TRACE
CV__TRACE_OPENCV_FUNCTION_NAME_("parallel_for", 0);
CV_TRACE_ARG_VALUE(range_start, "range.start", (int64)range.start);
CV_TRACE_ARG_VALUE(range_end, "range.end", (int64)range.end);
CV_TRACE_ARG_VALUE(nstripes, "nstripes", (int64)nstripes);
#endif
CV_INSTRUMENT_REGION_MT_FORK();
if (range.empty())
return;
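// A process-wide flag guards against nested parallelization: only the call that
// wins the exchange() below runs in parallel; any parallel_for_() issued while it
// is active (nested or concurrent) executes the body serially in the calling thread.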
static std::atomic<bool> flagNestedParallelFor(false);
bool isNotNestedRegion = !flagNestedParallelFor.load();
if (isNotNestedRegion)
isNotNestedRegion = !flagNestedParallelFor.exchange(true);
if (isNotNestedRegion)
{
try
{
parallel_for_impl(range, body, nstripes);
flagNestedParallelFor = false;
}
catch (...)
{
flagNestedParallelFor = false;
throw;
}
}
else // nested parallel_for_() calls are not parallelized
{
CV_UNUSED(nstripes);
body(range);
}
}
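// Usage sketch (illustrative only, not part of this translation unit): a typical
// caller derives from cv::ParallelLoopBody and processes whatever sub-range it is
// handed, e.g.:
//
//   struct SquareBody : public cv::ParallelLoopBody
//   {
//       std::vector<int>& v;
//       explicit SquareBody(std::vector<int>& v_) : v(v_) {}
//       void operator()(const cv::Range& r) const CV_OVERRIDE
//       {
//           for (int i = r.start; i < r.end; i++)
//               v[i] *= v[i];
//       }
//   };
//   // cv::parallel_for_(cv::Range(0, (int)v.size()), SquareBody(v));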
static
void parallel_for_cb(int start, int end, void* data)
{
CV_DbgAssert(data);
const cv::ParallelLoopBody& body = *(const cv::ParallelLoopBody*)data;
body(Range(start, end));
}
static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
using namespace cv::parallel;
if ((numThreads < 0 || numThreads > 1) && range.end - range.start > 1)
{
ParallelLoopBodyWrapperContext ctx(body, range, nstripes);
ProxyLoopBody pbody(ctx);
cv::Range stripeRange = pbody.stripeRange();
if( stripeRange.end - stripeRange.start == 1 )
{
body(range);
return;
}
std::shared_ptr<ParallelForAPI>& api = getCurrentParallelForAPI();
if (api)
{
CV_CheckEQ(stripeRange.start, 0, "");
api->parallel_for(stripeRange.end, parallel_for_cb, (void*)&pbody);
ctx.finalize(); // propagate exceptions, if any
return;
}
#ifdef CV_PARALLEL_FRAMEWORK
#if defined HAVE_TBB
#if TBB_INTERFACE_VERSION >= 8000
tbbArena.execute(pbody);
#else
pbody();
#endif
#elif defined HAVE_HPX
pbody();
#elif defined HAVE_OPENMP
#pragma omp parallel for schedule(dynamic) num_threads(numThreads > 0 ? numThreads : numThreadsMax)
for (int i = stripeRange.start; i < stripeRange.end; ++i)
pbody(Range(i, i + 1));
#elif defined HAVE_GCD
dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
#elif defined WINRT
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
#elif defined HAVE_CONCURRENCY
if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
{
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
}
else
{
pplScheduler->Attach();
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
Concurrency::CurrentScheduler::Detach();
}
#elif defined HAVE_PTHREADS_PF
parallel_for_pthreads(pbody.stripeRange(), pbody, pbody.stripeRange().size());
#else
#error You have hacked the build and are compiling with an unsupported parallel framework
#endif
ctx.finalize(); // propagate exceptions, if any
return;
#endif // CV_PARALLEL_FRAMEWORK
}
body(range);
}
int getNumThreads(void)
{
std::shared_ptr<ParallelForAPI>& api = getCurrentParallelForAPI();
if (api)
{
return api->getNumThreads();
}
if (numThreads == 0)
return 1;
#if defined HAVE_TBB
#if TBB_INTERFACE_VERSION >= 9100
return tbbArena.max_concurrency();
#elif TBB_INTERFACE_VERSION >= 8000
return numThreads > 0
? numThreads
: tbb::task_scheduler_init::default_num_threads();
#else
return tbbScheduler.is_active()
? numThreads
: tbb::task_scheduler_init::default_num_threads();
#endif
#elif defined HAVE_HPX
return numThreads;
#elif defined HAVE_OPENMP
return numThreads > 0
? numThreads
: numThreadsMax;
#elif defined HAVE_GCD
return cv::getNumberOfCPUs(); // the GCD thread pool limit
#elif defined WINRT
return 0;
#elif defined HAVE_CONCURRENCY
return (pplScheduler == 0)
? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
: (1 + pplScheduler->GetNumberOfVirtualProcessors());
#elif defined HAVE_PTHREADS_PF
return parallel_pthreads_get_threads_num();
#else
return 1;
#endif
}
unsigned defaultNumberOfThreads()
{
#ifdef __ANDROID__
// Many modern phones/tablets have 4-core CPUs. Use no more
// than 2 threads by default to avoid overheating the devices.
const unsigned int default_number_of_threads = 2;
#else
const unsigned int default_number_of_threads = (unsigned int)std::max(1, cv::getNumberOfCPUs());
#endif
unsigned result = default_number_of_threads;
static int config_num_threads = (int)utils::getConfigurationParameterSizeT("OPENCV_FOR_THREADS_NUM", 0);
if (config_num_threads)
{
result = (unsigned)std::max(1, config_num_threads);
// do we need an upper limit on the number of threads?
}
return result;
}
void setNumThreads( int threads_ )
{
CV_UNUSED(threads_);
int threads = (threads_ < 0) ? defaultNumberOfThreads() : (unsigned)threads_;
numThreads = threads;
std::shared_ptr<ParallelForAPI>& api = getCurrentParallelForAPI();
if (api)
{
api->setNumThreads(numThreads);
}
#ifdef HAVE_TBB
#if TBB_INTERFACE_VERSION >= 8000
if(tbbArena.is_active()) tbbArena.terminate();
if(threads > 0) tbbArena.initialize(threads);
#else
if(tbbScheduler.is_active()) tbbScheduler.terminate();
if(threads > 0) tbbScheduler.initialize(threads);
#endif
#elif defined HAVE_HPX
return; // nothing needed as numThreads is used
#elif defined HAVE_OPENMP
return; // nothing needed as num_threads clause is used in #pragma omp parallel for
#elif defined HAVE_GCD
// unsupported
// there is only private dispatch_queue_set_width() and only for desktop
#elif defined WINRT
return;
#elif defined HAVE_CONCURRENCY
if (threads <= 0)
{
pplScheduler = 0;
}
else if (threads == 1)
{
// Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
numThreads = 0;
}
else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
{
pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
Concurrency::MinConcurrency, threads-1,
Concurrency::MaxConcurrency, threads-1));
}
#elif defined HAVE_PTHREADS_PF
parallel_pthreads_set_threads_num(threads);
#endif
}
int getThreadNum()
{
std::shared_ptr<ParallelForAPI>& api = getCurrentParallelForAPI();
if (api)
{
return api->getThreadNum();
}
#if defined HAVE_TBB
#if TBB_INTERFACE_VERSION >= 9100
return tbb::this_task_arena::current_thread_index();
#elif TBB_INTERFACE_VERSION >= 8000
return tbb::task_arena::current_thread_index();
#else
return 0;
#endif
#elif defined HAVE_HPX
return (int)(hpx::get_num_worker_threads());
#elif defined HAVE_OPENMP
return omp_get_thread_num();
#elif defined HAVE_GCD
return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
#elif defined WINRT
return 0;
#elif defined HAVE_CONCURRENCY
return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for the master thread, a unique number for others, but not necessarily 1,2,3,...
#elif defined HAVE_PTHREADS_PF
return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
#else
return 0;
#endif
}
#if defined __linux__ || defined __GLIBC__ || defined __HAIKU__ || defined __ANDROID__
#define CV_CPU_GROUPS_1
#endif
#if defined __linux__ || defined __ANDROID__
#define CV_HAVE_CGROUPS 1
#endif
#if defined CV_CPU_GROUPS_1
static inline
std::string getFileContents(const char *filename)
{
std::ifstream ifs(filename);
if (!ifs.is_open())
return std::string();
std::string content( (std::istreambuf_iterator<char>(ifs) ),
(std::istreambuf_iterator<char>() ) );
if (ifs.fail())
return std::string();
return content;
}
static inline
int getNumberOfCPUsImpl(const char *filename)
{
std::string file_contents = getFileContents(filename);
if(file_contents.empty())
return 0;
char *pbuf = const_cast<char*>(file_contents.c_str());
//parse string of form "0-1,3,5-7,10,13-15"
int cpusAvailable = 0;
while(*pbuf)
{
const char* pos = pbuf;
bool range = false;
while(*pbuf && *pbuf != ',')
{
if(*pbuf == '-') range = true;
++pbuf;
}
if(*pbuf) *pbuf++ = 0;
if(!range)
++cpusAvailable;
else
{
int rstart = 0, rend = 0;
sscanf(pos, "%d-%d", &rstart, &rend);
cpusAvailable += rend - rstart + 1;
}
}
return cpusAvailable;
}
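// Example (illustrative only): a list of "0-1,3,5-7" yields
// (1-0+1) + 1 + (7-5+1) = 6 available CPUs.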
#endif
#if defined CV_HAVE_CGROUPS
static inline
unsigned getNumberOfCPUsCFS()
{
int cfs_quota = 0;
{
std::ifstream ss_period("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", std::ios::in | std::ios::binary);
ss_period >> cfs_quota;
if (ss_period.fail() || cfs_quota < 1) /* cfs_quota must not be 0 or negative */
return 0;
}
int cfs_period = 0;
{
std::ifstream ss_quota("/sys/fs/cgroup/cpu/cpu.cfs_period_us", std::ios::in | std::ios::binary);
ss_quota >> cfs_period;
if (ss_quota.fail() || cfs_period < 1)
return 0;
}
return (unsigned)max(1, cfs_quota/cfs_period);
}
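// Example (illustrative only): cfs_quota_us = 200000 with cfs_period_us = 100000
// corresponds to a cgroup limit of 200000/100000 = 2 CPUs.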
#endif
template <typename T> static inline
T minNonZero(const T& val_1, const T& val_2)
{
if ((val_1 != 0) && (val_2 != 0))
return std::min(val_1, val_2);
return (val_1 != 0) ? val_1 : val_2;
}
#ifndef OPENCV_DISABLE_THREAD_SUPPORT
static
int getNumberOfCPUs_()
{
#ifndef OPENCV_SEMIHOSTING
/*
* The logic here is to try different methods of getting the CPU count and return
* the smallest non-zero value, as it has the highest probability of being right and safe.
* Return 1 if all methods yield 0 or fail.
*/
#if defined CV_CXX11 \
&& !defined(__MINGW32__) /* not implemented (2020-03) */ \
/*
* Check the standard C++11 way first. We do not return immediately because,
* when running in a Docker or K8s environment, this reports the host
* machine's configuration rather than the container's or pod's, and per the
* docs this value must be "considered only a hint".
*/
unsigned ncpus = std::thread::hardware_concurrency(); /* If the value is not well defined or not computable, returns 0 */
#else
unsigned ncpus = 0; /* 0 means we have to find out some other way */
#endif
#if defined _WIN32
SYSTEM_INFO sysinfo = {};
#if (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(WINRT)) && _WIN32_WINNT >= 0x501
GetNativeSystemInfo( &sysinfo );
#else
GetSystemInfo( &sysinfo );
#endif
unsigned ncpus_sysinfo = sysinfo.dwNumberOfProcessors;
ncpus = minNonZero(ncpus, ncpus_sysinfo);
#elif defined __APPLE__
int numCPU=0;
int mib[4];
size_t len = sizeof(numCPU);
/* set the mib for hw.ncpu */
mib[0] = CTL_HW;
mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU;
/* get the number of CPUs from the system */
sysctl(mib, 2, &numCPU, &len, NULL, 0);
if( numCPU < 1 )
{
mib[1] = HW_NCPU;
sysctl( mib, 2, &numCPU, &len, NULL, 0 );
if( numCPU < 1 )
numCPU = 1;
}
ncpus = minNonZero(ncpus, (unsigned)numCPU);
#elif defined CV_CPU_GROUPS_1
#if defined CV_HAVE_CGROUPS
static unsigned ncpus_impl_cpuset = (unsigned)getNumberOfCPUsImpl("/sys/fs/cgroup/cpuset/cpuset.cpus");
ncpus = minNonZero(ncpus, ncpus_impl_cpuset);
static unsigned ncpus_impl_cfs = getNumberOfCPUsCFS();
ncpus = minNonZero(ncpus, ncpus_impl_cfs);
#endif
static unsigned ncpus_impl_devices = (unsigned)getNumberOfCPUsImpl("/sys/devices/system/cpu/online");
ncpus = minNonZero(ncpus, ncpus_impl_devices);
#endif
#if defined _GNU_SOURCE \
&& !defined(__MINGW32__) /* not implemented (2020-03) */ \
&& !defined(__EMSCRIPTEN__) \
&& !defined(__ANDROID__) // TODO: add check for modern Android NDK
cpu_set_t cpu_set;
if (0 == sched_getaffinity(0, sizeof(cpu_set), &cpu_set))
{
unsigned cpu_count_cpu_set = CPU_COUNT(&cpu_set);
ncpus = minNonZero(ncpus, cpu_count_cpu_set);
}
#endif
#if !defined(_WIN32) && !defined(__APPLE__) && defined(_SC_NPROCESSORS_ONLN)
static unsigned cpu_count_sysconf = (unsigned)sysconf( _SC_NPROCESSORS_ONLN );
ncpus = minNonZero(ncpus, cpu_count_sysconf);
#endif
return ncpus != 0 ? ncpus : 1;
#else // OPENCV_SEMIHOSTING
return 1;
#endif //OPENCV_SEMIHOSTING
}
int getNumberOfCPUs()
{
static int nCPUs = getNumberOfCPUs_();
return nCPUs; // cached value
}
#else // OPENCV_DISABLE_THREAD_SUPPORT
int getNumberOfCPUs()
{
return 1;
}
#endif // OPENCV_DISABLE_THREAD_SUPPORT
const char* currentParallelFramework()
{
std::shared_ptr<ParallelForAPI>& api = getCurrentParallelForAPI();
if (api)
{
return api->getName();
}
#ifdef CV_PARALLEL_FRAMEWORK
return CV_PARALLEL_FRAMEWORK;
#else
return NULL;
#endif
}
} // namespace cv::
CV_IMPL void cvSetNumThreads(int nt)
{
cv::setNumThreads(nt);
}
CV_IMPL int cvGetNumThreads()
{
return cv::getNumThreads();
}
CV_IMPL int cvGetThreadNum()
{
return cv::getThreadNum();
}