diff --git a/hwy/contrib/thread_pool/topology.cc b/hwy/contrib/thread_pool/topology.cc index 3d24f4f4..17f35630 100644 --- a/hwy/contrib/thread_pool/topology.cc +++ b/hwy/contrib/thread_pool/topology.cc @@ -64,7 +64,7 @@ namespace hwy { -HWY_DLLEXPORT bool HaveThreadingSupport() { +HWY_CONTRIB_DLLEXPORT bool HaveThreadingSupport() { #if HWY_ARCH_WASM return emscripten_has_threading_support() != 0; #else @@ -72,7 +72,7 @@ HWY_DLLEXPORT bool HaveThreadingSupport() { #endif } -HWY_DLLEXPORT size_t TotalLogicalProcessors() { +HWY_CONTRIB_DLLEXPORT size_t TotalLogicalProcessors() { size_t lp = 0; #if HWY_ARCH_WASM const int num_cores = emscripten_num_logical_cores(); @@ -111,7 +111,7 @@ HWY_DLLEXPORT size_t TotalLogicalProcessors() { #include #endif -HWY_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps) { +HWY_CONTRIB_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps) { #if HWY_OS_WIN // Only support the first 64 because WINE does not support processor groups. const HANDLE hThread = GetCurrentThread(); @@ -173,7 +173,7 @@ HWY_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps) { #endif } -HWY_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps) { +HWY_CONTRIB_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps) { #if HWY_OS_WIN const HANDLE hThread = GetCurrentThread(); const DWORD_PTR prev = SetThreadAffinityMask(hThread, lps.Get64()); @@ -385,7 +385,7 @@ std::vector DetectPackages(std::vector& lps) { } // namespace #endif // HWY_OS_LINUX -HWY_DLLEXPORT Topology::Topology() { +HWY_CONTRIB_DLLEXPORT Topology::Topology() { #if HWY_OS_LINUX lps.resize(TotalLogicalProcessors()); const std::vector& per_package = DetectPackages(lps); diff --git a/hwy/contrib/thread_pool/topology.h b/hwy/contrib/thread_pool/topology.h index 95b0835b..f80fc47c 100644 --- a/hwy/contrib/thread_pool/topology.h +++ b/hwy/contrib/thread_pool/topology.h @@ -28,7 +28,7 @@ namespace hwy { // Returns false if std::thread should not be used. -HWY_DLLEXPORT bool HaveThreadingSupport(); +HWY_CONTRIB_DLLEXPORT bool HaveThreadingSupport(); // Upper bound on logical processors, including hyperthreads. static constexpr size_t kMaxLogicalProcessors = 1024; // matches glibc @@ -38,12 +38,12 @@ using LogicalProcessorSet = BitSet4096; // Returns false, or sets `lps` to all logical processors which are online and // available to the current thread. -HWY_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps); +HWY_CONTRIB_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps); // Ensures the current thread can only run on the logical processors in `lps`. // Returns false if not supported (in particular on Apple), or if the // intersection between `lps` and `GetThreadAffinity` is the empty set. -HWY_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps); +HWY_CONTRIB_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps); // Returns false, or ensures the current thread will only run on `lp`, which // must not exceed `TotalLogicalProcessors`. Note that this merely calls @@ -58,11 +58,11 @@ static inline bool PinThreadToLogicalProcessor(size_t lp) { // provided by the hardware clamped to `kMaxLogicalProcessors`. // These processors are not necessarily all usable; you can determine which are // via GetThreadAffinity(). -HWY_DLLEXPORT size_t TotalLogicalProcessors(); +HWY_CONTRIB_DLLEXPORT size_t TotalLogicalProcessors(); struct Topology { // Caller must check packages.empty(); if so, do not use any fields. - HWY_DLLEXPORT Topology(); + HWY_CONTRIB_DLLEXPORT Topology(); // Clique of cores with lower latency to each other. On Apple M1 these are // four cores sharing an L2. On Zen4 these 'CCX' are up to eight cores sharing