mirror of
https://github.com/microsoft/vcpkg.git
synced 2024-12-17 14:57:55 +08:00
92 lines
3.8 KiB
Diff
92 lines
3.8 KiB
Diff
diff --git a/hwy/contrib/thread_pool/topology.cc b/hwy/contrib/thread_pool/topology.cc
|
|
index 3d24f4f4..17f35630 100644
|
|
--- a/hwy/contrib/thread_pool/topology.cc
|
|
+++ b/hwy/contrib/thread_pool/topology.cc
|
|
@@ -64,7 +64,7 @@
|
|
|
|
namespace hwy {
|
|
|
|
-HWY_DLLEXPORT bool HaveThreadingSupport() {
|
|
+HWY_CONTRIB_DLLEXPORT bool HaveThreadingSupport() {
|
|
#if HWY_ARCH_WASM
|
|
return emscripten_has_threading_support() != 0;
|
|
#else
|
|
@@ -72,7 +72,7 @@ HWY_DLLEXPORT bool HaveThreadingSupport() {
|
|
#endif
|
|
}
|
|
|
|
-HWY_DLLEXPORT size_t TotalLogicalProcessors() {
|
|
+HWY_CONTRIB_DLLEXPORT size_t TotalLogicalProcessors() {
|
|
size_t lp = 0;
|
|
#if HWY_ARCH_WASM
|
|
const int num_cores = emscripten_num_logical_cores();
|
|
@@ -111,7 +111,7 @@ HWY_DLLEXPORT size_t TotalLogicalProcessors() {
|
|
#include <sys/syscall.h>
|
|
#endif
|
|
|
|
-HWY_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps) {
|
|
+HWY_CONTRIB_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps) {
|
|
#if HWY_OS_WIN
|
|
// Only support the first 64 because WINE does not support processor groups.
|
|
const HANDLE hThread = GetCurrentThread();
|
|
@@ -173,7 +173,7 @@ HWY_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps) {
|
|
#endif
|
|
}
|
|
|
|
-HWY_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps) {
|
|
+HWY_CONTRIB_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps) {
|
|
#if HWY_OS_WIN
|
|
const HANDLE hThread = GetCurrentThread();
|
|
const DWORD_PTR prev = SetThreadAffinityMask(hThread, lps.Get64());
|
|
@@ -385,7 +385,7 @@ std::vector<PerPackage> DetectPackages(std::vector<Topology::LP>& lps) {
|
|
} // namespace
|
|
#endif // HWY_OS_LINUX
|
|
|
|
-HWY_DLLEXPORT Topology::Topology() {
|
|
+HWY_CONTRIB_DLLEXPORT Topology::Topology() {
|
|
#if HWY_OS_LINUX
|
|
lps.resize(TotalLogicalProcessors());
|
|
const std::vector<PerPackage>& per_package = DetectPackages(lps);
|
|
diff --git a/hwy/contrib/thread_pool/topology.h b/hwy/contrib/thread_pool/topology.h
|
|
index 95b0835b..f80fc47c 100644
|
|
--- a/hwy/contrib/thread_pool/topology.h
|
|
+++ b/hwy/contrib/thread_pool/topology.h
|
|
@@ -28,7 +28,7 @@
|
|
namespace hwy {
|
|
|
|
// Returns false if std::thread should not be used.
|
|
-HWY_DLLEXPORT bool HaveThreadingSupport();
|
|
+HWY_CONTRIB_DLLEXPORT bool HaveThreadingSupport();
|
|
|
|
// Upper bound on logical processors, including hyperthreads.
|
|
static constexpr size_t kMaxLogicalProcessors = 1024; // matches glibc
|
|
@@ -38,12 +38,12 @@ using LogicalProcessorSet = BitSet4096<kMaxLogicalProcessors>;
|
|
|
|
// Returns false, or sets `lps` to all logical processors which are online and
|
|
// available to the current thread.
|
|
-HWY_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps);
|
|
+HWY_CONTRIB_DLLEXPORT bool GetThreadAffinity(LogicalProcessorSet& lps);
|
|
|
|
// Ensures the current thread can only run on the logical processors in `lps`.
|
|
// Returns false if not supported (in particular on Apple), or if the
|
|
// intersection between `lps` and `GetThreadAffinity` is the empty set.
|
|
-HWY_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps);
|
|
+HWY_CONTRIB_DLLEXPORT bool SetThreadAffinity(const LogicalProcessorSet& lps);
|
|
|
|
// Returns false, or ensures the current thread will only run on `lp`, which
|
|
// must not exceed `TotalLogicalProcessors`. Note that this merely calls
|
|
@@ -58,11 +58,11 @@ static inline bool PinThreadToLogicalProcessor(size_t lp) {
|
|
// provided by the hardware clamped to `kMaxLogicalProcessors`.
|
|
// These processors are not necessarily all usable; you can determine which are
|
|
// via GetThreadAffinity().
|
|
-HWY_DLLEXPORT size_t TotalLogicalProcessors();
|
|
+HWY_CONTRIB_DLLEXPORT size_t TotalLogicalProcessors();
|
|
|
|
struct Topology {
|
|
// Caller must check packages.empty(); if so, do not use any fields.
|
|
- HWY_DLLEXPORT Topology();
|
|
+ HWY_CONTRIB_DLLEXPORT Topology();
|
|
|
|
// Clique of cores with lower latency to each other. On Apple M1 these are
|
|
// four cores sharing an L2. On Zen4 these 'CCX' are up to eight cores sharing
|