Pyrogenesis  13997
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
wnuma.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2010 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "precompiled.h"
24 #include "lib/sysdep/numa.h"
25 
26 #include "lib/bits.h" // PopulationCount
27 #include "lib/alignment.h"
28 #include "lib/timer.h"
29 #include "lib/module_init.h"
30 #include "lib/sysdep/vm.h"
31 #include "lib/sysdep/acpi.h"
32 #include "lib/sysdep/os_cpu.h"
33 #include "lib/sysdep/os/win/win.h"
35 #include "lib/sysdep/os/win/wcpu.h"
36 #include <Psapi.h>
37 
38 #if ARCH_X86_X64
39 #include "lib/sysdep/arch/x86_x64/apic.h" // ProcessorFromApicId
40 #endif
41 
42 
43 //-----------------------------------------------------------------------------
44 // nodes
45 
46 struct Node // POD
47 {
48  // (Windows doesn't guarantee node numbers are contiguous, so
49  // we associate them with contiguous indices in nodes[])
50  UCHAR nodeNumber;
51 
53  uintptr_t processorMask;
54 };
55 
57 static size_t numNodes;
58 
59 static Node* AddNode()
60 {
61  ENSURE(numNodes < ARRAY_SIZE(nodes));
62  return &nodes[numNodes++];
63 }
64 
65 static Node* FindNodeWithProcessorMask(uintptr_t processorMask)
66 {
67  for(size_t node = 0; node < numNodes; node++)
68  {
69  if(nodes[node].processorMask == processorMask)
70  return &nodes[node];
71  }
72 
73  return 0;
74 }
75 
76 static Node* FindNodeWithProcessor(size_t processor)
77 {
78  for(size_t node = 0; node < numNodes; node++)
79  {
80  if(IsBitSet(nodes[node].processorMask, processor))
81  return &nodes[node];
82  }
83 
84  return 0;
85 }
86 
87 
88 //-----------------------------------------------------------------------------
89 // Windows topology
90 
91 static UCHAR HighestNodeNumber()
92 {
93  WUTIL_FUNC(pGetNumaHighestNodeNumber, BOOL, (PULONG));
94  WUTIL_IMPORT_KERNEL32(GetNumaHighestNodeNumber, pGetNumaHighestNodeNumber);
95  if(!pGetNumaHighestNodeNumber)
96  return 0; // NUMA not supported => only one node
97 
98  ULONG highestNodeNumber;
99  const BOOL ok = pGetNumaHighestNodeNumber(&highestNodeNumber);
100  WARN_IF_FALSE(ok);
101  return (UCHAR)highestNodeNumber;
102 }
103 
104 static void PopulateNodes()
105 {
106  WUTIL_FUNC(pGetNumaNodeProcessorMask, BOOL, (UCHAR, PULONGLONG));
107  WUTIL_IMPORT_KERNEL32(GetNumaNodeProcessorMask, pGetNumaNodeProcessorMask);
108  if(!pGetNumaNodeProcessorMask)
109  return;
110 
111  DWORD_PTR processAffinity, systemAffinity;
112  {
113  const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &processAffinity, &systemAffinity);
114  WARN_IF_FALSE(ok);
115  }
116  ENSURE(PopulationCount(processAffinity) <= PopulationCount(systemAffinity));
117 
118  for(UCHAR nodeNumber = 0; nodeNumber <= HighestNodeNumber(); nodeNumber++)
119  {
120  ULONGLONG affinity;
121  {
122  const BOOL ok = pGetNumaNodeProcessorMask(nodeNumber, &affinity);
123  WARN_IF_FALSE(ok);
124  }
125  if(!affinity)
126  continue; // empty node, skip
127 
128  Node* node = AddNode();
129  node->nodeNumber = nodeNumber;
130  node->processorMask = wcpu_ProcessorMaskFromAffinity(processAffinity, (DWORD_PTR)affinity);
131  }
132 }
133 
134 
135 //-----------------------------------------------------------------------------
136 // ACPI SRAT topology
137 
138 #if ARCH_X86_X64
139 
140 #pragma pack(push, 1)
141 
142 // fields common to Affinity* structures
143 struct AffinityHeader
144 {
145  u8 type;
146  u8 length; // size [bytes], including this header
147 };
148 
149 struct AffinityAPIC
150 {
151  static const u8 type = 0;
152 
153  AffinityHeader header;
154  u8 proximityDomainNumber0;
155  u8 apicId;
156  u32 flags;
157  u8 sapicId;
158  u8 proximityDomainNumber123[3];
159  u32 clockDomain;
160 
161  u32 ProximityDomainNumber() const
162  {
163  // (this is the apparent result of backwards compatibility, ugh.)
164  u32 proximityDomainNumber;
165  memcpy(&proximityDomainNumber, &proximityDomainNumber123[0]-1, sizeof(proximityDomainNumber));
166  proximityDomainNumber &= ~0xFF;
167  proximityDomainNumber |= proximityDomainNumber0;
168  return proximityDomainNumber;
169  }
170 };
171 
172 struct AffinityMemory
173 {
174  static const u8 type = 1;
175 
176  AffinityHeader header;
177  u32 proximityDomainNumber;
178  u16 reserved1;
179  u64 baseAddress;
180  u64 length;
181  u32 reserved2;
182  u32 flags;
183  u64 reserved3;
184 };
185 
186 // AffinityX2APIC omitted, since the APIC ID is sufficient for our purposes
187 
188 // Static Resource Affinity Table
189 struct SRAT
190 {
191  AcpiTable header;
192  u32 reserved1;
193  u8 reserved2[8];
194  AffinityHeader affinities[1];
195 };
196 
197 #pragma pack(pop)
198 
199 template<class Affinity>
200 static const Affinity* DynamicCastFromHeader(const AffinityHeader* header)
201 {
202  if(header->type != Affinity::type)
203  return 0;
204 
205  // sanity check: ensure no padding was inserted
206  ENSURE(header->length == sizeof(Affinity));
207 
208  const Affinity* affinity = (const Affinity*)header;
209  if(!IsBitSet(affinity->flags, 0)) // not enabled
210  return 0;
211 
212  return affinity;
213 }
214 
215 struct ProximityDomain
216 {
217  uintptr_t processorMask;
218  // (AffinityMemory's fields are not currently needed)
219 };
220 
221 typedef std::map<u32, ProximityDomain> ProximityDomains;
222 
223 static ProximityDomains ExtractProximityDomainsFromSRAT(const SRAT* srat)
224 {
225  ProximityDomains proximityDomains;
226 
227  for(const AffinityHeader* header = srat->affinities;
228  header < (const AffinityHeader*)(uintptr_t(srat)+srat->header.size);
229  header = (const AffinityHeader*)(uintptr_t(header) + header->length))
230  {
231  const AffinityAPIC* affinityAPIC = DynamicCastFromHeader<AffinityAPIC>(header);
232  if(affinityAPIC)
233  {
234  const size_t processor = ProcessorFromApicId(affinityAPIC->apicId);
235  const u32 proximityDomainNumber = affinityAPIC->ProximityDomainNumber();
236  ProximityDomain& proximityDomain = proximityDomains[proximityDomainNumber];
237  proximityDomain.processorMask |= Bit<uintptr_t>(processor);
238  }
239  }
240 
241  return proximityDomains;
242 }
243 
244 static void PopulateNodesFromProximityDomains(const ProximityDomains& proximityDomains)
245 {
246  for(ProximityDomains::const_iterator it = proximityDomains.begin(); it != proximityDomains.end(); ++it)
247  {
248  const u32 proximityDomainNumber = it->first;
249  const ProximityDomain& proximityDomain = it->second;
250 
251  Node* node = FindNodeWithProcessorMask(proximityDomain.processorMask);
252  if(!node)
253  node = AddNode();
254  // (we don't know Windows' nodeNumber; it has hopefully already been set)
255  node->proximityDomainNumber = proximityDomainNumber;
256  node->processorMask = proximityDomain.processorMask;
257  }
258 }
259 
260 #endif // #if ARCH_X86_X64
261 
262 
263 //-----------------------------------------------------------------------------
264 
266 
268 {
269  PopulateNodes();
270 
271 #if ARCH_X86_X64
272  const SRAT* srat = (const SRAT*)acpi_GetTable("SRAT");
273  if(srat && AreApicIdsReliable())
274  {
275  const ProximityDomains proximityDomains = ExtractProximityDomainsFromSRAT(srat);
276  PopulateNodesFromProximityDomains(proximityDomains);
277  }
278 #endif
279 
280  // neither OS nor ACPI information is available
281  if(numNodes == 0)
282  {
283  // add dummy node that contains all system processors
284  Node* node = AddNode();
285  node->nodeNumber = 0;
286  node->proximityDomainNumber = 0;
288  }
289 
290  return INFO::OK;
291 }
292 
294 {
295  (void)ModuleInit(&initState, InitTopology);
296  return numNodes;
297 }
298 
299 size_t numa_NodeFromProcessor(size_t processor)
300 {
301  (void)ModuleInit(&initState, InitTopology);
302  ENSURE(processor < os_cpu_NumProcessors());
303  Node* node = FindNodeWithProcessor(processor);
304  ENSURE(node);
305  return nodes-node;
306 }
307 
308 uintptr_t numa_ProcessorMaskFromNode(size_t node)
309 {
310  (void)ModuleInit(&initState, InitTopology);
311  ENSURE(node < numNodes);
312  return nodes[node].processorMask;
313 }
314 
315 static UCHAR NodeNumberFromNode(size_t node)
316 {
317  (void)ModuleInit(&initState, InitTopology);
318  ENSURE(node < numa_NumNodes());
319  return nodes[node].nodeNumber;
320 }
321 
322 
323 //-----------------------------------------------------------------------------
324 // memory info
325 
326 size_t numa_AvailableMemory(size_t node)
327 {
328  // note: it is said that GetNumaAvailableMemoryNode sometimes incorrectly
329  // reports zero bytes. the actual cause may however be unexpected
330  // RAM configuration, e.g. not all slots filled.
331  WUTIL_FUNC(pGetNumaAvailableMemoryNode, BOOL, (UCHAR, PULONGLONG));
332  WUTIL_IMPORT_KERNEL32(GetNumaAvailableMemoryNode, pGetNumaAvailableMemoryNode);
333  if(pGetNumaAvailableMemoryNode)
334  {
335  const UCHAR nodeNumber = NodeNumberFromNode(node);
336  ULONGLONG availableBytes;
337  const BOOL ok = pGetNumaAvailableMemoryNode(nodeNumber, &availableBytes);
338  WARN_IF_FALSE(ok);
339  const size_t availableMiB = size_t(availableBytes / MiB);
340  return availableMiB;
341  }
342  // NUMA not supported - return available system memory
343  else
344  return os_cpu_MemoryAvailable();
345 }
346 
347 
348 #pragma pack(push, 1)
349 
350 // ACPI System Locality Information Table
351 // (System Locality == Proximity Domain)
352 struct SLIT
353 {
356  u8 entries[1]; // numSystemLocalities*numSystemLocalities entries
357 };
358 
359 #pragma pack(pop)
360 
361 static double ReadRelativeDistanceFromSLIT(const SLIT* slit)
362 {
363  const size_t n = slit->numSystemLocalities;
364  ENSURE(slit->header.size == sizeof(SLIT)-sizeof(slit->entries)+n*n);
365  // diagonals are specified to be 10
366  for(size_t i = 0; i < n; i++)
367  ENSURE(slit->entries[i*n+i] == 10);
368  // entries = relativeDistance * 10
369  return *std::max_element(slit->entries, slit->entries+n*n) / 10.0;
370 }
371 
372 // @return ratio between max/min time required to access one node's
373 // memory from each processor.
374 static double MeasureRelativeDistance()
375 {
376  const size_t size = 32*MiB;
377  void* mem = vm::Allocate(size);
378  ASSUME_ALIGNED(mem, pageSize);
379 
380  const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask());
381 
382  double minTime = 1e10, maxTime = 0.0;
383  for(size_t node = 0; node < numa_NumNodes(); node++)
384  {
385  const uintptr_t processorMask = numa_ProcessorMaskFromNode(node);
386  os_cpu_SetThreadAffinityMask(processorMask);
387 
388  const double startTime = timer_Time();
389  memset(mem, 0, size);
390  const double elapsedTime = timer_Time() - startTime;
391 
392  minTime = std::min(minTime, elapsedTime);
393  maxTime = std::max(maxTime, elapsedTime);
394  }
395 
396  (void)os_cpu_SetThreadAffinityMask(previousProcessorMask);
397 
398  vm::Free(mem, size);
399 
400  return maxTime / minTime;
401 }
402 
403 static double relativeDistance;
404 
406 {
407  // early-out for non-NUMA systems (saves some time)
408  if(numa_NumNodes() == 1)
409  {
410  relativeDistance = 1.0;
411  return INFO::OK;
412  }
413 
414  // trust values reported by the BIOS, if available
415  const SLIT* slit = (const SLIT*)acpi_GetTable("SLIT");
416  if(slit)
417  relativeDistance = ReadRelativeDistanceFromSLIT(slit);
418  else
419  relativeDistance = MeasureRelativeDistance();
420 
421  ENSURE(relativeDistance >= 1.0);
422  ENSURE(relativeDistance <= 4.0);
423  return INFO::OK;
424 }
425 
426 double numa_Factor()
427 {
428  static ModuleInitState initState;
429  (void)ModuleInit(&initState, InitRelativeDistance);
430  return relativeDistance;
431 }
432 
433 
434 static bool IsMemoryInterleaved()
435 {
436  if(numa_NumNodes() == 1)
437  return false;
438 
439  if(!acpi_GetTable("FACP")) // no ACPI tables available
440  return false; // indeterminate, assume not interleaved
441 
442  if(acpi_GetTable("SRAT")) // present iff not interleaved
443  return false;
444 
445  return true;
446 }
447 
449 
451 {
452  isMemoryInterleaved = IsMemoryInterleaved();
453  return INFO::OK;
454 }
455 
457 {
458  static ModuleInitState initState;
459  (void)ModuleInit(&initState, InitMemoryInterleaved);
460  return isMemoryInterleaved;
461 }
462 
463 
464 //-----------------------------------------------------------------------------
465 
466 #if 0
467 
468 static bool VerifyPages(void* mem, size_t size, size_t pageSize, size_t node)
469 {
470  WUTIL_FUNC(pQueryWorkingSetEx, BOOL, (HANDLE, PVOID, DWORD));
471  WUTIL_IMPORT_KERNEL32(QueryWorkingSetEx, pQueryWorkingSetEx);
472  if(!pQueryWorkingSetEx)
473  return true; // can't do anything
474 
475 #if WINVER >= 0x600
477  ENSURE(largePageSize != 0); // this value is needed for later
478 
479  // retrieve attributes of all pages constituting mem
480  const size_t numPages = (size + pageSize-1) / pageSize;
481  PSAPI_WORKING_SET_EX_INFORMATION* wsi = new PSAPI_WORKING_SET_EX_INFORMATION[numPages];
482  for(size_t i = 0; i < numPages; i++)
483  wsi[i].VirtualAddress = (u8*)mem + i*pageSize;
484  pQueryWorkingSetEx(GetCurrentProcess(), wsi, DWORD(sizeof(PSAPI_WORKING_SET_EX_INFORMATION)*numPages));
485 
486  // ensure each is valid and allocated on the correct node
487  for(size_t i = 0; i < numPages; i++)
488  {
489  const PSAPI_WORKING_SET_EX_BLOCK& attributes = wsi[i].VirtualAttributes;
490  if(!attributes.Valid)
491  return false;
492  if((attributes.LargePage != 0) != (pageSize == largePageSize))
493  {
494  debug_printf(L"NUMA: is not a large page\n");
495  return false;
496  }
497  if(attributes.Node != node)
498  {
499  debug_printf(L"NUMA: allocated from remote node\n");
500  return false;
501  }
502  }
503 
504  delete[] wsi;
505 #else
506  UNUSED2(mem);
507  UNUSED2(size);
508  UNUSED2(pageSize);
509  UNUSED2(node);
510 #endif
511 
512  return true;
513 }
514 
515 #endif
#define u8
Definition: types.h:39
u32 proximityDomainNumber
Definition: wnuma.cpp:52
bool AreApicIdsReliable()
Definition: apic.cpp:105
LIB_API size_t numa_NodeFromProcessor(size_t processor)
Definition: unuma.cpp:34
static const size_t pageSize
Definition: alignment.h:61
const Status OK
Definition: status.h:386
#define WUTIL_FUNC(varName, ret, params)
Definition: wutil.h:44
static const size_t os_cpu_MaxProcessors
maximum number of processors supported by the OS (determined by the number of bits in an affinity mas...
Definition: os_cpu.h:50
#define ASSUME_ALIGNED(ptr, multiple)
u8 entries[1]
Definition: wnuma.cpp:356
static size_t numNodes
Definition: wnuma.cpp:57
LIB_API size_t numa_AvailableMemory(size_t node)
Definition: unuma.cpp:45
static Status InitMemoryInterleaved()
Definition: wnuma.cpp:450
Definition: wnuma.cpp:46
static size_t PopulationCount(T x)
Definition: bits.h:148
static Node nodes[os_cpu_MaxProcessors]
Definition: wnuma.cpp:56
const AcpiTable * acpi_GetTable(const char *signature)
Definition: acpi.cpp:362
LIB_API double numa_Factor()
Definition: unuma.cpp:51
UCHAR nodeNumber
Definition: wnuma.cpp:50
static UCHAR HighestNodeNumber()
Definition: wnuma.cpp:91
LIB_API uintptr_t numa_ProcessorMaskFromNode(size_t node)
Definition: unuma.cpp:39
size_t os_cpu_NumProcessors()
Definition: bcpu.cpp:34
size_t os_cpu_LargePageSize()
Definition: bcpu.cpp:79
uintptr_t processorMask
Definition: wnuma.cpp:53
size_t ProcessorFromApicId(ApicId apicId)
Definition: apic.cpp:129
int BOOL
Definition: wgl.h:51
uintptr_t os_cpu_SetThreadAffinityMask(uintptr_t processorMask)
restrict the current thread to a set of processors.
Definition: bcpu.cpp:109
u32 size
Definition: acpi.h:36
#define ARRAY_SIZE(name)
static UCHAR NodeNumberFromNode(size_t node)
Definition: wnuma.cpp:315
#define ENSURE(expr)
ensure the expression <expr> evaluates to non-zero.
Definition: debug.h:282
#define UNUSED2(param)
mark a function local variable or parameter as unused and avoid the corresponding compiler warning...
intptr_t ModuleInitState
initialization state of a module (class, source file, etc.) must be initialized to zero (e...
Definition: module_init.h:35
static void PopulateNodes()
Definition: wnuma.cpp:104
void * HANDLE
Definition: wgl.h:62
bool IsBitSet(T value, size_t index)
Definition: bits.h:54
size_t os_cpu_MemoryAvailable()
Definition: bcpu.cpp:98
unsigned long DWORD
Definition: wgl.h:56
uintptr_t wcpu_ProcessorMaskFromAffinity(DWORD_PTR processAffinity, DWORD_PTR affinity)
Definition: wcpu.cpp:189
static const size_t MiB
Definition: alignment.h:72
static Node * FindNodeWithProcessorMask(uintptr_t processorMask)
Definition: wnuma.cpp:65
static double relativeDistance
Definition: wnuma.cpp:403
Definition: acpi.h:33
i64 Status
Error handling system.
Definition: status.h:171
AcpiTable header
Definition: wnuma.cpp:354
static bool IsMemoryInterleaved()
Definition: wnuma.cpp:434
double timer_Time()
Definition: timer.cpp:98
uintptr_t os_cpu_ProcessorMask()
Definition: bcpu.cpp:57
static Node * FindNodeWithProcessor(size_t processor)
Definition: wnuma.cpp:76
#define u16
Definition: types.h:40
static Status InitTopology()
Definition: wnuma.cpp:267
#define u64
Definition: types.h:42
static double MeasureRelativeDistance()
Definition: wnuma.cpp:374
LIB_API size_t numa_NumNodes()
Definition: unuma.cpp:29
LIB_API bool numa_IsMemoryInterleaved()
Definition: unuma.cpp:56
static bool isMemoryInterleaved
Definition: wnuma.cpp:448
#define u32
Definition: types.h:41
#define WARN_IF_FALSE(expression)
Definition: status.h:360
static const size_t largePageSize
Definition: alignment.h:62
static ModuleInitState initState
Definition: wnuma.cpp:265
void * Allocate(size_t size, PageType pageType, int prot)
reserve address space and commit memory.
Definition: uvm.cpp:98
static double ReadRelativeDistanceFromSLIT(const SLIT *slit)
Definition: wnuma.cpp:361
Status ModuleInit(volatile ModuleInitState *initState, Status(*init)())
calls a user-defined init function if initState is zero.
Definition: module_init.cpp:40
void Free(void *p, size_t size)
decommit memory and release address space.
Definition: uvm.cpp:113
#define WUTIL_IMPORT_KERNEL32(procName, varName)
Definition: wutil.h:63
static Node * AddNode()
Definition: wnuma.cpp:59
void debug_printf(const wchar_t *fmt,...)
write a formatted string to the debug channel, subject to filtering (see below).
Definition: debug.cpp:142
Definition: wnuma.cpp:352
static Status InitRelativeDistance()
Definition: wnuma.cpp:405
u64 numSystemLocalities
Definition: wnuma.cpp:355