Pyrogenesis  13997
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
topology.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2011 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * detection of CPU and cache topology
25  */
26 
27 #include "precompiled.h"
29 
30 #include <bitset>
31 
32 #include "lib/bits.h"
33 #include "lib/module_init.h"
34 #include "lib/sysdep/cpu.h" // ERR::CPU_FEATURE_MISSING
35 #include "lib/sysdep/os_cpu.h"
36 #include "lib/sysdep/numa.h"
40 
41 namespace topology {
42 
43 //---------------------------------------------------------------------------------------------------------------------
44 // detect *maximum* number of cores/packages/caches.
45 // note: some of them may be disabled by the OS or BIOS.
46 // note: Intel Appnote 485 assures us that they are uniform across packages.
47 
// Returns the maximum number of cores per package as reported by CPUID.
// The result stays at 1 (single-core) unless a CPUID query below succeeds.
// NOTE(review): listing lines 56 and 63 are absent from this extract, so the
// two statement groups inside the switch have no visible `case` labels and,
// as shown, only `default:` is reachable. They were presumably per-vendor
// labels (leaf 4 looks like the Intel query, 0x80000008 the AMD one) -
// restore them from the original file.
48 static size_t MaxCoresPerPackage()
49 {
50  // assume single-core unless one of the following applies:
51  size_t maxCoresPerPackage = 1;
52 
53  x86_x64::CpuidRegs regs = { 0 };
54  switch(x86_x64::Vendor())
55  {
 // query leaf 4, sub-leaf 0; EAX bits 26..31 are stored as (count - 1)
57  regs.eax = 4;
58  regs.ecx = 0;
59  if(x86_x64::cpuid(&regs))
60  maxCoresPerPackage = bits(regs.eax, 26, 31)+1;
61  break;
62 
 // query leaf 0x80000008; ECX bits 0..7 are stored as (count - 1)
64  regs.eax = 0x80000008;
65  if(x86_x64::cpuid(&regs))
66  maxCoresPerPackage = bits(regs.ecx, 0, 7)+1;
67  break;
68 
69  default:
70  break;
71  }
72 
73  return maxCoresPerPackage;
74 }
75 
76 
// Returns the maximum number of logical processors per core: when
// IsHyperthreadingCapable yields true, the logical-per-package count from
// CPUID leaf 1 (EBX bits 16..23) divided by MaxCoresPerPackage(); otherwise 1.
// NOTE(review): listing lines 84, 90 and 101 are absent from this extract.
// As shown, the first `return false` is unconditional (so the functor always
// yields false) and `if(!x86_x64::cpuid(&regs))` governs the declaration on
// the following line. Presumably the dropped lines held the two conditions
// and a failure handler - restore them from the original file.
77 static size_t MaxLogicalPerCore()
78 {
 // local functor deciding whether the HT indication can be trusted
79  struct IsHyperthreadingCapable
80  {
81  bool operator()() const
82  {
83  // definitely not
85  return false;
86 
87  // multi-core AMD systems falsely set the HT bit for reasons of
88  // compatibility. we'll just ignore it, because clearing it might
89  // confuse other callers.
91  return false;
92 
93  return true;
94  }
95  };
96  if(IsHyperthreadingCapable()())
97  {
98  x86_x64::CpuidRegs regs = { 0 };
99  regs.eax = 1;
100  if(!x86_x64::cpuid(&regs))
102  const size_t logicalPerPackage = bits(regs.ebx, 16, 23);
103  const size_t maxCoresPerPackage = MaxCoresPerPackage();
104  // cores ought to be uniform WRT # logical processors
105  ENSURE(logicalPerPackage % maxCoresPerPackage == 0);
106  const size_t maxLogicalPerCore = logicalPerPackage / maxCoresPerPackage;
107  return maxLogicalPerCore;
108  }
109  else
110  return 1;
111 }
112 
113 
// NOTE(review): the body (listing line 116) is absent from this extract, so
// as shown this non-void function has no return statement. Its caller,
// DetermineCachesProcessorMask, passes the result to ceil_log2 to size the
// cache-ID mask, which suggests it returns how many logical processors share
// one cache - restore the statement from the original file.
114 static size_t MaxLogicalPerCache()
115 {
117 }
118 
119 
120 //---------------------------------------------------------------------------------------------------------------------
121 // CPU topology interface
122 
123 // APIC IDs consist of variable-length bit fields indicating the logical,
124 // core, package and cache IDs. Vol3a says they aren't guaranteed to be
125 // contiguous, but that also applies to the individual fields.
126 // for example, quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7.
/**
 * describes one bit field within an APIC ID.
 * invoking the object extracts that field from the given value.
 **/
struct ApicField	// POD
{
	/**
	 * @param bits value to extract the field from (e.g. an APIC ID).
	 * @return <bits> shifted right by <shift> and limited to <mask>.
	 **/
	size_t operator()(size_t bits) const
	{
		return (bits >> shift) & mask;
	}

	size_t mask;	// applied after shifting; zero for zero-width fields
	size_t shift;	// index of the field's least significant bit
};
137 
138 struct CpuTopology // POD
139 {
140  size_t numProcessors; // total reported by OS
141 
145 
146  // how many are actually enabled
149  size_t numPackages;
150 };
153 
155 {
157 
158  const size_t maxLogicalPerCore = MaxLogicalPerCore();
159  const size_t maxCoresPerPackage = MaxCoresPerPackage();
160  const size_t maxPackages = 256; // "enough"
161 
162  const size_t logicalWidth = ceil_log2(maxLogicalPerCore);
163  const size_t coreWidth = ceil_log2(maxCoresPerPackage);
164  const size_t packageWidth = ceil_log2(maxPackages);
165 
166  cpuTopology.logical.mask = bit_mask<size_t>(logicalWidth);
167  cpuTopology.core.mask = bit_mask<size_t>(coreWidth);
168  cpuTopology.package.mask = bit_mask<size_t>(packageWidth);
169 
171  cpuTopology.core.shift = logicalWidth;
172  cpuTopology.package.shift = logicalWidth + coreWidth;
173 
174  if(AreApicIdsReliable())
175  {
176  struct NumUniqueValuesInField
177  {
178  size_t operator()(const ApicField& apicField) const
179  {
180  std::bitset<os_cpu_MaxProcessors> values;
181  for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
182  {
183  const ApicId apicId = ApicIdFromProcessor(processor);
184  const size_t value = apicField(apicId);
185  values.set(value);
186  }
187  return values.count();
188  }
189  };
190 
191  cpuTopology.logicalPerCore = NumUniqueValuesInField()(cpuTopology.logical);
192  cpuTopology.coresPerPackage = NumUniqueValuesInField()(cpuTopology.core);
193  cpuTopology.numPackages = NumUniqueValuesInField()(cpuTopology.package);
194  }
195  else // processor lacks an xAPIC, or IDs are invalid
196  {
197  struct MinPackages
198  {
199  size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
200  {
201  const size_t numNodes = numa_NumNodes();
202  const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
203  // NB: some cores or logical processors may be disabled.
204  const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
205  const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
206  return minPackagesPerNode*numNodes;
207  }
208  };
209 
210  // we can't differentiate between cores and logical processors.
211  // since the former are less likely to be disabled, we seek the
212  // maximum feasible number of cores and minimal number of packages:
213  const size_t minPackages = MinPackages()(maxCoresPerPackage, maxLogicalPerCore);
214  for(size_t numPackages = minPackages; numPackages <= cpuTopology.numProcessors; numPackages++)
215  {
216  if(cpuTopology.numProcessors % numPackages != 0)
217  continue;
218  const size_t logicalPerPackage = cpuTopology.numProcessors / numPackages;
219  const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
220  for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
221  {
222  if(logicalPerPackage % coresPerPackage != 0)
223  continue;
224  const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
225  if(logicalPerCore <= maxLogicalPerCore)
226  {
227  ENSURE(cpuTopology.numProcessors == numPackages*coresPerPackage*logicalPerCore);
228  cpuTopology.logicalPerCore = logicalPerCore;
229  cpuTopology.coresPerPackage = coresPerPackage;
230  cpuTopology.numPackages = numPackages;
231 
232  return INFO::OK;
233  }
234  }
235  }
236 
237  DEBUG_WARN_ERR(ERR::LOGIC); // didn't find a feasible topology
238  }
239 
240  return INFO::OK;
241 }
242 
243 
244 size_t NumPackages()
245 {
247  return cpuTopology.numPackages;
248 }
249 
251 {
254 }
255 
257 {
260 }
261 
262 size_t LogicalFromApicId(ApicId apicId)
263 {
264  const size_t contiguousId = ContiguousIdFromApicId(apicId);
265  return contiguousId % cpuTopology.logicalPerCore;
266 }
267 
268 size_t CoreFromApicId(ApicId apicId)
269 {
270  const size_t contiguousId = ContiguousIdFromApicId(apicId);
271  return (contiguousId / cpuTopology.logicalPerCore) % cpuTopology.coresPerPackage;
272 }
273 
274 size_t PackageFromApicId(ApicId apicId)
275 {
276  const size_t contiguousId = ContiguousIdFromApicId(apicId);
277  return contiguousId / (cpuTopology.logicalPerCore * cpuTopology.coresPerPackage);
278 }
279 
280 
281 ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage)
282 {
284 
285  size_t contiguousId = 0;
286  ENSURE(idxPackage < cpuTopology.numPackages);
287  contiguousId += idxPackage;
288 
289  contiguousId *= cpuTopology.coresPerPackage;
291  contiguousId += idxCore;
292 
293  contiguousId *= cpuTopology.logicalPerCore;
294  ENSURE(idxLogical < cpuTopology.logicalPerCore);
295  contiguousId += idxLogical;
296 
297  ENSURE(contiguousId < cpuTopology.numProcessors);
298  return ApicIdFromContiguousId(contiguousId);
299 }
300 
301 
302 //---------------------------------------------------------------------------------------------------------------------
303 // cache topology
304 
305 // note: Windows 2003 GetLogicalProcessorInformation provides similar
306 // functionality but returns incorrect results. (it claims all cores in
307 // an Intel Core2 Quad processor share a single L2 cache.)
308 
310 {
311 public:
312  /**
313  * add processor to the processor mask owned by cache identified by <id>
314  **/
315  void Add(u8 cacheId, size_t processor)
316  {
317  SharedCache* cache = Find(cacheId);
318  if(!cache)
319  {
320  m_caches.push_back(cacheId);
321  cache = &m_caches.back();
322  }
323  cache->Add(processor);
324  }
325 
326  size_t NumCaches() const
327  {
328  return m_caches.size();
329  }
330 
331  /**
332  * store topology in an array (one entry per cache) of masks
333  * representing the processors that share a cache.
334  **/
335  void StoreProcessorMasks(uintptr_t* cachesProcessorMask)
336  {
337  for(size_t i = 0; i < NumCaches(); i++)
338  cachesProcessorMask[i] = m_caches[i].ProcessorMask();
339  }
340 
341 private:
342  /**
343  * stores ID and tracks which processors share this cache
344  **/
346  {
347  public:
348  SharedCache(u8 cacheId)
349  : m_cacheId(cacheId), m_processorMask(0)
350  {
351  }
352 
353  bool Matches(u8 cacheId) const
354  {
355  return m_cacheId == cacheId;
356  }
357 
358  void Add(size_t processor)
359  {
360  m_processorMask |= uintptr_t(1) << processor;
361  }
362 
363  uintptr_t ProcessorMask() const
364  {
365  return m_processorMask;
366  }
367 
368  private:
370  uintptr_t m_processorMask;
371  };
372 
373  SharedCache* Find(u8 cacheId)
374  {
375  for(size_t i = 0; i < m_caches.size(); i++)
376  {
377  if(m_caches[i].Matches(cacheId))
378  return &m_caches[i];
379  }
380 
381  return 0;
382  }
383 
384  std::vector<SharedCache> m_caches;
385 };
386 
387 static void DetermineCachesProcessorMask(uintptr_t* cachesProcessorMask, size_t& numCaches)
388 {
389  CacheRelations cacheRelations;
390  if(AreApicIdsReliable())
391  {
392  const size_t numBits = ceil_log2(MaxLogicalPerCache());
393  const u8 cacheIdMask = u8((0xFF << numBits) & 0xFF);
394  for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
395  {
396  const ApicId apicId = ApicIdFromProcessor(processor);
397  const u8 cacheId = u8(apicId & cacheIdMask);
398  cacheRelations.Add(cacheId, processor);
399  }
400  }
401  else
402  {
403  for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
404  {
405  // assume each processor has exactly one cache with matching IDs
406  const u8 cacheId = (u8)processor;
407  cacheRelations.Add(cacheId, processor);
408  }
409  }
410 
411  numCaches = cacheRelations.NumCaches();
412  cacheRelations.StoreProcessorMasks(cachesProcessorMask);
413 }
414 
415 
416 static void DetermineProcessorsCache(const uintptr_t* cachesProcessorMask, size_t numCaches, size_t* processorsCache, size_t numProcessors)
417 {
418  for(size_t cache = 0; cache < numCaches; cache++)
419  {
420  // write to all entries that share this cache
421  const uintptr_t processorMask = cachesProcessorMask[cache];
422  for(size_t processor = 0; processor < numProcessors; processor++)
423  {
424  if(IsBitSet(processorMask, processor))
425  {
426  ENSURE(processorsCache[processor] == 0);
427  processorsCache[processor] = cache;
428  }
429  }
430  }
431 }
432 
433 
434 //---------------------------------------------------------------------------------------------------------------------
435 // cache topology interface
436 
437 struct CacheTopology // POD
438 {
439  size_t numCaches;
442 };
445 
447 {
451  return INFO::OK;
452 }
453 
454 size_t NumCaches()
455 {
457  return cacheTopology.numCaches;
458 }
459 
460 size_t CacheFromProcessor(size_t processor)
461 {
463  ENSURE(processor < os_cpu_NumProcessors());
464  return cacheTopology.processorsCache[processor];
465 }
466 
467 uintptr_t ProcessorMaskFromCache(size_t cache)
468 {
470  ENSURE(cache < cacheTopology.numCaches);
471  return cacheTopology.cachesProcessorMask[cache];
472 }
473 
474 } // namespace topology
size_t CoreFromApicId(ApicId apicId)
Definition: topology.cpp:268
#define u8
Definition: types.h:39
const Status LOGIC
Definition: status.h:409
size_t NumPackages()
Definition: topology.cpp:244
bool AreApicIdsReliable()
Definition: apic.cpp:105
static CpuTopology cpuTopology
Definition: topology.cpp:151
SharedCache * Find(u8 cacheId)
Definition: topology.cpp:373
static size_t MaxLogicalPerCore()
Definition: topology.cpp:77
T DivideRoundUp(T dividend, T divisor)
Definition: lib.h:75
bool Matches(u8 cacheId) const
Definition: topology.cpp:353
size_t LogicalPerCore()
Definition: topology.cpp:256
const Status OK
Definition: status.h:386
const x86_x64::Cache * Caches(size_t idxCache)
Definition: cache.cpp:649
std::vector< SharedCache > m_caches
Definition: topology.cpp:384
static const size_t os_cpu_MaxProcessors
maximum number of processors supported by the OS (determined by the number of bits in an affinity mas...
Definition: os_cpu.h:50
static size_t MaxLogicalPerCache()
Definition: topology.cpp:114
static size_t numNodes
Definition: wnuma.cpp:57
static ModuleInitState cacheInitState
Definition: topology.cpp:444
size_t PackageFromApicId(ApicId apicId)
Definition: topology.cpp:274
static const size_t numCaches
Definition: cache.cpp:37
static Status InitCpuTopology()
Definition: topology.cpp:154
static size_t PopulationCount(T x)
Definition: bits.h:148
static size_t MaxCoresPerPackage()
Definition: topology.cpp:48
Vendors Vendor()
Definition: x86_x64.cpp:200
LIB_API uintptr_t numa_ProcessorMaskFromNode(size_t node)
Definition: unuma.cpp:39
size_t os_cpu_NumProcessors()
Definition: bcpu.cpp:34
ApicId ApicIdFromProcessor(size_t processor)
Definition: apic.cpp:147
size_t NumCaches() const
Definition: topology.cpp:326
size_t processorsCache[os_cpu_MaxProcessors]
Definition: topology.cpp:440
ApicId ApicIdFromContiguousId(size_t contiguousId)
Definition: apic.cpp:152
size_t LogicalFromApicId(ApicId apicId)
Definition: topology.cpp:262
#define ENSURE(expr)
ensure the expression <expr> evaluates to non-zero.
Definition: debug.h:282
intptr_t ModuleInitState
initialization state of a module (class, source file, etc.) must be initialized to zero (e...
Definition: module_init.h:35
bool cpuid(CpuidRegs *regs)
invoke CPUID instruction.
Definition: x86_x64.cpp:98
void StoreProcessorMasks(uintptr_t *cachesProcessorMask)
store topology in an array (one entry per cache) of masks representing the processors that share a ca...
Definition: topology.cpp:335
size_t sharedBy
how many logical processors share this cache?
Definition: cache.h:72
bool IsBitSet(T value, size_t index)
Definition: bits.h:54
size_t CoresPerPackage()
Definition: topology.cpp:250
static void DetermineProcessorsCache(const uintptr_t *cachesProcessorMask, size_t numCaches, size_t *processorsCache, size_t numProcessors)
Definition: topology.cpp:416
size_t NumCaches()
Definition: topology.cpp:454
static Status InitCacheTopology()
Definition: topology.cpp:446
ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage)
Definition: topology.cpp:281
size_t operator()(size_t bits) const
Definition: topology.cpp:129
i64 Status
Error handling system.
Definition: status.h:171
size_t ContiguousIdFromApicId(ApicId apicId)
Definition: apic.cpp:134
T bits(T num, size_t lo_idx, size_t hi_idx)
extract the value of bits hi_idx:lo_idx within num
Definition: bits.h:97
const Status CPU_FEATURE_MISSING
Definition: cpu.h:35
#define DEBUG_WARN_ERR(status)
display the error dialog with text corresponding to the given error code.
Definition: debug.h:331
static void DetermineCachesProcessorMask(uintptr_t *cachesProcessorMask, size_t &numCaches)
Definition: topology.cpp:387
LIB_API size_t numa_NumNodes()
Definition: unuma.cpp:29
bool Cap(Caps cap)
Definition: x86_x64.cpp:142
size_t ceil_log2(T x)
ceil(log2(x))
Definition: bits.h:197
void Add(u8 cacheId, size_t processor)
add processor to the processor mask owned by cache identified by <id>
Definition: topology.cpp:315
u8 ApicId
Definition: apic.h:26
uintptr_t ProcessorMaskFromCache(size_t cache)
Definition: topology.cpp:467
registers used/returned by cpuid
Definition: x86_x64.h:46
static CacheTopology cacheTopology
Definition: topology.cpp:443
uintptr_t cachesProcessorMask[os_cpu_MaxProcessors]
Definition: topology.cpp:441
static ModuleInitState cpuInitState
Definition: topology.cpp:152
Status ModuleInit(volatile ModuleInitState *initState, Status(*init)())
calls a user-defined init function if initState is zero.
Definition: module_init.cpp:40
stores ID and tracks which processors share this cache
Definition: topology.cpp:345
size_t CacheFromProcessor(size_t processor)
Definition: topology.cpp:460