Pyrogenesis  13997
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cache.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2011 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
#include "precompiled.h"
#include "lib/sysdep/arch/x86_x64/cache.h"

#include "lib/bits.h"
#include "lib/alignment.h"
#include "lib/module_init.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/arch/x86_x64/x86_x64.h"
31 
32 namespace x86_x64 {
33 
34 static const size_t maxTLBs = 2*2*4; // (level0, level1) x (D,I) x (4K, 2M, 4M, 1G)
35 static size_t numTLBs = 0;
36 
37 static const size_t numCaches = x86_x64::Cache::maxLevels * 2 + maxTLBs;
39 
40 
41 static void AddCache(const x86_x64::Cache& cache)
42 {
43  ENSURE(cache.Validate());
44 
46  caches[L1D + cache.level-1] = cache;
48  caches[L1I + cache.level-1] = cache;
49 }
50 
51 
52 static void AddTLB(const x86_x64::Cache& tlb)
53 {
54  ENSURE(tlb.Validate());
55  ENSURE(tlb.level == 1 || tlb.level == 2); // see maxTLBs
56 
58  caches[TLB+numTLBs++] = tlb;
59 }
60 
61 
62 //-----------------------------------------------------------------------------
63 // AMD
64 
65 // (Intel has subsequently added support for function 0x80000006, but
66 // only returns ECX, i.e. L2 information.)
67 namespace AMD
68 {
69 
71 {
72  x86_x64::Cache cache;
73  cache.Initialize(1, type);
74 
75  const size_t lineSize = bits(reg, 0, 7);
76  const size_t associativity = bits(reg, 16, 23); // 0 = reserved
77  const size_t totalSize = bits(reg, 24, 31)*KiB;
78  if(lineSize != 0 && associativity != 0 && totalSize != 0)
79  {
80  cache.numEntries = totalSize / lineSize;
81  cache.entrySize = lineSize;
82  cache.associativity = associativity;
83  cache.sharedBy = 1;
84  }
85  return cache;
86 }
87 
// decoding table for the 4-bit associativity codes returned by
// CPUID.80000006; applies to L2, L3 and TLB2.
// zero entries are reserved/invalid codes and yield a disabled cache.
static const size_t associativityTable[16] =
{
	0, 1, 2, 0, 4, 0, 8, 0,
	16, 0, 32, 48, 64, 96, 128, x86_x64::Cache::fullyAssociative
};
94 
96 {
97  x86_x64::Cache cache;
98  cache.Initialize(2, type);
99 
100  const size_t lineSize = bits(reg, 0, 7);
101  const size_t idxAssociativity = bits(reg, 12, 15); // 0 = disabled
102  const size_t totalSize = bits(reg, 16, 31)*KiB;
103  if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0)
104  {
105  cache.numEntries = totalSize / lineSize;
106  cache.entrySize = lineSize;
107  cache.associativity = associativityTable[idxAssociativity];
108  cache.sharedBy = 1;
109  }
110  return cache;
111 }
112 
113 // (same as L2 except for the size)
115 {
116  x86_x64::Cache cache;
117  cache.Initialize(3, type);
118 
119  const size_t lineSize = bits(reg, 0, 7);
120  const size_t idxAssociativity = bits(reg, 12, 15); // 0 = disabled
121  const size_t totalSize = bits(reg, 18, 31)*512*KiB; // (rounded down)
122  // NB: some Athlon 64 X2 models have no L3 cache
123  if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0)
124  {
125  cache.numEntries = totalSize / lineSize;
126  cache.entrySize = lineSize;
127  cache.associativity = associativityTable[idxAssociativity];
128  cache.sharedBy = 1;
129  }
130  return cache;
131 }
132 
133 static x86_x64::Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
134 {
135  x86_x64::Cache cache;
136  cache.Initialize(1, type);
137 
138  const size_t numEntries = bits(reg, bitOffset+0, bitOffset+ 7);
139  const size_t associativity = bits(reg, bitOffset+8, bitOffset+15); // 0 = reserved
140  if(numEntries != 0 && associativity != 0)
141  {
142  cache.numEntries = numEntries;
143  cache.entrySize = pageSize;
144  cache.associativity = associativity;
145  cache.sharedBy = 1;
146  }
147  return cache;
148 }
149 
150 static x86_x64::Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
151 {
152  x86_x64::Cache cache;
153  cache.Initialize(2, type);
154 
155  const size_t numEntries = bits(reg, bitOffset+ 0, bitOffset+11);
156  const size_t idxAssociativity = bits(reg, bitOffset+12, bitOffset+15); // 0 = disabled
157  if(numEntries != 0 && idxAssociativity != 0)
158  {
159  cache.numEntries = numEntries;
160  cache.entrySize = pageSize;
161  cache.associativity = associativityTable[idxAssociativity];
162  cache.sharedBy = 1;
163  }
164  return cache;
165 }
166 
167 static void AddTLB2Pair(u32 reg, size_t pageSize)
168 {
170  if(bits(reg, 16, 31) != 0) // not unified
171  {
172  AddTLB(TLB2(reg, 16, pageSize, x86_x64::Cache::kData));
174  }
175  AddTLB(TLB2(reg, 0, pageSize, type));
176 }
177 
178 // AMD reports maxCpuidIdFunction > 4 but consider functions 2..4 to be
179 // "reserved". cache characteristics are returned via ext. functions.
180 static void DetectCacheAndTLB()
181 {
182  x86_x64::CpuidRegs regs = { 0 };
183 
184  regs.eax = 0x80000005;
185  if(x86_x64::cpuid(&regs))
186  {
189 
191  AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64::Cache::kData));
193  AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64::Cache::kData));
194  }
195 
196  regs.eax = 0x80000006;
197  if(x86_x64::cpuid(&regs))
198  {
201 
202  AddTLB2Pair(regs.eax, 2*MiB);
203  AddTLB2Pair(regs.ebx, 4*KiB);
204  }
205 }
206 
207 } // namespace AMD
208 
209 
210 //-----------------------------------------------------------------------------
211 // CPUID.4
212 
213 namespace CPUID4 {
214 
215 static bool DetectCache()
216 {
217  // note: level order is unspecified (see Intel AP-485)
218  for(u32 count = 0; ; count++)
219  {
220  x86_x64::CpuidRegs regs = { 0 };
221  regs.eax = 4;
222  regs.ecx = count;
223  if(!x86_x64::cpuid(&regs))
224  return false;
225 
226  const x86_x64::Cache::Type type = (x86_x64::Cache::Type)bits(regs.eax, 0, 4);
227  if(type == x86_x64::Cache::kNull) // no more remaining
228  break;
229 
230  const size_t level = (size_t)bits(regs.eax, 5, 7);
231  const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1;
232  const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1;
233 
234  x86_x64::Cache cache;
235  cache.Initialize(level, type);
236  cache.entrySize = (size_t)bits(regs.ebx, 0, 11)+1; // (yes, this also uses +1 encoding)
237  cache.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
238  cache.sharedBy = (size_t)bits(regs.eax, 14, 25)+1;
239  cache.numEntries = cache.associativity * partitions * sets;
240 
241  AddCache(cache);
242  }
243 
244  return true;
245 }
246 
247 } // namespace CPUID4
248 
249 
250 //-----------------------------------------------------------------------------
251 // CPUID.2 (descriptors)
252 
253 namespace CPUID2 {
254 
// CPUID.2 reports caches/TLBs as opaque 8-bit "descriptor" codes.
typedef u8 Descriptor;
typedef std::vector<Descriptor> Descriptors;
257 
258 static void AppendDescriptors(u32 reg, Descriptors& descriptors)
259 {
260  if(IsBitSet(reg, 31)) // register contents are reserved
261  return;
262  for(int pos = 24; pos >= 0; pos -= 8)
263  {
264  const u8 descriptor = (u8)bits(reg, pos, pos+7);
265  if(descriptor != 0)
266  descriptors.push_back(descriptor);
267  }
268 }
269 
270 
272 {
273  // ensure consistency by pinning to a CPU.
274  // (don't use a hard-coded mask because process affinity may be restricted)
275  const uintptr_t allProcessors = os_cpu_ProcessorMask();
276  const uintptr_t firstProcessor = allProcessors & -intptr_t(allProcessors);
277  const uintptr_t prevAffinityMask = os_cpu_SetThreadAffinityMask(firstProcessor);
278 
279  x86_x64::CpuidRegs regs = { 0 };
280  regs.eax = 2;
281  if(!x86_x64::cpuid(&regs))
282  return Descriptors();
283 
284  Descriptors descriptors;
285  size_t iterations = bits(regs.eax, 0, 7);
286  for(;;) // abort mid-loop (invoke CPUID exactly <iterations> times)
287  {
288  AppendDescriptors(bits(regs.eax, 8, 31), descriptors);
289  AppendDescriptors(regs.ebx, descriptors);
290  AppendDescriptors(regs.ecx, descriptors);
291  AppendDescriptors(regs.edx, descriptors);
292  if(--iterations == 0)
293  break;
294  regs.eax = 2;
295  const bool ok = x86_x64::cpuid(&regs);
296  ENSURE(ok);
297  }
298 
299  os_cpu_SetThreadAffinityMask(prevAffinityMask);
300 
301  return descriptors;
302 }
303 
304 
305 // note: the following cannot be moved into a function because
306 // ARRAY_SIZE's template argument must not reference a local type.
307 
// bit layout of Characteristics::flags (see the CACHE/TLB macros below).
enum Flags
{
	// level (bits 0..1)
	L1 = 1,
	L2,
	L3,

	// type (bits 2..3)
	I = 0x04,	// instruction
	D = 0x08,	// data
	U = I|D	// unified

	// largeSize (bits 4..31 with bits 0..3 zeroed): TLB entrySize or cache numEntries
};
322 
323 // (there are > 100 descriptors, so we squeeze all fields into 8 bytes.)
324 struct Characteristics // POD
325 {
327  {
328  switch(flags & U)
329  {
330  case D:
331  return x86_x64::Cache::kData;
332  case I:
334  case U:
336  default:
338  return x86_x64::Cache::kNull;
339  }
340  }
341 
342  size_t Level() const
343  {
344  const size_t level = flags & 3;
345  ENSURE(level != 0);
346  return level;
347  }
348 
349  bool IsTLB() const
350  {
351  return smallSize >= 0;
352  }
353 
354  size_t NumEntries() const
355  {
356  return IsTLB()? (size_t)smallSize : (flags & ~0xF);
357  }
358 
359  size_t EntrySize() const
360  {
361  return IsTLB()? (flags & ~0xF) : (size_t)(-smallSize);
362  }
363 
366  i16 smallSize; // negative cache entrySize or TLB numEntries
367  u32 flags; // level, type, largeSize
368 };
369 
371 
372 #define CACHE(descriptor, flags, totalSize, assoc, entrySize) { descriptor, assoc, -entrySize, flags | ((totalSize)/(entrySize)) }
373 #define TLB(descriptor, flags, entrySize, assoc, numEntries) { descriptor, assoc, numEntries, flags | (entrySize) }
374 
375 // (we need to include cache descriptors because early Pentium4 don't implement CPUID.4)
376 // references: [accessed 2011-02-26]
377 // AP485 http://www.intel.com/Assets/PDF/appnote/241618.pdf
378 // sdman http://www.intel.com/Assets/PDF/manual/253666.pdf
379 // sandp http://www.sandpile.org/ia32/cpuid.htm
380 // opsol http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/i86pc/os/cpuid.c
382 {
383  TLB (0x01, L1|I, 4*KiB, 4, 32),
384  TLB (0x02, L1|I, 4*MiB, F, 2),
385  TLB (0x03, L1|D, 4*KiB, 4, 64),
386  TLB (0x04, L1|D, 4*MiB, 4, 8),
387  TLB (0x05, L1|D, 4*MiB, 4, 32),
388 
389  CACHE(0x06, L1|I, 8*KiB, 4, 32),
390  CACHE(0x08, L1|I, 16*KiB, 4, 32),
391  CACHE(0x09, L1|I, 32*KiB, 4, 64),
392  CACHE(0x0A, L1|I, 8*KiB, 2, 32),
393 
394  TLB (0x0B, L1|I, 4*MiB, 4, 4),
395 
396  CACHE(0x0C, L1|D, 16*KiB, 4, 32),
397  CACHE(0x0D, L1|D, 16*KiB, 4, 64), // opsol: 32B (would be redundant with 0x0C), AP485: 64B, sdman: 64B
398  CACHE(0x0E, L1|D, 24*KiB, 6, 64),
399 
400  CACHE(0x21, L2|U, 256*KiB, 8, 64),
401 
402  CACHE(0x22, L3|U, 512*KiB, 4, 64),
403  CACHE(0x23, L3|U, 1*MiB, 8, 64),
404  CACHE(0x25, L3|U, 2*MiB, 8, 64),
405  CACHE(0x29, L3|U, 4*MiB, 8, 64),
406 
407  CACHE(0x2c, L1|D, 32*KiB, 8, 64),
408 
409  CACHE(0x30, L1|I, 32*KiB, 8, 64),
410 
411  CACHE(0x39, L2|U, 128*KiB, 4, 64),
412  CACHE(0x3A, L2|U, 192*KiB, 6, 64),
413  CACHE(0x3B, L2|U, 128*KiB, 2, 64),
414  CACHE(0x3C, L2|U, 256*KiB, 4, 64),
415  CACHE(0x3D, L2|U, 384*KiB, 6, 64),
416  CACHE(0x3E, L2|U, 512*KiB, 4, 64),
417  CACHE(0x41, L2|U, 128*KiB, 4, 32),
418  CACHE(0x42, L2|U, 256*KiB, 4, 32),
419  CACHE(0x43, L2|U, 512*KiB, 4, 32),
420  CACHE(0x44, L2|U, 1*MiB, 4, 32),
421  CACHE(0x45, L2|U, 2*MiB, 4, 32),
422 
423  CACHE(0x46, L3|U, 4*MiB, 4, 64),
424  CACHE(0x47, L3|U, 8*MiB, 8, 64),
425  CACHE(0x48, L2|U, 3*MiB, 12, 64),
426  CACHE(0x49, L2|U, 4*MiB, 16, 64),
427  CACHE(0x49, L3|U, 4*MiB, 16, 64),
428  CACHE(0x4A, L3|U, 6*MiB, 12, 64),
429  CACHE(0x4B, L3|U, 8*MiB, 16, 64),
430  CACHE(0x4C, L3|U, 12*MiB, 12, 64),
431  CACHE(0x4D, L3|U, 16*MiB, 16, 64),
432  CACHE(0x4E, L2|U, 6*MiB, 24, 64),
433 
434  TLB (0x4F, L1|I, 4*KiB, F, 32), // sandp: unknown assoc, opsol: full, AP485: unspecified
435  TLB (0x50, L1|I, 4*KiB, F, 64),
436  TLB (0x50, L1|I, 4*MiB, F, 64),
437  TLB (0x50, L1|I, 2*MiB, F, 64),
438  TLB (0x51, L1|I, 4*KiB, F, 128),
439  TLB (0x51, L1|I, 4*MiB, F, 128),
440  TLB (0x51, L1|I, 2*MiB, F, 128),
441  TLB (0x52, L1|I, 4*KiB, F, 256),
442  TLB (0x52, L1|I, 4*MiB, F, 256),
443  TLB (0x52, L1|I, 2*MiB, F, 256),
444  TLB (0x55, L1|I, 4*MiB, F, 7),
445  TLB (0x55, L1|I, 2*MiB, F, 7),
446 
447  TLB (0x56, L1|D, 4*MiB, 4, 16),
448  TLB (0x57, L1|D, 4*KiB, 4, 16),
449  TLB (0x59, L1|D, 4*KiB, F, 16),
450  TLB (0x5A, L1|D, 4*MiB, 4, 32),
451  TLB (0x5A, L1|D, 2*MiB, 4, 32),
452  TLB (0x5B, L1|D, 4*KiB, F, 64),
453  TLB (0x5B, L1|D, 4*MiB, F, 64),
454  TLB (0x5C, L1|D, 4*KiB, F, 128),
455  TLB (0x5C, L1|D, 4*MiB, F, 128),
456  TLB (0x5D, L1|D, 4*KiB, F, 256),
457  TLB (0x5D, L1|D, 4*MiB, F, 256),
458 
459  CACHE(0x60, L1|D, 16*KiB, 8, 64),
460  TLB (0x63, L1|D, 1*GiB, 4, 4), // speculation
461  CACHE(0x66, L1|D, 8*KiB, 4, 64),
462  CACHE(0x67, L1|D, 16*KiB, 4, 64),
463  CACHE(0x68, L1|D, 32*KiB, 4, 64),
464 
465  CACHE(0x70, L1|I, 12*KiB, 8, 1),
466  CACHE(0x71, L1|I, 16*KiB, 8, 1),
467  CACHE(0x72, L1|I, 32*KiB, 8, 1),
468  CACHE(0x73, L1|I, 64*KiB, 8, 1),
469 
470  TLB (0x76, L1|I, 4*MiB, F, 8), // AP485: internally inconsistent, sdman: TLB
471  TLB (0x76, L1|I, 2*MiB, F, 8),
472 
473  CACHE(0x78, L2|U, 1*MiB, 4, 64),
474  CACHE(0x79, L2|U, 128*KiB, 8, 64),
475  CACHE(0x7A, L2|U, 256*KiB, 8, 64),
476  CACHE(0x7B, L2|U, 512*KiB, 8, 64),
477  CACHE(0x7C, L2|U, 1*MiB, 8, 64),
478  CACHE(0x7D, L2|U, 2*MiB, 8, 64),
479  CACHE(0x7F, L2|U, 512*KiB, 2, 64),
480 
481  CACHE(0x80, L2|U, 512*KiB, 8, 64),
482  CACHE(0x82, L2|U, 256*KiB, 8, 32),
483  CACHE(0x83, L2|U, 512*KiB, 8, 32),
484  CACHE(0x84, L2|U, 1*MiB, 8, 32),
485  CACHE(0x85, L2|U, 2*MiB, 8, 32),
486  CACHE(0x86, L2|U, 512*KiB, 4, 64),
487  CACHE(0x87, L2|U, 1*MiB, 8, 64),
488 
489  TLB (0xB0, L1|I, 4*KiB, 4, 128),
490  TLB (0xB1, L1|I, 2*MiB, 4, 8),
491  TLB (0xB1, L1|I, 4*MiB, 4, 4),
492  TLB (0xB2, L1|I, 4*KiB, 4, 64),
493 
494  TLB (0xB3, L1|D, 4*KiB, 4, 128),
495  TLB (0xB3, L1|D, 4*MiB, 4, 128),
496  TLB (0xB4, L1|D, 4*KiB, 4, 256),
497  TLB (0xB4, L1|D, 4*MiB, 4, 256),
498  TLB (0xB5, L1|I, 4*KiB, 4, 128), // speculation
499  TLB (0xB6, L1|I, 4*KiB, 8, 128), // http://software.intel.com/en-us/forums/topic/401012
500 
501  TLB (0xBA, L1|D, 4*KiB, 4, 64),
502  TLB (0xC0, L1|D, 4*KiB, 4, 8),
503  TLB (0xC0, L1|D, 4*MiB, 4, 8),
504  TLB (0xC1, L2|U, 4*KiB, 8, 1024), // http://software.intel.com/en-us/forums/topic/401012
505  TLB (0xC1, L2|U, 4*MiB, 8, 1024),
506  TLB (0xC1, L2|U, 2*MiB, 8, 1024),
507  TLB (0xCA, L2|U, 4*KiB, 4, 512),
508 
509  CACHE(0xD0, L3|U, 512*KiB, 4, 64),
510  CACHE(0xD1, L3|U, 1*MiB, 4, 64),
511  CACHE(0xD2, L3|U, 2*MiB, 4, 64),
512  CACHE(0xD6, L3|U, 1*MiB, 8, 64),
513  CACHE(0xD7, L3|U, 2*MiB, 8, 64),
514  CACHE(0xD8, L3|U, 4*MiB, 8, 64),
515  CACHE(0xDC, L3|U, 3*MiB/2, 12, 64),
516  CACHE(0xDD, L3|U, 3*MiB, 12, 64),
517  CACHE(0xDE, L3|U, 6*MiB, 12, 64),
518  CACHE(0xE2, L3|U, 2*MiB, 16, 64),
519  CACHE(0xE3, L3|U, 4*MiB, 16, 64),
520  CACHE(0xE4, L3|U, 8*MiB, 16, 64),
521  CACHE(0xEA, L3|U, 12*MiB, 24, 64),
522  CACHE(0xEB, L3|U, 18*MiB, 24, 64),
523  CACHE(0xEC, L3|U, 24*MiB, 24, 64),
524 };
525 #undef CACHE
526 #undef TLB
527 
529 {
530  // note: we can't use bsearch because characteristicsTable contains multiple
531  // entries with the same descriptor.
532  for(size_t i = 0; i < ARRAY_SIZE(characteristicsTable); i++)
533  {
534  const Characteristics& characteristics = characteristicsTable[i];
535  if(characteristics.descriptor == descriptor)
536  return &characteristics;
537  }
538 
539  debug_printf(L"Unknown cache/TLB descriptor 0x%x\n", (unsigned int)descriptor);
540  return 0;
541 }
542 
543 
// flags accumulated while processing descriptors (see HandleSpecialDescriptor).
// NOTE(review): the enumerator lines (original 546-549) were lost during
// extraction; the names are certain (all four are referenced below and
// SKIP_CACHE_DESCRIPTORS is used from DetectCacheAndTLB), but the exact
// values should be confirmed against the original source - any distinct
// bit values preserve the visible behavior.
enum DescriptorFlags
{
	SKIP_CACHE_DESCRIPTORS = 1,
	NO_LAST_LEVEL_CACHE    = 2,
	PREFETCH64             = 64,
	PREFETCH128            = 128
};
551 
552 static bool HandleSpecialDescriptor(Descriptor descriptor, size_t& descriptorFlags)
553 {
554  switch(descriptor)
555  {
556  case 0: // carries no information
557  return true;
558 
559  case 0x40:
560  descriptorFlags |= NO_LAST_LEVEL_CACHE;
561  return true;
562 
563  case 0xF0:
564  descriptorFlags |= PREFETCH64;
565  return true;
566 
567  case 0xF1:
568  descriptorFlags |= PREFETCH128;
569  return true;
570 
571  case 0xFF: // descriptors don't include caches (use CPUID.4 instead)
572  descriptorFlags |= SKIP_CACHE_DESCRIPTORS;
573  return true;
574 
575  default:
576  return false;
577  }
578 }
579 
580 
581 static void DetectCacheAndTLB(size_t& descriptorFlags)
582 {
583  const Descriptors descriptors = GetDescriptors();
584  for(Descriptors::const_iterator it = descriptors.begin(); it != descriptors.end(); ++it)
585  {
586  const Descriptor descriptor = *it;
587  if(HandleSpecialDescriptor(descriptor, descriptorFlags))
588  continue;
589 
590  const Characteristics* characteristics = CharacteristicsFromDescriptor(*it);
591  if(!characteristics)
592  continue;
593 
594  if((descriptorFlags & SKIP_CACHE_DESCRIPTORS) && !characteristics->IsTLB())
595  continue;
596 
597  x86_x64::Cache cache;
598  cache.Initialize(characteristics->Level(), characteristics->Type());
599  cache.numEntries = characteristics->NumEntries();
600  cache.entrySize = characteristics->EntrySize();
601  cache.associativity = characteristics->associativity;
602  cache.sharedBy = 1; // (safe default)
603  if(characteristics->IsTLB())
604  AddTLB(cache);
605  else
606  AddCache(cache);
607  }
608 }
609 
610 } // namespace CPUID2
611 
612 
614 {
615  // ensure all cache entries are initialized (DetectCache* might not set them all)
616  for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++)
617  {
618  caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kData);
619  caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kInstruction);
620  }
621 
624  else
625  {
626  size_t descriptorFlags = 0;
627  if(CPUID4::DetectCache()) // success, ignore less reliable CPUID.2 cache information
628  descriptorFlags |= CPUID2::SKIP_CACHE_DESCRIPTORS;
629  CPUID2::DetectCacheAndTLB(descriptorFlags);
630  }
631 
632  // sanity checks
633  for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++)
634  {
635  ENSURE(caches[L1D+idxLevel].type == x86_x64::Cache::kData || caches[L1D+idxLevel].type == x86_x64::Cache::kUnified);
636  ENSURE(caches[L1D+idxLevel].level == idxLevel+1);
637  ENSURE(caches[L1D+idxLevel].Validate() == true);
638 
639  ENSURE(caches[L1I+idxLevel].type == x86_x64::Cache::kInstruction || caches[L1I+idxLevel].type == x86_x64::Cache::kUnified);
640  ENSURE(caches[L1I+idxLevel].level == idxLevel+1);
641  ENSURE(caches[L1I+idxLevel].Validate() == true);
642  }
643  for(size_t i = 0; i < numTLBs; i++)
644  ENSURE(caches[TLB+i].Validate() == true);
645 
646  return INFO::OK;
647 }
648 
649 const x86_x64::Cache* Caches(size_t idxCache)
650 {
651  static ModuleInitState initState;
652  ModuleInit(&initState, DetectCacheAndTLB);
653 
654  if(idxCache >= TLB+numTLBs)
655  return 0;
656 
657  return &caches[idxCache];
658 }
659 
660 } // namespace x86_x64
#define u8
Definition: types.h:39
const Status LOGIC
Definition: status.h:409
#define CACHE(descriptor, flags, totalSize, assoc, entrySize)
Definition: cache.cpp:372
static const u8 F
Definition: cache.cpp:370
static const size_t pageSize
Definition: alignment.h:61
static x86_x64::Cache L2Cache(u32 reg, x86_x64::Cache::Type type)
Definition: cache.cpp:95
const Status OK
Definition: status.h:386
const x86_x64::Cache * Caches(size_t idxCache)
Definition: cache.cpp:649
static void AppendDescriptors(u32 reg, Descriptors &descriptors)
Definition: cache.cpp:258
Type type
never kNull
Definition: cache.h:52
static bool DetectCache()
Definition: cache.cpp:215
static const size_t numCaches
Definition: cache.cpp:37
static Descriptors GetDescriptors()
Definition: cache.cpp:271
Vendors Vendor()
Definition: x86_x64.cpp:200
static ModuleInitState initState
Definition: h_mgr.cpp:742
static const size_t fullyAssociative
Definition: cache.h:42
uintptr_t os_cpu_SetThreadAffinityMask(uintptr_t processorMask)
restrict the current thread to a set of processors.
Definition: bcpu.cpp:109
#define i16
Definition: types.h:35
static x86_x64::Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
Definition: cache.cpp:150
#define ARRAY_SIZE(name)
static const size_t GiB
Definition: alignment.h:73
size_t NumEntries() const
Definition: cache.cpp:354
static Status DetectCacheAndTLB()
Definition: cache.cpp:613
#define ENSURE(expr)
ensure the expression &lt;expr&gt; evaluates to non-zero.
Definition: debug.h:282
intptr_t ModuleInitState
initialization state of a module (class, source file, etc.) must be initialized to zero (e...
Definition: module_init.h:35
bool cpuid(CpuidRegs *regs)
invoke CPUID instruction.
Definition: x86_x64.cpp:98
static x86_x64::Cache L3Cache(u32 reg, x86_x64::Cache::Type type)
Definition: cache.cpp:114
size_t sharedBy
how many logical processors share this cache?
Definition: cache.h:72
static const size_t maxLevels
Definition: cache.h:40
static void DetectCacheAndTLB()
Definition: cache.cpp:180
bool IsBitSet(T value, size_t index)
Definition: bits.h:54
static const size_t KiB
Definition: alignment.h:71
static const size_t MiB
Definition: alignment.h:72
static void AddTLB(const x86_x64::Cache &tlb)
Definition: cache.cpp:52
static x86_x64::Cache L1Cache(u32 reg, x86_x64::Cache::Type type)
Definition: cache.cpp:70
x86_x64::Cache::Type Type() const
Definition: cache.cpp:326
static const size_t associativityTable[16]
Definition: cache.cpp:89
i64 Status
Error handling system.
Definition: status.h:171
static void AddCache(const x86_x64::Cache &cache)
Definition: cache.cpp:41
void Initialize(size_t level, Type type)
Definition: cache.h:74
T bits(T num, size_t lo_idx, size_t hi_idx)
extract the value of bits hi_idx:lo_idx within num
Definition: bits.h:97
static void AddTLB2Pair(u32 reg, size_t pageSize)
Definition: cache.cpp:167
uintptr_t os_cpu_ProcessorMask()
Definition: bcpu.cpp:57
#define DEBUG_WARN_ERR(status)
display the error dialog with text corresponding to the given error code.
Definition: debug.h:331
size_t level
1..maxLevels
Definition: cache.h:47
#define u32
Definition: types.h:41
static void DetectCacheAndTLB(size_t &descriptorFlags)
Definition: cache.cpp:581
static bool HandleSpecialDescriptor(Descriptor descriptor, size_t &descriptorFlags)
Definition: cache.cpp:552
static const size_t maxTLBs
Definition: cache.cpp:34
static const Characteristics characteristicsTable[]
Definition: cache.cpp:381
static size_t numTLBs
Definition: cache.cpp:35
registers used/returned by cpuid
Definition: x86_x64.h:46
size_t numEntries
if 0, the cache is disabled and all other values are zero
Definition: cache.h:57
std::vector< Descriptor > Descriptors
Definition: cache.cpp:256
size_t associativity
= fullyAssociative or the actual ways of associativity
Definition: cache.h:67
bool Validate() const
Definition: cache.h:86
size_t entrySize
NB: cache entries are lines, TLB entries are pages.
Definition: cache.h:62
static Cache caches[numCaches]
Definition: cache.cpp:38
Status ModuleInit(volatile ModuleInitState *initState, Status(*init)())
calls a user-defined init function if initState is zero.
Definition: module_init.cpp:40
static x86_x64::Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
Definition: cache.cpp:133
void debug_printf(const wchar_t *fmt,...)
write a formatted string to the debug channel, subject to filtering (see below).
Definition: debug.cpp:142
static const Characteristics * CharacteristicsFromDescriptor(Descriptor descriptor)
Definition: cache.cpp:528