Pyrogenesis  13997
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
x86_x64.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2011 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * CPU-specific routines common to 32 and 64-bit x86
25  */
26 
27 #include "precompiled.h"
29 
30 #include <cstring>
31 #include <cstdio>
32 #include <vector>
33 #include <set>
34 #include <algorithm>
35 
37 #include "lib/bits.h"
38 #include "lib/timer.h"
39 #include "lib/module_init.h"
40 #include "lib/sysdep/cpu.h"
41 #include "lib/sysdep/os_cpu.h"
42 
43 #if MSC_VERSION
44 # include <intrin.h> // __rdtsc
45 #endif
46 
47 namespace x86_x64 {
48 
// Make __cpuidex(regsArray, level, index) available: it executes CPUID with
// EAX=level and ECX=index and stores EAX, EBX, ECX, EDX in regsArray[0..3].
// MSVC builds at or above the version tested below rely on the compiler
// already providing __cpuidex; GCC builds get the inline-assembly macro;
// any other compiler is rejected at compile time.
// note: the GCC macro body already ends with ';'.
49 #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729
50 // VC10+ and VC9 SP1: __cpuidex is already available
51 #elif GCC_VERSION
52 # define __cpuidex(regsArray, level, index)\
53  __asm__ __volatile__ ("cpuid"\
54  : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3])\
55  : "0" (level), "2" (index));
56 #else
57 # error "compiler not supported"
58 #endif
59 
60 
61 // some of this module's functions are frequently called but require
62 // non-trivial initialization, so caching is helpful. isInitialized
63 // flags aren't thread-safe, so we use ModuleInit. calling it from
64 // every function is a bit wasteful, but it is convenient to avoid
65 // requiring users to pass around a global state object.
66 // one big Init() would be prone to deadlock if its subroutines also
67 // call a public function (that re-enters ModuleInit), so each
68 // function gets its own initState.
69 
70 //-----------------------------------------------------------------------------
71 // CPUID
72 
73 static void Invoke_cpuid(CpuidRegs* regs)
74 {
75  cassert(sizeof(regs->eax) == sizeof(int));
76  cassert(sizeof(*regs) == 4*sizeof(int));
77  __cpuidex((int*)regs, regs->eax, regs->ecx);
78 }
79 
82 
83 static Status InitCpuid()
84 {
85  CpuidRegs regs = { 0 };
86 
87  regs.eax = 0;
88  Invoke_cpuid(&regs);
89  cpuid_maxFunction = regs.eax;
90 
91  regs.eax = 0x80000000;
92  Invoke_cpuid(&regs);
94 
95  return INFO::OK;
96 }
97 
98 bool cpuid(CpuidRegs* regs)
99 {
100  static ModuleInitState initState;
101  ModuleInit(&initState, InitCpuid);
102 
103  const u32 function = regs->eax;
104  if(function > cpuid_maxExtendedFunction)
105  return false;
106  if(function < 0x80000000 && function > cpuid_maxFunction)
107  return false;
108 
109  Invoke_cpuid(regs);
110  return true;
111 }
112 
113 
114 //-----------------------------------------------------------------------------
115 // capability bits
116 
117 // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
118 // keep in sync with enum Cap!
119 static u32 caps[4];
120 
122 
123 static Status InitCaps()
124 {
125  CpuidRegs regs = { 0 };
126  regs.eax = 1;
127  if(cpuid(&regs))
128  {
129  caps[0] = regs.ecx;
130  caps[1] = regs.edx;
131  }
132  regs.eax = 0x80000001;
133  if(cpuid(&regs))
134  {
135  caps[2] = regs.ecx;
136  caps[3] = regs.edx;
137  }
138 
139  return INFO::OK;
140 }
141 
// Reports whether the capability flag `cap` is set in caps[].
// `cap` is decoded as a bit position within the 128-bit field: cap >> 5
// selects one of the four u32 words, the low five bits select the bit
// inside that word. Returns false if the word index is past the end of
// caps[].
// NOTE(review): the embedded listing numbers skip 144 and 150, so this copy
// appears to have lost two statements - presumably the ModuleInit call that
// runs InitCaps and a warning ahead of the early return. Confirm against
// the original x86_x64.cpp.
142 bool Cap(Caps cap)
143 {
145 
146  const size_t index = cap >> 5;
147  const size_t bit = cap & 0x1F;
148  if(index >= ARRAY_SIZE(caps))
149  {
151  return false;
152  }
153  return IsBitSet(caps[index], bit);
154 }
155 
// Copies the four capability words to the caller: *d0..*d3 receive
// caps[0]..caps[3] (std ecx, std edx, ext ecx, ext edx).
// All four pointers are dereferenced unconditionally.
// NOTE(review): the embedded listing numbers skip 158; a statement seems to
// be missing from this copy - presumably the ModuleInit call that runs
// InitCaps. Confirm against the original x86_x64.cpp.
156 void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3)
157 {
159 
160  *d0 = caps[0];
161  *d1 = caps[1];
162  *d2 = caps[2];
163  *d3 = caps[3];
164 }
165 
166 
167 //-----------------------------------------------------------------------------
168 // vendor
169 
171 
173 {
174  CpuidRegs regs = { 0 };
175  regs.eax = 0;
176  if(!cpuid(&regs))
178 
179  // copy regs to string
180  // note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order.
181  char vendorString[13];
182  memcpy(&vendorString[0], &regs.ebx, 4);
183  memcpy(&vendorString[4], &regs.edx, 4);
184  memcpy(&vendorString[8], &regs.ecx, 4);
185  vendorString[12] = '\0'; // 0-terminate
186 
187  if(!strcmp(vendorString, "AuthenticAMD"))
189  else if(!strcmp(vendorString, "GenuineIntel"))
191  else
192  {
195  }
196 
197  return INFO::OK;
198 }
199 
201 {
202  static ModuleInitState initState;
203  ModuleInit(&initState, InitVendor);
204  return vendor;
205 }
206 
207 
208 //-----------------------------------------------------------------------------
209 // signature
210 
211 static size_t model;
212 static size_t family;
214 
216 {
217  CpuidRegs regs = { 0 };
218  regs.eax = 1;
219  if(!cpuid(&regs))
221  model = bits(regs.eax, 4, 7);
222  family = bits(regs.eax, 8, 11);
223  const size_t extendedModel = bits(regs.eax, 16, 19);
224  const size_t extendedFamily = bits(regs.eax, 20, 27);
225  if(family == 0xF)
226  family += extendedFamily;
227  if(family == 0xF || (Vendor() == x86_x64::VENDOR_INTEL && family == 6))
228  model += extendedModel << 4;
229  return INFO::OK;
230 }
231 
// Returns the value of the file-level `model` variable.
// NOTE(review): the embedded listing numbers skip 234; a statement seems to
// be missing from this copy - presumably the ModuleInit call that runs
// InitSignature before the value is read. Confirm against the original file.
232 size_t Model()
233 {
235  return model;
236 }
237 
// Returns the value of the file-level `family` variable.
// NOTE(review): the embedded listing numbers skip 240; a statement seems to
// be missing from this copy - presumably the ModuleInit call that runs
// InitSignature before the value is read. Confirm against the original file.
238 size_t Family()
239 {
241  return family;
242 }
243 
244 
245 
246 
247 //-----------------------------------------------------------------------------
248 // identifier string
249 
250 /// functor to remove substrings from the CPU identifier string
class StringStripper
{
public:
	// string: 0-terminated buffer that is edited in place.
	// max_chars: number of chars in that buffer; each removal shifts
	//   everything up to string+max_chars downwards, so that whole range
	//   must be valid memory and contain the terminator.
	StringStripper(char* string, size_t max_chars)
		: m_string(string), m_max_chars(max_chars)
	{
	}

	// remove all instances of substring from m_string.
	// the search restarts from the beginning after every removal, so
	// occurrences that only form once a removal has joined two parts are
	// stripped as well.
	void operator()(const char* substring)
	{
		const size_t substring_length = strlen(substring);
		// an empty substring matches at every position and removing it
		// changes nothing, so the loop below would never terminate.
		if(substring_length == 0)
			return;

		for(;;)
		{
			char* substring_pos = strstr(m_string, substring);
			if(!substring_pos)
				break;
			// close the gap: move the remainder of the buffer (including
			// the terminator) over the match.
			const size_t substring_ofs = substring_pos - m_string;
			const size_t num_chars = m_max_chars - substring_ofs - substring_length;
			memmove(substring_pos, substring_pos+substring_length, num_chars);
		}
	}

private:
	char* m_string;
	size_t m_max_chars;
};
278 
279 // 3 calls x 4 registers x 4 bytes = 48 + 0-terminator
280 static char identifierString[48+1];
281 
283 {
284  // get brand string (if available)
285  char* pos = identifierString;
286  bool gotBrandString = true;
287  for(u32 function = 0x80000002; function <= 0x80000004; function++)
288  {
289  CpuidRegs regs = { 0 };
290  regs.eax = function;
291  gotBrandString &= cpuid(&regs);
292  memcpy(pos, &regs, 16);
293  pos += 16;
294  }
295 
296  // fall back to manual detect of CPU type because either:
297  // - CPU doesn't support brand string (we use a flag to indicate this
298  // rather than comparing against a default value because it is safer);
299  // - the brand string is useless, e.g. "Unknown". this happens on
300  // some older boards whose BIOS reprograms the string for CPUs it
301  // doesn't recognize.
302  if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
303  {
304  const size_t family = Family();
305  const size_t model = Model();
306  switch(Vendor())
307  {
308  case x86_x64::VENDOR_AMD:
309  // everything else is either too old, or should have a brand string.
310  if(family == 6)
311  {
312  if(model == 3 || model == 7)
314  else if(model <= 5)
316  else
317  {
320  else
322  }
323  }
324  break;
325 
327  // everything else is either too old, or should have a brand string.
328  if(family == 6)
329  {
330  if(model == 1)
331  strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium Pro");
332  else if(model == 3 || model == 5)
333  strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium II");
334  else if(model == 6)
336  else
337  strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium III");
338  }
339  break;
340 
341  default:
342  strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Unknown, non-Intel/AMD");
343  break;
344  }
345  }
346  // identifierString already holds a valid brand string; pretty it up.
347  else
348  {
349  const char* const undesiredStrings[] = { "(tm)", "(TM)", "(R)", "CPU ", " " };
350  std::for_each(undesiredStrings, undesiredStrings+ARRAY_SIZE(undesiredStrings),
352 
353  // note: Intel brand strings include a frequency, but we can't rely
354  // on it because the CPU may be overclocked. we'll leave it in the
355  // string to show measurement accuracy and if SpeedStep is active.
356  }
357 
358  return INFO::OK;
359 }
360 
361 static const char* IdentifierString()
362 {
363  static ModuleInitState initState;
364  ModuleInit(&initState, InitIdentifierString);
365  return identifierString;
366 }
367 
368 
369 //-----------------------------------------------------------------------------
370 // miscellaneous stateless functions
371 
372 #if !MSC_VERSION // ensure not already defined in header
374 {
375 #if GCC_VERSION
376  // GCC supports "portable" assembly for both x86 and x64
377  volatile u32 lo, hi;
378  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
379  return u64_from_u32(hi, lo);
380 #endif
381 }
382 #endif
383 
384 
386 {
387 #if MSC_VERSION
388  __debugbreak();
389 #elif GCC_VERSION
390  // note: this probably isn't necessary, since unix_debug_break
391  // (SIGTRAP) is most probably available if GCC_VERSION.
392  // we include it for completeness, though.
393  __asm__ __volatile__ ("int $3");
394 #endif
395 }
396 
397 
398 //-----------------------------------------------------------------------------
399 // CPU frequency
400 
401 // set scheduling priority and restore when going out of scope.
403 {
404 public:
405  ScopedSetPriority(int newPriority)
406  {
407  // get current scheduling policy and priority
409 
410  // set new priority
411  sched_param newParam = {0};
412  newParam.sched_priority = newPriority;
414  }
415 
417  {
418  // restore previous policy and priority.
420  }
421 
422 private:
425 };
426 
427 // note: this function uses timer.cpp!timer_Time, which is implemented via
428 // whrt.cpp on Windows.
430 {
431  // if the TSC isn't available, there's really no good way to count the
432  // actual CPU clocks per known time interval, so bail.
433  // note: loop iterations ("bogomips") are not a reliable measure due
434  // to differing IPC and compiler optimizations.
435  if(!Cap(x86_x64::CAP_TSC))
436  return -1.0; // impossible value
437 
438  // increase priority to reduce interference while measuring.
439  const int priority = sched_get_priority_max(SCHED_FIFO)-1;
440  ScopedSetPriority ssp(priority);
441 
442  // note: no need to "warm up" cpuid - it will already have been
443  // called several times by the time this code is reached.
444  // (background: it's used in rdtsc() to serialize instruction flow;
445  // the first call is documented to be slower on Intel CPUs)
446 
447  size_t numSamples = 16;
448  // if clock is low-res, do less samples so it doesn't take too long.
449  // balance measuring time (~ 10 ms) and accuracy (< 0.1% error -
450  // ok for using the TSC as a time reference)
451  if(timer_Resolution() >= 1e-3)
452  numSamples = 8;
453  std::vector<double> samples(numSamples);
454 
455  for(size_t i = 0; i < numSamples; i++)
456  {
457  double dt;
458  i64 dc; // (i64 instead of u64 for faster conversion to double)
459 
460  // count # of clocks in max{1 tick, 1 ms}:
461  // .. wait for start of tick.
462  const double t0 = timer_Time();
463  u64 c1; double t1;
464  do
465  {
466  // note: timer_Time effectively has a long delay (up to 5 us)
467  // before returning the time. we call it before rdtsc to
468  // minimize the delay between actually sampling time / TSC,
469  // thus decreasing the chance for interference.
470  // (if unavoidable background activity, e.g. interrupts,
471  // delays the second reading, inaccuracy is introduced).
472  t1 = timer_Time();
473  c1 = rdtsc();
474  }
475  while(t1 == t0);
476  // .. wait until start of next tick and at least 1 ms elapsed.
477  do
478  {
479  const double t2 = timer_Time();
480  const u64 c2 = rdtsc();
481  dc = (i64)(c2 - c1);
482  dt = t2 - t1;
483  }
484  while(dt < 1e-3);
485 
486  // .. freq = (delta_clocks) / (delta_seconds);
487  // rdtsc/timer overhead is negligible.
488  const double freq = dc / dt;
489  samples[i] = freq;
490  }
491 
492  std::sort(samples.begin(), samples.end());
493 
494  // median filter (remove upper and lower 25% and average the rest).
495  // note: don't just take the lowest value! it could conceivably be
496  // too low, if background processing delays reading c1 (see above).
497  double sum = 0.0;
498  const size_t lo = numSamples/4, hi = 3*numSamples/4;
499  for(size_t i = lo; i < hi; i++)
500  sum += samples[i];
501 
502  const double clockFrequency = sum / (hi-lo);
503  return clockFrequency;
504 }
505 
506 } // namespace x86_x64
507 
508 
509 const char* cpu_IdentifierString()
510 {
511  return x86_x64::IdentifierString();
512 }
int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param *param)
Definition: wpthread.cpp:104
const char * cpu_IdentifierString()
Definition: arm.cpp:46
int sched_priority
Definition: wpthread.h:37
static ModuleInitState signatureInitState
Definition: x86_x64.cpp:213
const Status OK
Definition: status.h:386
static u32 cpuid_maxFunction
Definition: x86_x64.cpp:80
static ModuleInitState capsInitState
Definition: x86_x64.cpp:121
void operator()(const char *substring)
Definition: x86_x64.cpp:260
Vendors Vendor()
Definition: x86_x64.cpp:200
static ModuleInitState initState
Definition: h_mgr.cpp:742
static size_t family
Definition: x86_x64.cpp:212
#define ARRAY_SIZE(name)
pthread_t pthread_self()
Definition: wpthread.cpp:74
StringStripper(char *string, size_t max_chars)
Definition: x86_x64.cpp:254
static char identifierString[48+1]
Definition: x86_x64.cpp:280
intptr_t ModuleInitState
initialization state of a module (class, source file, etc.) must be initialized to zero (e...
Definition: module_init.h:35
bool cpuid(CpuidRegs *regs)
invoke CPUID instruction.
Definition: x86_x64.cpp:98
Caps
bit indices of CPU capability flags (128 bits).
Definition: x86_x64.h:111
functor to remove substrings from the CPU identifier string
Definition: x86_x64.cpp:251
bool IsBitSet(T value, size_t index)
Definition: bits.h:54
double ClockFrequency()
measure the CPU clock frequency via rdtsc and timer_Time.
Definition: x86_x64.cpp:429
void DebugBreak()
trigger a breakpoint inside this function when it is called.
Definition: x86_x64.cpp:385
const Status INVALID_PARAM
Definition: status.h:423
void GetCapBits(u32 *d0, u32 *d1, u32 *d2, u32 *d3)
Definition: x86_x64.cpp:156
static Status InitIdentifierString()
Definition: x86_x64.cpp:282
i64 Status
Error handling system.
Definition: status.h:171
T bits(T num, size_t lo_idx, size_t hi_idx)
extract the value of bits hi_idx:lo_idx within num
Definition: bits.h:97
double timer_Time()
Definition: timer.cpp:98
const Status CPU_FEATURE_MISSING
Definition: cpu.h:35
int pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param)
Definition: wpthread.cpp:88
#define DEBUG_WARN_ERR(status)
display the error dialog with text corresponding to the given error code.
Definition: debug.h:331
const Status CPU_UNKNOWN_VENDOR
Definition: cpu.h:37
#define u64
Definition: types.h:42
bool Cap(Caps cap)
Definition: x86_x64.cpp:142
#define i64
Definition: types.h:37
#define u32
Definition: types.h:41
static void Invoke_cpuid(CpuidRegs *regs)
Definition: x86_x64.cpp:73
double timer_Resolution()
Definition: timer.cpp:145
static size_t model
Definition: x86_x64.cpp:211
static Vendors vendor
Definition: x86_x64.cpp:170
size_t Family()
Definition: x86_x64.cpp:238
static u32 cpuid_maxExtendedFunction
Definition: x86_x64.cpp:81
u64 u64_from_u32(u32 hi, u32 lo)
combine two 32-bit halves (hi, lo) into one 64-bit value
Definition: lib.cpp:65
static Status InitCaps()
Definition: x86_x64.cpp:123
#define sched_get_priority_max(policy)
Definition: wpthread.h:48
registers used/returned by cpuid
Definition: x86_x64.h:46
int strcpy_s(char *dst, size_t max_dst_chars, const char *src)
u64 rdtsc()
Definition: x86_x64.cpp:373
#define cassert(expr)
Compile-time assertion.
ScopedSetPriority(int newPriority)
Definition: x86_x64.cpp:405
static Status InitSignature()
Definition: x86_x64.cpp:215
Vendors
CPU vendor.
Definition: x86_x64.h:72
static const char * IdentifierString()
Definition: x86_x64.cpp:361
Status ModuleInit(volatile ModuleInitState *initState, Status(*init)())
calls a user-defined init function if initState is zero.
Definition: module_init.cpp:40
static Status InitCpuid()
Definition: x86_x64.cpp:83
size_t Model()
Definition: x86_x64.cpp:232
static u32 caps[4]
Definition: x86_x64.cpp:119
static Status InitVendor()
Definition: x86_x64.cpp:172