Pyrogenesis  13997
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
timer.h
Go to the documentation of this file.
1 /* Copyright (c) 2010 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * platform-independent high resolution timer
25  */
26 
27 #ifndef INCLUDED_TIMER
28 #define INCLUDED_TIMER
29 
30 #include "lib/config2.h" // CONFIG2_TIMER_ALLOW_RDTSC
31 #include "lib/sysdep/cpu.h" // cpu_AtomicAdd
32 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
33 # include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
34 # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
35 #endif
36 
37 
38 /**
39  * timer_Time will subsequently return values relative to the current time.
40  **/
41 LIB_API void timer_LatchStartTime();
42 
43 /**
44  * @return high resolution (> 1 us) timestamp [s].
45  **/
46 LIB_API double timer_Time();
47 
48 /**
49  * @return resolution [s] of the timer.
50  **/
51 LIB_API double timer_Resolution();
52 
53 
54 // Cycles: signed tick count used by the cumulative timers. it is intptr_t on AMD64 so that cpu_AtomicAdd (XADD, faster than CMPXCHG) can be used in 64-bit builds without casting; other builds use i64.
55 #if ARCH_AMD64
56 typedef intptr_t Cycles;
57 #else
58 typedef i64 Cycles;
59 #endif
60 
61 /**
62  * internal helper functions for returning an easily readable
63  * string (i.e. re-scaled to appropriate units)
64  **/
65 LIB_API std::wstring StringForSeconds(double seconds);
66 LIB_API std::wstring StringForCycles(Cycles cycles);
67 
68 
69 //-----------------------------------------------------------------------------
70 // scope timing
71 
72 /// used by TIMER
74 {
76 public:
77  ScopeTimer(const wchar_t* description)
78  : m_t0(timer_Time()), m_description(description)
79  {
80  }
81 
83  {
84  const double t1 = timer_Time();
85  const std::wstring elapsedTimeString = StringForSeconds(t1-m_t0);
86  debug_printf(L"TIMER| %ls: %ls\n", m_description, elapsedTimeString.c_str());
87  }
88 
89 private:
90  double m_t0;
91  const wchar_t* m_description;
92 };
93 
94 /**
95  * Measures the time taken to execute code up until end of the current scope;
96  * displays it via debug_printf. Can safely be nested.
97  * Useful for measuring time spent in a function or basic block.
98  * <description> must remain valid over the lifetime of this object
99  * (ScopeTimer stores only the pointer); a string literal is safest.
100  *
101  * Example usage:
102  *   void func()
103  *   {
104  *       TIMER(L"description");
105  *       // code to be measured
106  *   }
107  **/
108 #define TIMER(description) ScopeTimer UID__(description)
109 
110 /**
111  * Measures the time taken to execute code between BEGIN and END markers;
112  * displays it via debug_printf. Can safely be nested.
113  * Useful for measuring several pieces of code within the same function/block.
114  * <description> must remain valid over the lifetime of this object;
115  * a string literal is safest.
116  *
117  * Caveats:
118  * - TIMER_BEGIN opens a basic block and TIMER_END merely closes it, so any
119  *   variables defined in between are invisible to surrounding code.
120  * - the description passed to END isn't inspected; you are responsible for
121  *   ensuring correct nesting!
122  *
123  * Example usage:
124  *   void func2()
125  *   {
126  *       // uninteresting code
127  *       TIMER_BEGIN(L"description2");
128  *       // code to be measured
129  *       TIMER_END(L"description2");
130  *       // uninteresting code
131  *   }
132  **/
133 #define TIMER_BEGIN(description) { ScopeTimer UID__(description)
134 #define TIMER_END(description) }
135 
136 
137 //-----------------------------------------------------------------------------
138 // cumulative timer API
139 
140 // this supplements in-game profiling by providing low-overhead,
141 // high resolution time accounting of specific areas.
142 
143 // since TIMER_ACCRUE et al. are called so often, we try to keep
144 // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
145 // returned by x86_x64::rdtsc) instead of absolute time has two benefits:
146 // - no need to convert from raw->time on every call
147 // (instead, it's only done once when displaying the totals)
148 // - possibly less overhead to querying the time itself
149 // (timer_Time may be using slower time sources with ~3us overhead)
150 //
151 // however, the cycle count is not necessarily a measure of wall-clock time
152 // (see http://www.gamedev.net/reference/programming/features/timing).
153 // therefore, on systems with SpeedStep active, measurements of I/O or other
154 // non-CPU bound activity may be skewed. this is ok because the timer is
155 // only used for profiling; just be aware of the issue.
156 // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC.
157 //
158 // note that overflow isn't an issue either way (63 bit cycle counts
159 // at 10 GHz cover intervals of 29 years).
160 
161 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
162 
163 class TimerUnit
164 {
165 public:
166  void SetToZero()
167  {
168  m_cycles = 0;
169  }
170 
171  void SetFromTimer()
172  {
173  m_cycles = x86_x64::rdtsc();
174  }
175 
176  void AddDifference(TimerUnit t0, TimerUnit t1)
177  {
178  m_cycles += t1.m_cycles - t0.m_cycles;
179  }
180 
182  {
183  const Cycles delta = t1.m_cycles - t0.m_cycles;
184 #if ARCH_AMD64
185  cpu_AtomicAdd(&m_cycles, delta);
186 #elif ARCH_IA32
187 retry:
188  if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
189  goto retry;
190 #else
191 # error "port"
192 #endif
193  }
194 
195  void Subtract(TimerUnit t)
196  {
197  m_cycles -= t.m_cycles;
198  }
199 
200  std::wstring ToString() const
201  {
202  ENSURE(m_cycles >= 0);
203  return StringForCycles(m_cycles);
204  }
205 
206  double ToSeconds() const
207  {
208  return (double)m_cycles / os_cpu_ClockFrequency();
209  }
210 
211 private:
212  Cycles m_cycles;
213 };
214 
215 #else
216 
218 {
219 public:
220  void SetToZero()
221  {
222  m_seconds = 0.0;
223  }
224 
226  {
227  m_seconds = timer_Time();
228  }
229 
231  {
232  m_seconds += t1.m_seconds - t0.m_seconds;
233  }
234 
236  {
237 retry:
238  i64 oldRepresentation;
239  memcpy(&oldRepresentation, &m_seconds, sizeof(oldRepresentation));
240 
241  const double seconds = m_seconds + t1.m_seconds - t0.m_seconds;
242  i64 newRepresentation;
243  memcpy(&newRepresentation, &seconds, sizeof(newRepresentation));
244 
245  if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
246  goto retry;
247  }
248 
250  {
251  m_seconds -= t.m_seconds;
252  }
253 
254  std::wstring ToString() const
255  {
256  ENSURE(m_seconds >= 0.0);
257  return StringForSeconds(m_seconds);
258  }
259 
260  double ToSeconds() const
261  {
262  return m_seconds;
263  }
264 
265 private:
266  double m_seconds;
267 };
268 
269 #endif
270 
271 // opaque - do not access its fields!
272 // note: must be defined here because clients instantiate them;
273 // fields cannot be made private due to POD requirement.
275 {
276  TimerUnit sum; // total bill
277 
278  // only store a pointer for efficiency.
279  const wchar_t* description;
280 
282 
283  // how often the timer was billed (helps measure relative
284  // performance of something that is done indeterminately often).
285  intptr_t num_calls;
286 };
287 
288 /**
289  * make the given TimerClient (usually instantiated as static data)
290  * ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
291  * this client's total (which is increased by a BillingPolicy) will be
292  * displayed by timer_DisplayClientTotals.
293  * notes:
294  * - may be called at any time;
295  * - always succeeds (there's no fixed limit);
296  * - free() is not needed nor possible.
297  * - description must remain valid until exit; a string literal is safest.
298  **/
299 LIB_API TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description);
300 
301 /**
302  * "allocate" a new TimerClient that will keep track of the total time
303  * billed to it, along with a description string (the stringized <id>). These are displayed when
304  * timer_DisplayClientTotals is called.
305  * Invoke this at file or function scope; a static TimerClient plus a (static) TimerClient pointer of
306  * name <id> will be defined, and the latter should be passed to TIMER_ACCRUE.
307  **/
308 #define TIMER_ADD_CLIENT(id)\
309  static TimerClient UID__;\
310  static TimerClient* id = timer_AddClient(&UID__, WIDEN(#id))
311 
312 /**
313  * bill the difference between t0 and t1 to the client's total.
314  **/
316 {
317  void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
318  {
319  tc->sum.AddDifference(t0, t1);
320  tc->num_calls++;
321  }
322 };
323 
324 /**
325  * thread-safe (not used by default due to its higher overhead)
326  * note: we can't just use thread-local variables to avoid
327  * synchronization overhead because we don't have control over all
328  * threads (for accumulating their separate timer copies).
329  **/
331 {
332  void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
333  {
334  tc->sum.AddDifferenceAtomic(t0, t1);
335  cpu_AtomicAdd(&tc->num_calls, +1);
336  }
337 };
338 
339 /**
340  * display all clients' totals; does not reset them.
341  * typically called at exit.
342  **/
343 LIB_API void timer_DisplayClientTotals();
344 
345 
346 /// used by TIMER_ACCRUE
347 template<class BillingPolicy = BillingPolicy_Default>
349 {
351 public:
353  : m_tc(tc)
354  {
355  m_t0.SetFromTimer();
356  }
357 
359  {
360  TimerUnit t1;
361  t1.SetFromTimer();
362  BillingPolicy()(m_tc, m_t0, t1);
363  }
364 
365 private:
368 };
369 
370 /**
371  * Measure the time taken to execute code up until end of the current scope;
372  * bill it to the given TimerClient object. Can safely be nested.
373  * Useful for measuring total time spent in a function or basic block over the
374  * entire program.
375  * `client' is an identifier registered via TIMER_ADD_CLIENT.
 * TIMER_ACCRUE_ATOMIC does the same but bills via BillingPolicy_Atomic
 * instead of the default policy.
376  *
377  * Example usage:
378  *   TIMER_ADD_CLIENT(client);
379  *
380  *   void func()
381  *   {
382  *       TIMER_ACCRUE(client);
383  *       // code to be measured
384  *   }
385  *
386  *   [later or at exit]
387  *   timer_DisplayClientTotals();
388  **/
389 #define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client)
390 #define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client)
391 
392 #endif // #ifndef INCLUDED_TIMER
NONCOPYABLE(ScopeTimerAccrue)
TimerClient * m_tc
Definition: timer.h:367
double os_cpu_ClockFrequency()
Definition: os_cpu.cpp:43
TimerUnit m_t0
Definition: timer.h:366
ScopeTimer(const wchar_t *description)
Definition: timer.h:77
void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
Definition: timer.h:235
TimerClient * next
Definition: timer.h:281
void operator()(TimerClient *tc, TimerUnit t0, TimerUnit t1) const
Definition: timer.h:317
TimerClient * timer_AddClient(TimerClient *tc, const wchar_t *description)
make the given TimerClient (usually instantiated as static data) ready for use.
Definition: timer.cpp:166
intptr_t cpu_AtomicAdd(volatile intptr_t *location, intptr_t increment)
add a signed value to a variable without the possibility of interference from other threads/CPUs...
Definition: arm.cpp:31
NONCOPYABLE(ScopeTimer)
bill the difference between t0 and t1 to the client's total.
Definition: timer.h:315
void AddDifference(TimerUnit t0, TimerUnit t1)
Definition: timer.h:230
#define ENSURE(expr)
ensure the expression <expr> evaluates to non-zero.
Definition: debug.h:282
thread-safe (not used by default due to its higher overhead) note: we can't just use thread-local var...
Definition: timer.h:330
void timer_LatchStartTime()
timer_Time will subsequently return values relative to the current time.
Definition: timer.cpp:74
double ToSeconds() const
Definition: timer.h:260
TimerUnit sum
Definition: timer.h:276
used by TIMER
Definition: timer.h:73
void operator()(TimerClient *tc, TimerUnit t0, TimerUnit t1) const
Definition: timer.h:332
double timer_Time()
Definition: timer.cpp:98
void SetFromTimer()
Definition: timer.h:225
const wchar_t * m_description
Definition: timer.h:91
double m_t0
Definition: timer.h:90
bool cpu_CAS64(volatile i64 *location, i64 expected, i64 newValue)
Definition: arm.cpp:41
#define i64
Definition: types.h:37
double timer_Resolution()
Definition: timer.cpp:145
i64 Cycles
Definition: timer.h:58
const wchar_t * description
Definition: timer.h:279
void Subtract(TimerUnit t)
Definition: timer.h:249
used by TIMER_ACCRUE
Definition: timer.h:348
void SetToZero()
Definition: timer.h:220
double m_seconds
Definition: timer.h:266
~ScopeTimerAccrue()
Definition: timer.h:358
u64 rdtsc()
Definition: x86_x64.cpp:373
~ScopeTimer()
Definition: timer.h:82
std::wstring ToString() const
Definition: timer.h:254
std::wstring StringForSeconds(double seconds)
internal helper functions for returning an easily readable string (i.e.
Definition: timer.cpp:204
std::wstring StringForCycles(Cycles cycles)
Definition: timer.cpp:220
intptr_t num_calls
Definition: timer.h:285
ScopeTimerAccrue(TimerClient *tc)
Definition: timer.h:352
void debug_printf(const wchar_t *fmt,...)
write a formatted string to the debug channel, subject to filtering (see below).
Definition: debug.cpp:142
void timer_DisplayClientTotals()
display all clients' totals; does not reset them.
Definition: timer.cpp:181