Profiler2GPU.cpp
/* Copyright (c) 2011 Wildfire Games
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "precompiled.h"

#include "Profiler2GPU.h"

#include "lib/ogl.h"
#include "ps/ConfigDB.h"
#include "ps/Profiler2.h"

#if !CONFIG2_GLES

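// Shared base for the GPU profiler backends below: it owns a
// CProfiler2::ThreadStorage that is registered with the profiler, so the
// GPU timing data is written out alongside the normal per-thread data.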
class CProfiler2GPU_base
{
	NONCOPYABLE(CProfiler2GPU_base);

protected:
	CProfiler2GPU_base(CProfiler2& profiler, const char* name) :
		m_Profiler(profiler), m_Storage(profiler, name)
	{
		m_Storage.RecordSyncMarker(m_Profiler.GetTime());
		m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");

		m_Profiler.AddThreadStorage(&m_Storage);
	}

	~CProfiler2GPU_base()
	{
		m_Profiler.RemoveThreadStorage(&m_Storage);
	}

	CProfiler2& m_Profiler;
	CProfiler2::ThreadStorage m_Storage;
};

//////////////////////////////////////////////////////////////////////////

// Base class for ARB_timer_query, EXT_timer_query
class CProfiler2GPU_timer_query : public CProfiler2GPU_base
{
protected:
	CProfiler2GPU_timer_query(CProfiler2& profiler, const char* name) :
		CProfiler2GPU_base(profiler, name)
	{
	}

	~CProfiler2GPU_timer_query()
	{
		if (!m_FreeQueries.empty())
			pglDeleteQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
		ogl_WarnIfError();
	}

	// Returns a new GL query object (or a recycled old one)
	GLuint NewQuery()
	{
		if (m_FreeQueries.empty())
		{
			// Generate a batch of new queries
			m_FreeQueries.resize(8);
			pglGenQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
			ogl_WarnIfError();
		}

		GLuint query = m_FreeQueries.back();
		m_FreeQueries.pop_back();
		return query;
	}

	std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in use
};

//////////////////////////////////////////////////////////////////////////

/*
 * GL_ARB_timer_query supports sync and async queries for absolute GPU
 * timestamps, which lets us time regions of code relative to the CPU.
 * At the start of a frame, we record the CPU time and sync GPU timestamp,
 * giving the time-vs-timestamp offset.
 * At each enter/leave-region event, we do an async GPU timestamp query.
 * When all the queries for a frame have their results available,
 * we convert their GPU timestamps into CPU times and record the data.
 */
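// Worked example of the conversion done in ProcessFrames() below:
// if the last sync pair was syncTimeStart = 10.0 s and syncTimestampStart =
// 5,000,000,000 ns, then a query result of 5,002,000,000 ns is recorded at
// 10.0 + (5002000000 - 5000000000) / 1e9 = 10.002 s on the CPU clock.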
class CProfiler2GPU_ARB_timer_query : public CProfiler2GPU_timer_query
{
	struct SEvent
	{
		const char* id;
		GLuint query;
		bool isEnter; // true if entering region; false if leaving
	};

	struct SFrame
	{
		u32 num;

		double syncTimeStart; // CPU time at the start of this frame, or of a recent one (see RESYNC_PERIOD)
		GLint64 syncTimestampStart; // GL timestamp corresponding to syncTimeStart

		std::vector<SEvent> events;
	};

	std::deque<SFrame> m_Frames;

public:
	static bool IsSupported()
	{
		return ogl_HaveExtension("GL_ARB_timer_query");
	}

	CProfiler2GPU_ARB_timer_query(CProfiler2& profiler) :
		CProfiler2GPU_timer_query(profiler, "gpu_arb")
	{
		// TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
		// high enough (but apparently that check might trigger GL errors on ATI)
	}

	~CProfiler2GPU_ARB_timer_query()
	{
		// Pop frames to return queries to the free list
		while (!m_Frames.empty())
			PopFrontFrame();
	}

	void FrameStart()
	{
		ProcessFrames();

		SFrame frame;
		frame.num = m_Profiler.GetFrameNumber();

		// On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
		// vsync is enabled and we're not CPU-bound, the first glGet* call at the
		// start of a frame appears to trigger a wait (to stop the GPU getting
		// too far behind, or to wait for the vsync period).
		// That will be this GL_TIMESTAMP get, which potentially distorts the
		// reported results. So we only do it fairly rarely, and for most frames
		// we just assume the clocks don't drift much in between.

		const double RESYNC_PERIOD = 1.0; // seconds

		double now = m_Profiler.GetTime();

		if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD)
		{
			PROFILE2("profile timestamp resync");

			pglGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart);
			ogl_WarnIfError();

			frame.syncTimeStart = m_Profiler.GetTime();
			// (We have to call GetTime again after the GL_TIMESTAMP get, because
			// that get might wait a while before returning its now-current timestamp)
		}
		else
		{
			// Reuse the previous frame's sync data
			frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart;
			frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart;
		}

		m_Frames.push_back(frame);

		RegionEnter("frame");
	}

	void FrameEnd()
	{
		RegionLeave("frame");
	}

	void RecordRegion(const char* id, bool isEnter)
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.back();

		SEvent event;
		event.id = id;
		event.query = NewQuery();
		event.isEnter = isEnter;

		pglQueryCounter(event.query, GL_TIMESTAMP);
		ogl_WarnIfError();

		frame.events.push_back(event);
	}

	void RegionEnter(const char* id)
	{
		RecordRegion(id, true);
	}

	void RegionLeave(const char* id)
	{
		RecordRegion(id, false);
	}

private:

	void ProcessFrames()
	{
		while (!m_Frames.empty())
		{
			SFrame& frame = m_Frames.front();

			// Queries become available in order, so we only need to check the last one
			GLint available = 0;
			pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
			ogl_WarnIfError();
			if (!available)
				break;

			// The frame's queries are now available, so retrieve and record all their results:

			for (size_t i = 0; i < frame.events.size(); ++i)
			{
				GLuint64 queryTimestamp = 0;
				pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp);
				// (use the non-suffixed function here, as defined by GL_ARB_timer_query)
				ogl_WarnIfError();

				// Convert to absolute CPU-clock time
				double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9;

				// Record a frame-start for syncing
				if (i == 0)
					m_Storage.RecordFrameStart(t);

				if (frame.events[i].isEnter)
					m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
				else
					m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id);

				// Associate the frame number with the "frame" region
				if (i == 0)
					m_Storage.RecordAttributePrintf("%u", frame.num);
			}

			PopFrontFrame();
		}
	}

	void PopFrontFrame()
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.front();
		for (size_t i = 0; i < frame.events.size(); ++i)
			m_FreeQueries.push_back(frame.events[i].query);
		m_Frames.pop_front();
	}
};

//////////////////////////////////////////////////////////////////////////

/*
 * GL_EXT_timer_query only supports async queries for elapsed time,
 * and only a single simultaneous query.
 * We can't correctly convert elapsed times to absolute times, so we just
 * pretend each GPU frame starts at the same time as the CPU frame.
 * We do a query for the elapsed time between every pair of adjacent
 * enter/leave-region events.
 * When all the queries for a frame have their results available,
 * we sum the elapsed times to work out when each event occurred within the
 * frame, and record the data.
 */
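// Illustrative example of the accumulation done in ProcessFrames() below:
// for events [enter "frame", enter "x", leave "x", leave "frame"] whose
// elapsed-time queries return 1 ms, 2 ms and 1 ms, the recorded times are
// timeStart, timeStart+0.001, timeStart+0.003 and timeStart+0.004.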
class CProfiler2GPU_EXT_timer_query : public CProfiler2GPU_timer_query
{
	struct SEvent
	{
		const char* id;
		GLuint query; // query for time elapsed from this event until the next, or 0 for the final event
		bool isEnter; // true if entering region; false if leaving
	};

	struct SFrame
	{
		u32 num;
		double timeStart; // CPU time at frame start
		std::vector<SEvent> events;
	};

	std::deque<SFrame> m_Frames;

public:
	static bool IsSupported()
	{
		return ogl_HaveExtension("GL_EXT_timer_query");
	}

	CProfiler2GPU_EXT_timer_query(CProfiler2& profiler) :
		CProfiler2GPU_timer_query(profiler, "gpu_ext")
	{
	}

	~CProfiler2GPU_EXT_timer_query()
	{
		// Pop frames to return queries to the free list
		while (!m_Frames.empty())
			PopFrontFrame();
	}

	void FrameStart()
	{
		ProcessFrames();

		SFrame frame;
		frame.num = m_Profiler.GetFrameNumber();
		frame.timeStart = m_Profiler.GetTime();

		m_Frames.push_back(frame);

		RegionEnter("frame");
	}

	void FrameEnd()
	{
		RegionLeave("frame");

		pglEndQueryARB(GL_TIME_ELAPSED);
		ogl_WarnIfError();
	}

	void RecordRegion(const char* id, bool isEnter)
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.back();

		// Must call glEndQuery before calling glGenQueries (via NewQuery),
		// for compatibility with the GL_EXT_timer_query spec (which says
		// GL_INVALID_OPERATION if a query of any target is active; the ARB
		// spec and OpenGL specs don't appear to say that, but the AMD drivers
		// implement that error (see Trac #1033))

		if (!frame.events.empty())
		{
			pglEndQueryARB(GL_TIME_ELAPSED);
			ogl_WarnIfError();
		}

		SEvent event;
		event.id = id;
		event.query = NewQuery();
		event.isEnter = isEnter;

		pglBeginQueryARB(GL_TIME_ELAPSED, event.query);
		ogl_WarnIfError();

		frame.events.push_back(event);
	}

	void RegionEnter(const char* id)
	{
		RecordRegion(id, true);
	}

	void RegionLeave(const char* id)
	{
		RecordRegion(id, false);
	}

private:
	void ProcessFrames()
	{
		while (!m_Frames.empty())
		{
			SFrame& frame = m_Frames.front();

			// Queries become available in order, so we only need to check the last one
			GLint available = 0;
			pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
			ogl_WarnIfError();
			if (!available)
				break;

			// The frame's queries are now available, so retrieve and record all their results:

			double t = frame.timeStart;
			m_Storage.RecordFrameStart(t);

			for (size_t i = 0; i < frame.events.size(); ++i)
			{
				if (frame.events[i].isEnter)
					m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
				else
					m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id);

				// Associate the frame number with the "frame" region
				if (i == 0)
					m_Storage.RecordAttributePrintf("%u", frame.num);

				// Advance by the elapsed time to the next event
				GLuint64 queryElapsed = 0;
				pglGetQueryObjectui64vEXT(frame.events[i].query, GL_QUERY_RESULT, &queryElapsed);
				// (use the EXT-suffixed function here, as defined by GL_EXT_timer_query)
				ogl_WarnIfError();
				t += (double)queryElapsed / 1e9;
			}

			PopFrontFrame();
		}
	}

	void PopFrontFrame()
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.front();
		for (size_t i = 0; i < frame.events.size(); ++i)
			m_FreeQueries.push_back(frame.events[i].query);
		m_Frames.pop_front();
	}
};

//////////////////////////////////////////////////////////////////////////

/*
 * GL_INTEL_performance_queries is not officially documented
 * (see http://zaynar.co.uk/docs/gl-intel-performance-queries.html)
 * but it's potentially useful, so we'll support it anyway.
 * It supports async queries giving elapsed time plus a load of other
 * counters that we'd like to use, and supports many simultaneous queries
 * (unlike GL_EXT_timer_query).
 * There are multiple query types (typically 2), each with its own set of
 * multiple counters.
 * On each enter-region event, we start a new set of queries.
 * On each leave-region event, we end the corresponding set of queries.
 * We can't tell the offsets between the enter events of nested regions,
 * so we pretend they all got entered at the same time.
 */
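// Note on the nesting behaviour of ProcessFrames() below: for events
// [enter A, enter B, leave B, leave A], both regions are recorded as entered
// at the same time; B is recorded as leaving at that time plus B's TotalTime
// counter, and A at that time plus A's TotalTime counter.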
class CProfiler2GPU_INTEL_performance_queries : public CProfiler2GPU_base
{
	struct SEvent
	{
		const char* id;
		bool isEnter;
		std::vector<GLuint> queries; // if isEnter, one per SPerfQueryType; else empty
	};

	struct SFrame
	{
		u32 num;
		double timeStart; // CPU time at frame start
		std::vector<SEvent> events;
		std::vector<size_t> activeRegions; // stack of indexes into events
	};

	std::deque<SFrame> m_Frames;

	// Counters listed by the graphics driver for a particular query type
	struct SPerfCounter
	{
		std::string name;
		std::string desc;
		GLuint offset;
		GLuint size;
		GLuint type;
	};

	// Query types listed by the graphics driver
	struct SPerfQueryType
	{
		GLuint queryTypeId;
		std::string name;
		GLuint counterBufferSize;
		std::vector<SPerfCounter> counters;

		std::vector<GLuint> freeQueries; // query objects that are allocated but not currently in use
	};

	std::vector<SPerfQueryType> m_QueryTypes;

	#define INTEL_PERFQUERIES_NONBLOCK 0x83FA
	#define INTEL_PERFQUERIES_BLOCK 0x83FB
	#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT 0x9402
	#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64 0x9403
	#define INTEL_PERFQUERIES_TYPE_FLOAT 0x9404
	#define INTEL_PERFQUERIES_TYPE_BOOL 0x9406

public:
	static bool IsSupported()
	{
		return ogl_HaveExtension("GL_INTEL_performance_queries");
	}

	CProfiler2GPU_INTEL_performance_queries(CProfiler2& profiler) :
		CProfiler2GPU_base(profiler, "gpu_intel")
	{
		LoadPerfCounters();
	}

	~CProfiler2GPU_INTEL_performance_queries()
	{
		// Pop frames to return queries to the free list
		while (!m_Frames.empty())
			PopFrontFrame();

		for (size_t i = 0; i < m_QueryTypes.size(); ++i)
			for (size_t j = 0; j < m_QueryTypes[i].freeQueries.size(); ++j)
				pglDeletePerfQueryINTEL(m_QueryTypes[i].freeQueries[j]);

		ogl_WarnIfError();
	}

	void FrameStart()
	{
		ProcessFrames();

		SFrame frame;
		frame.num = m_Profiler.GetFrameNumber();
		frame.timeStart = m_Profiler.GetTime();

		m_Frames.push_back(frame);

		RegionEnter("frame");
	}

	void FrameEnd()
	{
		RegionLeave("frame");
	}

	void RegionEnter(const char* id)
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.back();

		SEvent event;
		event.id = id;
		event.isEnter = true;

		for (size_t i = 0; i < m_QueryTypes.size(); ++i)
		{
			GLuint id = NewQuery(i);
			pglBeginPerfQueryINTEL(id);
			ogl_WarnIfError();
			event.queries.push_back(id);
		}

		frame.activeRegions.push_back(frame.events.size());

		frame.events.push_back(event);
	}

	void RegionLeave(const char* id)
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.back();

		ENSURE(!frame.activeRegions.empty());
		SEvent& activeEvent = frame.events[frame.activeRegions.back()];

		for (size_t i = 0; i < m_QueryTypes.size(); ++i)
		{
			pglEndPerfQueryINTEL(activeEvent.queries[i]);
			ogl_WarnIfError();
		}

		frame.activeRegions.pop_back();

		SEvent event;
		event.id = id;
		event.isEnter = false;
		frame.events.push_back(event);
	}

private:
	GLuint NewQuery(size_t queryIdx)
	{
		ENSURE(queryIdx < m_QueryTypes.size());

		if (m_QueryTypes[queryIdx].freeQueries.empty())
		{
			GLuint id;
			pglCreatePerfQueryINTEL(m_QueryTypes[queryIdx].queryTypeId, &id);
			ogl_WarnIfError();
			return id;
		}

		GLuint id = m_QueryTypes[queryIdx].freeQueries.back();
		m_QueryTypes[queryIdx].freeQueries.pop_back();
		return id;
	}

	void ProcessFrames()
	{
		while (!m_Frames.empty())
		{
			SFrame& frame = m_Frames.front();

			// Queries don't become available in order, so check them all before
			// trying to read the results from any
			for (size_t j = 0; j < m_QueryTypes.size(); ++j)
			{
				size_t size = m_QueryTypes[j].counterBufferSize;
				shared_ptr<char> buf(new char[size], ArrayDeleter());

				for (size_t i = 0; i < frame.events.size(); ++i)
				{
					if (!frame.events[i].isEnter)
						continue;

					GLuint length = 0;
					pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_NONBLOCK, size, buf.get(), &length);
					ogl_WarnIfError();
					if (length == 0)
						return;
				}
			}

			double lastTime = frame.timeStart;
			std::stack<double> endTimes;

			m_Storage.RecordFrameStart(lastTime);

			for (size_t i = 0; i < frame.events.size(); ++i)
			{
				if (frame.events[i].isEnter)
				{
					m_Storage.Record(CProfiler2::ITEM_ENTER, lastTime, frame.events[i].id);

					if (i == 0)
						m_Storage.RecordAttributePrintf("%u", frame.num);

					double elapsed = 0.0;

					for (size_t j = 0; j < m_QueryTypes.size(); ++j)
					{
						GLuint length;
						char* buf = new char[m_QueryTypes[j].counterBufferSize];
						pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_BLOCK, m_QueryTypes[j].counterBufferSize, buf, &length);
						ogl_WarnIfError();
						ENSURE(length == m_QueryTypes[j].counterBufferSize);

						m_Storage.RecordAttributePrintf("-- %s --", m_QueryTypes[j].name.c_str());

						for (size_t k = 0; k < m_QueryTypes[j].counters.size(); ++k)
						{
							SPerfCounter& counter = m_QueryTypes[j].counters[k];

							if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT)
							{
								ENSURE(counter.size == 4);
								GLuint value = 0;
								memcpy(&value, buf + counter.offset, counter.size);
								m_Storage.RecordAttributePrintf("%s: %u", counter.name.c_str(), value);
							}
							else if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64)
							{
								ENSURE(counter.size == 8);
								GLuint64 value = 0;
								memcpy(&value, buf + counter.offset, counter.size);
								m_Storage.RecordAttributePrintf("%s: %.0f", counter.name.c_str(), (double)value);

								if (counter.name == "TotalTime")
									elapsed = (double)value / 1e6;
							}
							else if (counter.type == INTEL_PERFQUERIES_TYPE_FLOAT)
							{
								ENSURE(counter.size == 4);
								GLfloat value = 0;
								memcpy(&value, buf + counter.offset, counter.size);
								m_Storage.RecordAttributePrintf("%s: %f", counter.name.c_str(), value);
							}
							else if (counter.type == INTEL_PERFQUERIES_TYPE_BOOL)
							{
								ENSURE(counter.size == 4);
								GLuint value = 0;
								memcpy(&value, buf + counter.offset, counter.size);
								ENSURE(value == 0 || value == 1);
								m_Storage.RecordAttributePrintf("%s: %u", counter.name.c_str(), value);
							}
							else
							{
								debug_warn(L"unrecognised Intel performance counter type");
							}
						}

						delete[] buf;
					}

					endTimes.push(lastTime + elapsed);
				}
				else
				{
					lastTime = endTimes.top();
					endTimes.pop();
					m_Storage.Record(CProfiler2::ITEM_LEAVE, lastTime, frame.events[i].id);
				}
			}

			PopFrontFrame();
		}
	}

	void PopFrontFrame()
	{
		ENSURE(!m_Frames.empty());
		SFrame& frame = m_Frames.front();
		for (size_t i = 0; i < frame.events.size(); ++i)
			if (frame.events[i].isEnter)
				for (size_t j = 0; j < m_QueryTypes.size(); ++j)
					m_QueryTypes[j].freeQueries.push_back(frame.events[i].queries[j]);
		m_Frames.pop_front();
	}

	void LoadPerfCounters()
	{
		GLuint queryTypeId;
		pglGetFirstPerfQueryIdINTEL(&queryTypeId);
		ogl_WarnIfError();
		do
		{
			char queryName[256];
			GLuint counterBufferSize, numCounters, maxQueries, unknown;
			pglGetPerfQueryInfoINTEL(queryTypeId, ARRAY_SIZE(queryName), queryName, &counterBufferSize, &numCounters, &maxQueries, &unknown);
			ogl_WarnIfError();
			ENSURE(unknown == 1);

			SPerfQueryType query;
			query.queryTypeId = queryTypeId;
			query.name = queryName;
			query.counterBufferSize = counterBufferSize;

			for (GLuint counterId = 1; counterId <= numCounters; ++counterId)
			{
				char counterName[256];
				char counterDesc[2048];
				GLuint counterOffset, counterSize, counterUsage, counterType;
				GLuint64 unknown2;
				pglGetPerfCounterInfoINTEL(queryTypeId, counterId, ARRAY_SIZE(counterName), counterName, ARRAY_SIZE(counterDesc), counterDesc, &counterOffset, &counterSize, &counterUsage, &counterType, &unknown2);
				ogl_WarnIfError();
				ENSURE(unknown2 == 0 || unknown2 == 1);

				SPerfCounter counter;
				counter.name = counterName;
				counter.desc = counterDesc;
				counter.offset = counterOffset;
				counter.size = counterSize;
				counter.type = counterType;
				query.counters.push_back(counter);
			}

			m_QueryTypes.push_back(query);

			pglGetNextPerfQueryIdINTEL(queryTypeId, &queryTypeId);
			ogl_WarnIfError();

		} while (queryTypeId);
	}
};

//////////////////////////////////////////////////////////////////////////

CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
	m_Profiler(profiler), m_ProfilerARB(NULL), m_ProfilerEXT(NULL), m_ProfilerINTEL(NULL)
{
	bool enabledARB = false;
	bool enabledEXT = false;
	bool enabledINTEL = false;
	CFG_GET_VAL("profiler2.gpu.arb.enable", Bool, enabledARB);
	CFG_GET_VAL("profiler2.gpu.ext.enable", Bool, enabledEXT);
	CFG_GET_VAL("profiler2.gpu.intel.enable", Bool, enabledINTEL);

	// Only enable either ARB or EXT, not both, because they are redundant
	// (EXT is only needed for compatibility with older systems), and because
	// using both triggers GL_INVALID_OPERATION on AMD drivers (see the comment
	// in CProfiler2GPU_EXT_timer_query::RecordRegion)
	if (enabledARB && CProfiler2GPU_ARB_timer_query::IsSupported())
	{
		m_ProfilerARB = new CProfiler2GPU_ARB_timer_query(profiler);
	}
	else if (enabledEXT && CProfiler2GPU_EXT_timer_query::IsSupported())
	{
		m_ProfilerEXT = new CProfiler2GPU_EXT_timer_query(profiler);
	}

	// The INTEL mode should be compatible with ARB/EXT (though no current
	// drivers support both), and provides complementary data, so enable it
	// when possible
	if (enabledINTEL && CProfiler2GPU_INTEL_performance_queries::IsSupported())
	{
		m_ProfilerINTEL = new CProfiler2GPU_INTEL_performance_queries(profiler);
	}
}
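// Note: the three bools above default to false, so GPU profiling stays off
// unless the corresponding option (e.g. profiler2.gpu.arb.enable) is set to
// a true value in the engine's configuration; the exact value syntax is
// whatever CConfigDB / CFG_GET_VAL accept for Bool settings.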

CProfiler2GPU::~CProfiler2GPU()
{
	SAFE_DELETE(m_ProfilerARB);
	SAFE_DELETE(m_ProfilerEXT);
	SAFE_DELETE(m_ProfilerINTEL);
}

void CProfiler2GPU::FrameStart()
{
	if (m_ProfilerARB)
		m_ProfilerARB->FrameStart();

	if (m_ProfilerEXT)
		m_ProfilerEXT->FrameStart();

	if (m_ProfilerINTEL)
		m_ProfilerINTEL->FrameStart();
}

void CProfiler2GPU::FrameEnd()
{
	if (m_ProfilerARB)
		m_ProfilerARB->FrameEnd();

	if (m_ProfilerEXT)
		m_ProfilerEXT->FrameEnd();

	if (m_ProfilerINTEL)
		m_ProfilerINTEL->FrameEnd();
}

void CProfiler2GPU::RegionEnter(const char* id)
{
	if (m_ProfilerARB)
		m_ProfilerARB->RegionEnter(id);

	if (m_ProfilerEXT)
		m_ProfilerEXT->RegionEnter(id);

	if (m_ProfilerINTEL)
		m_ProfilerINTEL->RegionEnter(id);
}

void CProfiler2GPU::RegionLeave(const char* id)
{
	if (m_ProfilerARB)
		m_ProfilerARB->RegionLeave(id);

	if (m_ProfilerEXT)
		m_ProfilerEXT->RegionLeave(id);

	if (m_ProfilerINTEL)
		m_ProfilerINTEL->RegionLeave(id);
}

#else // CONFIG2_GLES

CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
	m_Profiler(profiler), m_ProfilerARB(NULL), m_ProfilerEXT(NULL), m_ProfilerINTEL(NULL)
{
}

CProfiler2GPU::~CProfiler2GPU() { }

void CProfiler2GPU::FrameStart() { }
void CProfiler2GPU::FrameEnd() { }
void CProfiler2GPU::RegionEnter(const char* UNUSED(id)) { }
void CProfiler2GPU::RegionLeave(const char* UNUSED(id)) { }

#endif