mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2025-01-31 04:20:34 +00:00
- consolidated cycle_t and glcycle_t because, aside from glcycle_t's ability to be deactivated when no profiling takes place, they did exactly the same thing.
This commit is contained in:
parent
d6fedd36b1
commit
711a88bab3
5 changed files with 25 additions and 171 deletions
|
@ -1,7 +1,6 @@
|
||||||
/*
|
/*
|
||||||
**
|
**
|
||||||
** This is a copy of the regular cycle_t from a time when that was based
|
** Hardware render profiling info
|
||||||
** on QueryPerformanceCounter which is too costly for real-time profiling.
|
|
||||||
**
|
**
|
||||||
**---------------------------------------------------------------------------
|
**---------------------------------------------------------------------------
|
||||||
** Copyright 1998-2016 Randy Heit
|
** Copyright 1998-2016 Randy Heit
|
||||||
|
@ -34,17 +33,6 @@
|
||||||
**
|
**
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
#define WIN32_LEAN_AND_MEAN
|
|
||||||
#include <windows.h>
|
|
||||||
#include <intrin.h>
|
|
||||||
|
|
||||||
#elif defined __APPLE__
|
|
||||||
#include <sys/sysctl.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
#include "i_system.h"
|
#include "i_system.h"
|
||||||
#include "g_level.h"
|
#include "g_level.h"
|
||||||
#include "c_console.h"
|
#include "c_console.h"
|
||||||
|
@ -68,67 +56,6 @@ int vertexcount, flatvertices, flatprimitives;
|
||||||
int rendered_lines,rendered_flats,rendered_sprites,render_vertexsplit,render_texsplit,rendered_decals, rendered_portals;
|
int rendered_lines,rendered_flats,rendered_sprites,render_vertexsplit,render_texsplit,rendered_decals, rendered_portals;
|
||||||
int iter_dlightf, iter_dlight, draw_dlight, draw_dlightf;
|
int iter_dlightf, iter_dlight, draw_dlight, draw_dlightf;
|
||||||
|
|
||||||
double gl_SecondsPerCycle = 1e-8;
|
|
||||||
double gl_MillisecPerCycle = 1e-5; // 100 MHz
|
|
||||||
|
|
||||||
// For GL timing the performance counter is far too costly so we still need RDTSC
|
|
||||||
// even though it may not be perfect.
|
|
||||||
|
|
||||||
void gl_CalculateCPUSpeed ()
|
|
||||||
{
|
|
||||||
#ifdef _WIN32
|
|
||||||
LARGE_INTEGER freq;
|
|
||||||
|
|
||||||
QueryPerformanceFrequency (&freq);
|
|
||||||
|
|
||||||
if (freq.QuadPart != 0)
|
|
||||||
{
|
|
||||||
LARGE_INTEGER count1, count2;
|
|
||||||
unsigned minDiff;
|
|
||||||
int64_t ClockCalibration = 0;
|
|
||||||
|
|
||||||
// Count cycles for at least 55 milliseconds.
|
|
||||||
// The performance counter is very low resolution compared to CPU
|
|
||||||
// speeds today, so the longer we count, the more accurate our estimate.
|
|
||||||
// On the other hand, we don't want to count too long, because we don't
|
|
||||||
// want the user to notice us spend time here, since most users will
|
|
||||||
// probably never use the performance statistics.
|
|
||||||
minDiff = freq.LowPart * 11 / 200;
|
|
||||||
|
|
||||||
// Minimize the chance of task switching during the testing by going very
|
|
||||||
// high priority. This is another reason to avoid timing for too long.
|
|
||||||
SetPriorityClass (GetCurrentProcess (), REALTIME_PRIORITY_CLASS);
|
|
||||||
SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_TIME_CRITICAL);
|
|
||||||
ClockCalibration = __rdtsc();
|
|
||||||
QueryPerformanceCounter (&count1);
|
|
||||||
do
|
|
||||||
{
|
|
||||||
QueryPerformanceCounter (&count2);
|
|
||||||
} while ((uint32_t)((uint64_t)count2.QuadPart - (uint64_t)count1.QuadPart) < minDiff);
|
|
||||||
ClockCalibration = __rdtsc() - ClockCalibration;
|
|
||||||
QueryPerformanceCounter (&count2);
|
|
||||||
SetPriorityClass (GetCurrentProcess (), NORMAL_PRIORITY_CLASS);
|
|
||||||
SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_NORMAL);
|
|
||||||
|
|
||||||
double CyclesPerSecond = (double)ClockCalibration *
|
|
||||||
(double)freq.QuadPart /
|
|
||||||
(double)((__int64)count2.QuadPart - (__int64)count1.QuadPart);
|
|
||||||
gl_SecondsPerCycle = 1.0 / CyclesPerSecond;
|
|
||||||
gl_MillisecPerCycle = 1000.0 / CyclesPerSecond;
|
|
||||||
}
|
|
||||||
#elif defined __APPLE__
|
|
||||||
long long frequency;
|
|
||||||
size_t size = sizeof frequency;
|
|
||||||
|
|
||||||
if (0 == sysctlbyname("machdep.tsc.frequency", &frequency, &size, nullptr, 0) && 0 != frequency)
|
|
||||||
{
|
|
||||||
gl_SecondsPerCycle = 1.0 / frequency;
|
|
||||||
gl_MillisecPerCycle = 1000.0 / frequency;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void ResetProfilingData()
|
void ResetProfilingData()
|
||||||
{
|
{
|
||||||
All.Reset();
|
All.Reset();
|
||||||
|
@ -238,7 +165,7 @@ void CheckBench()
|
||||||
AppendRenderTimes(compose);
|
AppendRenderTimes(compose);
|
||||||
AppendLightStats(compose);
|
AppendLightStats(compose);
|
||||||
AppendMissingTextureStats(compose);
|
AppendMissingTextureStats(compose);
|
||||||
compose.AppendFormat("%" PRIu64 " fps\n\n", screen->GetLastFPS());
|
compose.AppendFormat("%llu fps\n\n", screen->GetLastFPS());
|
||||||
|
|
||||||
FILE *f = fopen("benchmarks.txt", "at");
|
FILE *f = fopen("benchmarks.txt", "at");
|
||||||
if (f != NULL)
|
if (f != NULL)
|
||||||
|
@ -269,11 +196,11 @@ CCMD(bench)
|
||||||
C_HideConsole ();
|
C_HideConsole ();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gl_benching = false;
|
bool glcycle_t::active = false;
|
||||||
|
|
||||||
void checkBenchActive()
|
void checkBenchActive()
|
||||||
{
|
{
|
||||||
FStat *stat = FStat::FindStat("rendertimes");
|
FStat *stat = FStat::FindStat("rendertimes");
|
||||||
gl_benching = ((stat != NULL && stat->isActive()) || printstats);
|
glcycle_t::active = ((stat != NULL && stat->isActive()) || printstats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,92 +5,6 @@
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
#include "m_fixed.h"
|
#include "m_fixed.h"
|
||||||
|
|
||||||
extern bool gl_benching;
|
|
||||||
|
|
||||||
extern double gl_SecondsPerCycle;
|
|
||||||
extern double gl_MillisecPerCycle;
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
|
|
||||||
__forceinline int64_t GetClockCycle ()
|
|
||||||
{
|
|
||||||
return __rdtsc();
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined __APPLE__ && (defined __i386__ || defined __x86_64__)
|
|
||||||
|
|
||||||
inline int64_t GetClockCycle()
|
|
||||||
{
|
|
||||||
return __builtin_ia32_rdtsc();
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined(__GNUG__) && defined(__i386__)
|
|
||||||
|
|
||||||
inline int64_t GetClockCycle()
|
|
||||||
{
|
|
||||||
if (CPU.bRDTSC)
|
|
||||||
{
|
|
||||||
int64_t res;
|
|
||||||
asm volatile ("rdtsc" : "=A" (res));
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
inline int64_t GetClockCycle ()
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
class glcycle_t
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
glcycle_t &operator= (const glcycle_t &o)
|
|
||||||
{
|
|
||||||
Counter = o.Counter;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Reset()
|
|
||||||
{
|
|
||||||
Counter = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void Clock()
|
|
||||||
{
|
|
||||||
// Not using QueryPerformanceCounter directly, so we don't need
|
|
||||||
// to pull in the Windows headers for every single file that
|
|
||||||
// wants to do some profiling.
|
|
||||||
int64_t time = (gl_benching? GetClockCycle() : 0);
|
|
||||||
Counter -= time;
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void Unclock()
|
|
||||||
{
|
|
||||||
int64_t time = (gl_benching? GetClockCycle() : 0);
|
|
||||||
Counter += time;
|
|
||||||
}
|
|
||||||
|
|
||||||
double Time()
|
|
||||||
{
|
|
||||||
return double(Counter) * gl_SecondsPerCycle;
|
|
||||||
}
|
|
||||||
|
|
||||||
double TimeMS()
|
|
||||||
{
|
|
||||||
return double(Counter) * gl_MillisecPerCycle;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
int64_t Counter;
|
|
||||||
};
|
|
||||||
|
|
||||||
extern glcycle_t RenderWall,SetupWall,ClipWall;
|
extern glcycle_t RenderWall,SetupWall,ClipWall;
|
||||||
extern glcycle_t RenderFlat,SetupFlat;
|
extern glcycle_t RenderFlat,SetupFlat;
|
||||||
extern glcycle_t RenderSprite,SetupSprite;
|
extern glcycle_t RenderSprite,SetupSprite;
|
||||||
|
|
|
@ -408,9 +408,6 @@ CocoaVideo::CocoaVideo()
|
||||||
{
|
{
|
||||||
memset(&m_modeIterator, 0, sizeof m_modeIterator);
|
memset(&m_modeIterator, 0, sizeof m_modeIterator);
|
||||||
|
|
||||||
extern void gl_CalculateCPUSpeed();
|
|
||||||
gl_CalculateCPUSpeed();
|
|
||||||
|
|
||||||
// Create OpenGL pixel format
|
// Create OpenGL pixel format
|
||||||
NSOpenGLPixelFormatAttribute defaultProfile = NSOpenGLProfileVersion3_2Core;
|
NSOpenGLPixelFormatAttribute defaultProfile = NSOpenGLProfileVersion3_2Core;
|
||||||
|
|
||||||
|
|
22
src/stats.h
22
src/stats.h
|
@ -117,6 +117,11 @@ inline unsigned __int64 rdtsc()
|
||||||
{
|
{
|
||||||
return __rdtsc();
|
return __rdtsc();
|
||||||
}
|
}
|
||||||
|
#elif defined __APPLE__ && (defined __i386__ || defined __x86_64__)
|
||||||
|
inline unsigned __int64 rdtsc()
|
||||||
|
{
|
||||||
|
return __builtin_ia32_rdtsc();
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
inline uint64_t rdtsc()
|
inline uint64_t rdtsc()
|
||||||
{
|
{
|
||||||
|
@ -165,7 +170,7 @@ public:
|
||||||
Counter -= time;
|
Counter -= time;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Unclock()
|
void Unclock(bool checkvar = true)
|
||||||
{
|
{
|
||||||
int64_t time = rdtsc();
|
int64_t time = rdtsc();
|
||||||
Counter += time;
|
Counter += time;
|
||||||
|
@ -192,6 +197,21 @@ private:
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
class glcycle_t : public cycle_t
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static bool active;
|
||||||
|
void Clock()
|
||||||
|
{
|
||||||
|
if (active) cycle_t::Clock();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Unclock()
|
||||||
|
{
|
||||||
|
if (active) cycle_t::Unclock();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class FStat
|
class FStat
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -64,7 +64,6 @@ extern "C" {
|
||||||
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
|
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gl_CalculateCPUSpeed();
|
|
||||||
extern int NewWidth, NewHeight, NewBits, DisplayBits;
|
extern int NewWidth, NewHeight, NewBits, DisplayBits;
|
||||||
|
|
||||||
// these get used before GLEW is initialized so we have to use separate pointers with different names
|
// these get used before GLEW is initialized so we have to use separate pointers with different names
|
||||||
|
@ -162,9 +161,6 @@ public:
|
||||||
|
|
||||||
Win32GLVideo::Win32GLVideo(int parm) : m_Modes(NULL), m_IsFullscreen(false)
|
Win32GLVideo::Win32GLVideo(int parm) : m_Modes(NULL), m_IsFullscreen(false)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
|
||||||
gl_CalculateCPUSpeed();
|
|
||||||
#endif
|
|
||||||
I_SetWndProc();
|
I_SetWndProc();
|
||||||
m_DisplayWidth = vid_defwidth;
|
m_DisplayWidth = vid_defwidth;
|
||||||
m_DisplayHeight = vid_defheight;
|
m_DisplayHeight = vid_defheight;
|
||||||
|
|
Loading…
Reference in a new issue