mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-01-18 15:11:46 +00:00
- Consolidated cycle_t and glcycle_t: aside from glcycle_t's ability to be deactivated when no profiling takes place, they did exactly the same thing.
This commit is contained in:
parent
d6fedd36b1
commit
711a88bab3
5 changed files with 25 additions and 171 deletions
|
@ -1,7 +1,6 @@
|
|||
/*
|
||||
**
|
||||
** This is a copy of the regular cycle_t from a time when that was based
|
||||
** on QueryPerformanceCounter which is too costly for real-time profiling.
|
||||
** Hardware render profiling info
|
||||
**
|
||||
**---------------------------------------------------------------------------
|
||||
** Copyright 1998-2016 Randy Heit
|
||||
|
@ -34,17 +33,6 @@
|
|||
**
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#include <intrin.h>
|
||||
|
||||
#elif defined __APPLE__
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "i_system.h"
|
||||
#include "g_level.h"
|
||||
#include "c_console.h"
|
||||
|
@ -68,67 +56,6 @@ int vertexcount, flatvertices, flatprimitives;
|
|||
int rendered_lines,rendered_flats,rendered_sprites,render_vertexsplit,render_texsplit,rendered_decals, rendered_portals;
|
||||
int iter_dlightf, iter_dlight, draw_dlight, draw_dlightf;
|
||||
|
||||
// Conversion factors from time stamp counter ticks to wall-clock time.
// They start out describing a 100 MHz counter and are overwritten by
// gl_CalculateCPUSpeed() when the real rate can be determined.
double gl_SecondsPerCycle = 1e-8;
double gl_MillisecPerCycle = 1e-5;		// 100 MHz

// For GL timing the performance counter is far too costly so we still need RDTSC
// even though it may not be perfect.

//==========================================================================
//
// gl_CalculateCPUSpeed
//
// Determines how long one time stamp counter tick lasts and stores the
// result in gl_SecondsPerCycle and gl_MillisecPerCycle.
//
// Windows: counts TSC ticks across a short busy-wait that is timed with
//          the performance counter.
// macOS:   reads the "machdep.tsc.frequency" sysctl value.
// Other:   does nothing.
//
// Whenever no usable measurement is obtained the previous values of the
// two factors are left untouched.
//
//==========================================================================

void gl_CalculateCPUSpeed ()
{
#ifdef _WIN32
	LARGE_INTEGER freq;

	// Pre-set to zero so that a failed query is treated like "no counter".
	freq.QuadPart = 0;
	QueryPerformanceFrequency (&freq);

	if (freq.QuadPart > 0)
	{
		LARGE_INTEGER count1, count2;

		// Count cycles for at least 55 milliseconds.
		// The performance counter is very low resolution compared to CPU
		// speeds today, so the longer we count, the more accurate our estimate.
		// On the other hand, we don't want to count too long, because we don't
		// want the user to notice us spend time here, since most users will
		// probably never use the performance statistics.
		//
		// The tick budget is computed in 64 bits: multiplying only the low
		// 32 bits of the frequency by 11 wraps around once the counter runs
		// faster than roughly 390 MHz, which would cut the measurement short.
		const int64_t minDiff = freq.QuadPart * 11 / 200;

		// Minimize the chance of task switching during the testing by going very
		// high priority. This is another reason to avoid timing for too long.
		SetPriorityClass (GetCurrentProcess (), REALTIME_PRIORITY_CLASS);
		SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_TIME_CRITICAL);
		int64_t ClockCalibration = __rdtsc();
		QueryPerformanceCounter (&count1);
		do
		{
			QueryPerformanceCounter (&count2);
		} while (count2.QuadPart - count1.QuadPart < minDiff);
		ClockCalibration = __rdtsc() - ClockCalibration;
		QueryPerformanceCounter (&count2);
		SetPriorityClass (GetCurrentProcess (), NORMAL_PRIORITY_CLASS);
		SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_NORMAL);

		// Only accept the measurement if both counters actually advanced;
		// otherwise the division below would produce inf or NaN factors.
		const int64_t elapsed = count2.QuadPart - count1.QuadPart;
		if (ClockCalibration > 0 && elapsed > 0)
		{
			const double CyclesPerSecond = static_cast<double>(ClockCalibration) *
				static_cast<double>(freq.QuadPart) /
				static_cast<double>(elapsed);
			gl_SecondsPerCycle = 1.0 / CyclesPerSecond;
			gl_MillisecPerCycle = 1000.0 / CyclesPerSecond;
		}
	}
#elif defined __APPLE__
	// Initialized so that a query which succeeds without writing a value
	// cannot feed garbage into the factors.
	long long frequency = 0;
	size_t size = sizeof frequency;

	if (0 == sysctlbyname("machdep.tsc.frequency", &frequency, &size, nullptr, 0) && frequency > 0)
	{
		gl_SecondsPerCycle = 1.0 / frequency;
		gl_MillisecPerCycle = 1000.0 / frequency;
	}
#endif
}
|
||||
|
||||
|
||||
void ResetProfilingData()
|
||||
{
|
||||
All.Reset();
|
||||
|
@ -238,7 +165,7 @@ void CheckBench()
|
|||
AppendRenderTimes(compose);
|
||||
AppendLightStats(compose);
|
||||
AppendMissingTextureStats(compose);
|
||||
compose.AppendFormat("%" PRIu64 " fps\n\n", screen->GetLastFPS());
|
||||
compose.AppendFormat("%llu fps\n\n", screen->GetLastFPS());
|
||||
|
||||
FILE *f = fopen("benchmarks.txt", "at");
|
||||
if (f != NULL)
|
||||
|
@ -269,11 +196,11 @@ CCMD(bench)
|
|||
C_HideConsole ();
|
||||
}
|
||||
|
||||
bool gl_benching = false;
|
||||
bool glcycle_t::active = false;
|
||||
|
||||
// Re-evaluates whether render profiling should be running. The computed
// value is true when a stat named "rendertimes" is found and reports itself
// active, or when printstats is set.
// NOTE(review): the two assignments below store the same expression into
// gl_benching and glcycle_t::active; they look like the before/after forms
// of a single statement -- confirm which one belongs in the file.
void checkBenchActive()
|
||||
{
|
||||
FStat *stat = FStat::FindStat("rendertimes");
|
||||
gl_benching = ((stat != NULL && stat->isActive()) || printstats);
|
||||
glcycle_t::active = ((stat != NULL && stat->isActive()) || printstats);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,92 +5,6 @@
|
|||
#include "x86.h"
|
||||
#include "m_fixed.h"
|
||||
|
||||
extern bool gl_benching;
|
||||
|
||||
extern double gl_SecondsPerCycle;
|
||||
extern double gl_MillisecPerCycle;
|
||||
|
||||
//==========================================================================
//
// GetClockCycle
//
// Returns the current value of the CPU's time stamp counter, or 0 on
// targets for which no way of reading it is implemented here.
//
//==========================================================================

#ifdef _MSC_VER

__forceinline int64_t GetClockCycle ()
{
	return __rdtsc();
}

#elif defined __GNUC__ && defined __x86_64__

// Any 64-bit x86 build with GCC or Clang, macOS included. Without this
// branch such builds outside macOS would end up in the stub at the bottom
// and every timer would read zero.
inline int64_t GetClockCycle()
{
	return __builtin_ia32_rdtsc();
}

#elif defined __APPLE__ && defined __i386__

inline int64_t GetClockCycle()
{
	return __builtin_ia32_rdtsc();
}

#elif defined(__GNUG__) && defined(__i386__)

inline int64_t GetClockCycle()
{
	// Only issue the instruction when the CPU info says it is available.
	if (!CPU.bRDTSC)
	{
		return 0;
	}

	// "=A" binds the 64-bit result to the edx:eax register pair, which is
	// where RDTSC leaves it on 32-bit x86.
	int64_t res;
	asm volatile ("rdtsc" : "=A" (res));
	return res;
}

#else

// No cycle counter available here.
inline int64_t GetClockCycle ()
{
	return 0;
}

#endif
|
||||
|
||||
class glcycle_t
|
||||
{
|
||||
public:
|
||||
glcycle_t &operator= (const glcycle_t &o)
|
||||
{
|
||||
Counter = o.Counter;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
Counter = 0;
|
||||
}
|
||||
|
||||
__forceinline void Clock()
|
||||
{
|
||||
// Not using QueryPerformanceCounter directly, so we don't need
|
||||
// to pull in the Windows headers for every single file that
|
||||
// wants to do some profiling.
|
||||
int64_t time = (gl_benching? GetClockCycle() : 0);
|
||||
Counter -= time;
|
||||
}
|
||||
|
||||
__forceinline void Unclock()
|
||||
{
|
||||
int64_t time = (gl_benching? GetClockCycle() : 0);
|
||||
Counter += time;
|
||||
}
|
||||
|
||||
double Time()
|
||||
{
|
||||
return double(Counter) * gl_SecondsPerCycle;
|
||||
}
|
||||
|
||||
double TimeMS()
|
||||
{
|
||||
return double(Counter) * gl_MillisecPerCycle;
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t Counter;
|
||||
};
|
||||
|
||||
extern glcycle_t RenderWall,SetupWall,ClipWall;
|
||||
extern glcycle_t RenderFlat,SetupFlat;
|
||||
extern glcycle_t RenderSprite,SetupSprite;
|
||||
|
|
|
@ -408,9 +408,6 @@ CocoaVideo::CocoaVideo()
|
|||
{
|
||||
memset(&m_modeIterator, 0, sizeof m_modeIterator);
|
||||
|
||||
extern void gl_CalculateCPUSpeed();
|
||||
gl_CalculateCPUSpeed();
|
||||
|
||||
// Create OpenGL pixel format
|
||||
NSOpenGLPixelFormatAttribute defaultProfile = NSOpenGLProfileVersion3_2Core;
|
||||
|
||||
|
|
22
src/stats.h
22
src/stats.h
|
@ -117,6 +117,11 @@ inline unsigned __int64 rdtsc()
|
|||
{
|
||||
return __rdtsc();
|
||||
}
|
||||
#elif defined __APPLE__ && (defined __i386__ || defined __x86_64__)
|
||||
// Reads the CPU's time stamp counter through the compiler's RDTSC builtin.
// The return type is spelled uint64_t, as in the generic rdtsc() variant
// further down, because __int64 is a Microsoft-specific type name.
inline uint64_t rdtsc()
{
	return __builtin_ia32_rdtsc();
}
|
||||
#else
|
||||
inline uint64_t rdtsc()
|
||||
{
|
||||
|
@ -165,7 +170,7 @@ public:
|
|||
Counter -= time;
|
||||
}
|
||||
|
||||
void Unclock()
|
||||
void Unclock(bool checkvar = true)
|
||||
{
|
||||
int64_t time = rdtsc();
|
||||
Counter += time;
|
||||
|
@ -192,6 +197,21 @@ private:
|
|||
|
||||
#endif
|
||||
|
||||
class glcycle_t : public cycle_t
|
||||
{
|
||||
public:
|
||||
static bool active;
|
||||
void Clock()
|
||||
{
|
||||
if (active) cycle_t::Clock();
|
||||
}
|
||||
|
||||
void Unclock()
|
||||
{
|
||||
if (active) cycle_t::Unclock();
|
||||
}
|
||||
};
|
||||
|
||||
class FStat
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -64,7 +64,6 @@ extern "C" {
|
|||
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
|
||||
}
|
||||
|
||||
void gl_CalculateCPUSpeed();
|
||||
extern int NewWidth, NewHeight, NewBits, DisplayBits;
|
||||
|
||||
// these get used before GLEW is initialized so we have to use separate pointers with different names
|
||||
|
@ -162,9 +161,6 @@ public:
|
|||
|
||||
Win32GLVideo::Win32GLVideo(int parm) : m_Modes(NULL), m_IsFullscreen(false)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
gl_CalculateCPUSpeed();
|
||||
#endif
|
||||
I_SetWndProc();
|
||||
m_DisplayWidth = vid_defwidth;
|
||||
m_DisplayHeight = vid_defheight;
|
||||
|
|
Loading…
Reference in a new issue