- consolidated cycle_t and glcycle_t because, aside from glcycle_t's ability to be deactivated when no profiling takes place, they did exactly the same thing.

This commit is contained in:
Christoph Oelckers 2018-04-01 13:24:03 +02:00
parent d6fedd36b1
commit 711a88bab3
5 changed files with 25 additions and 171 deletions

View file

@ -1,7 +1,6 @@
/*
**
** This is a copy of the regular cycle_t from a time when that was based
** on QueryPerformanceCounter which is too costly for real-time profiling.
** Hardware render profiling info
**
**---------------------------------------------------------------------------
** Copyright 1998-2016 Randy Heit
@ -34,17 +33,6 @@
**
*/
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <intrin.h>
#elif defined __APPLE__
#include <sys/sysctl.h>
#endif
#include <inttypes.h>
#include "i_system.h"
#include "g_level.h"
#include "c_console.h"
@ -68,67 +56,6 @@ int vertexcount, flatvertices, flatprimitives;
int rendered_lines,rendered_flats,rendered_sprites,render_vertexsplit,render_texsplit,rendered_decals, rendered_portals;
int iter_dlightf, iter_dlight, draw_dlight, draw_dlightf;
// Factors that convert a raw cycle count into seconds / milliseconds.
// The initial values correspond to a 100 MHz counter; gl_CalculateCPUSpeed()
// overwrites both when it manages to determine the real frequency.
double gl_SecondsPerCycle = 1e-8;
double gl_MillisecPerCycle = 1e-5; // 100 MHz
// For GL timing the performance counter is far too costly so we still need RDTSC
// even though it may not be perfect.
//
// Calibrates gl_SecondsPerCycle and gl_MillisecPerCycle.
//  - Windows: the timestamp counter is measured against QueryPerformanceCounter
//    during a short busy-wait.
//  - macOS:   the frequency is read from the "machdep.tsc.frequency" sysctl.
// On any other platform, or whenever no usable frequency can be determined,
// both factors are left untouched.
void gl_CalculateCPUSpeed ()
{
#ifdef _WIN32
	LARGE_INTEGER freq;

	QueryPerformanceFrequency (&freq);
	if (freq.QuadPart > 0)
	{
		LARGE_INTEGER count1, count2;

		// Count cycles for at least 55 milliseconds.
		// The performance counter is very low resolution compared to CPU
		// speeds today, so the longer we count, the more accurate our estimate.
		// On the other hand, we don't want to count too long, because we don't
		// want the user to notice us spend time here, since most users will
		// probably never use the performance statistics.
		//
		// The threshold is computed from the full 64-bit frequency: a 32-bit
		// 'LowPart * 11' product wraps around as soon as the counter runs
		// faster than roughly 390 MHz, which would cut the measurement short.
		const int64_t minDiff = freq.QuadPart * 11 / 200;

		// Remember the current priorities so they can be put back afterwards
		// instead of unconditionally demoting the process to 'normal'.
		const HANDLE process = GetCurrentProcess ();
		const HANDLE thread = GetCurrentThread ();
		DWORD oldPriorityClass = GetPriorityClass (process);
		int oldThreadPriority = GetThreadPriority (thread);
		if (oldPriorityClass == 0)
		{
			// Query failed; fall back to the default class.
			oldPriorityClass = NORMAL_PRIORITY_CLASS;
		}
		if (oldThreadPriority == THREAD_PRIORITY_ERROR_RETURN)
		{
			// Query failed; fall back to the default thread priority.
			oldThreadPriority = THREAD_PRIORITY_NORMAL;
		}

		// Minimize the chance of task switching during the testing by going very
		// high priority. This is another reason to avoid timing for too long.
		SetPriorityClass (process, REALTIME_PRIORITY_CLASS);
		SetThreadPriority (thread, THREAD_PRIORITY_TIME_CRITICAL);

		int64_t ClockCalibration = __rdtsc();
		QueryPerformanceCounter (&count1);
		do
		{
			QueryPerformanceCounter (&count2);
		} while (count2.QuadPart - count1.QuadPart < minDiff);
		ClockCalibration = __rdtsc() - ClockCalibration;
		QueryPerformanceCounter (&count2);

		SetPriorityClass (process, oldPriorityClass);
		SetThreadPriority (thread, oldThreadPriority);

		// Only accept the measurement when both deltas are positive; anything
		// else would turn the conversion factors into infinities or NaNs.
		const int64_t elapsedTicks = count2.QuadPart - count1.QuadPart;
		if (ClockCalibration > 0 && elapsedTicks > 0)
		{
			const double CyclesPerSecond = static_cast<double>(ClockCalibration) *
				static_cast<double>(freq.QuadPart) /
				static_cast<double>(elapsedTicks);

			gl_SecondsPerCycle = 1.0 / CyclesPerSecond;
			gl_MillisecPerCycle = 1000.0 / CyclesPerSecond;
		}
	}
#elif defined __APPLE__
	long long frequency = 0;
	size_t size = sizeof frequency;

	// Keep the defaults unless the sysctl succeeds and reports a positive frequency.
	if (0 == sysctlbyname("machdep.tsc.frequency", &frequency, &size, nullptr, 0) && frequency > 0)
	{
		gl_SecondsPerCycle = 1.0 / frequency;
		gl_MillisecPerCycle = 1000.0 / frequency;
	}
#endif
}
void ResetProfilingData()
{
All.Reset();
@ -238,7 +165,7 @@ void CheckBench()
AppendRenderTimes(compose);
AppendLightStats(compose);
AppendMissingTextureStats(compose);
compose.AppendFormat("%" PRIu64 " fps\n\n", screen->GetLastFPS());
compose.AppendFormat("%llu fps\n\n", screen->GetLastFPS());
FILE *f = fopen("benchmarks.txt", "at");
if (f != NULL)
@ -269,11 +196,11 @@ CCMD(bench)
C_HideConsole ();
}
bool gl_benching = false;
bool glcycle_t::active = false;
// Recomputes the profiling switch: it is on while the "rendertimes" stat
// exists and reports itself active, or while printstats is set.
// NOTE(review): this listing carries both the gl_benching assignment and the
// glcycle_t::active assignment with identical right-hand sides — presumably
// the before/after lines of one statement; confirm which one the file keeps.
void checkBenchActive()
{
// Look the stat up by name; the result is checked for NULL before use.
FStat *stat = FStat::FindStat("rendertimes");
gl_benching = ((stat != NULL && stat->isActive()) || printstats);
glcycle_t::active = ((stat != NULL && stat->isActive()) || printstats);
}

View file

@ -5,92 +5,6 @@
#include "x86.h"
#include "m_fixed.h"
extern bool gl_benching;
extern double gl_SecondsPerCycle;
extern double gl_MillisecPerCycle;
// GetClockCycle: returns the current timestamp-counter value as a signed
// 64-bit integer, or 0 where no counter is available. Exactly one of the
// definitions below is compiled, selected by compiler and target.
#if defined(_MSC_VER)
// MSVC: compiler intrinsic.
__forceinline int64_t GetClockCycle ()
{
	return __rdtsc();
}
#elif defined __APPLE__ && (defined __i386__ || defined __x86_64__)
// Apple toolchain on x86: compiler builtin.
inline int64_t GetClockCycle()
{
	return __builtin_ia32_rdtsc();
}
#elif defined(__GNUG__) && defined(__i386__)
// g++ on 32-bit x86: inline assembly, used only when the CPU info reports RDTSC.
inline int64_t GetClockCycle()
{
	if (!CPU.bRDTSC)
	{
		return 0;
	}

	int64_t ticks;
	asm volatile ("rdtsc" : "=A" (ticks));	// "=A" binds the EDX:EAX pair
	return ticks;
}
#else
// Everything else: no cycle counter, always report 0.
inline int64_t GetClockCycle ()
{
	return 0;
}
#endif
// Accumulating cycle counter for render profiling.
//
// Clock()/Unclock() bracket a timed section: Clock() subtracts the current
// cycle count, Unclock() adds it back, so the accumulated value grows by the
// number of cycles spent in between. While gl_benching is false both calls
// contribute 0 and never query the cycle counter.
class glcycle_t
{
public:
	glcycle_t &operator= (const glcycle_t &o)
	{
		Counter = o.Counter;
		return *this;
	}

	// Discards everything accumulated so far.
	void Reset()
	{
		Counter = 0;
	}

	// Marks the start of a timed section.
	__forceinline void Clock()
	{
		// The cycle counter is only read while benchmarking is enabled, so
		// disabled profiling costs no more than a flag test.
		int64_t time = (gl_benching? GetClockCycle() : 0);
		Counter -= time;
	}

	// Marks the end of a timed section.
	__forceinline void Unclock()
	{
		int64_t time = (gl_benching? GetClockCycle() : 0);
		Counter += time;
	}

	// Accumulated time in seconds.
	double Time() const
	{
		return double(Counter) * gl_SecondsPerCycle;
	}

	// Accumulated time in milliseconds.
	double TimeMS() const
	{
		return double(Counter) * gl_MillisecPerCycle;
	}

private:
	// Starts at zero so that an instance without static storage duration
	// does not report an indeterminate value before its first Reset().
	int64_t Counter = 0;
};
extern glcycle_t RenderWall,SetupWall,ClipWall;
extern glcycle_t RenderFlat,SetupFlat;
extern glcycle_t RenderSprite,SetupSprite;

View file

@ -408,9 +408,6 @@ CocoaVideo::CocoaVideo()
{
memset(&m_modeIterator, 0, sizeof m_modeIterator);
extern void gl_CalculateCPUSpeed();
gl_CalculateCPUSpeed();
// Create OpenGL pixel format
NSOpenGLPixelFormatAttribute defaultProfile = NSOpenGLProfileVersion3_2Core;

View file

@ -117,6 +117,11 @@ inline unsigned __int64 rdtsc()
{
return __rdtsc();
}
#elif defined __APPLE__ && (defined __i386__ || defined __x86_64__)
// Apple toolchain on x86: read the timestamp counter through the compiler
// builtin. The return type is spelled uint64_t, matching the generic variant,
// because '__int64' is a Microsoft-specific type name.
inline uint64_t rdtsc()
{
	return __builtin_ia32_rdtsc();
}
#else
inline uint64_t rdtsc()
{
@ -165,7 +170,7 @@ public:
Counter -= time;
}
void Unclock()
void Unclock(bool checkvar = true)
{
int64_t time = rdtsc();
Counter += time;
@ -192,6 +197,21 @@ private:
#endif
// cycle_t variant whose Clock()/Unclock() do nothing unless the class-wide
// 'active' flag is set. Both methods hide the cycle_t members of the same name.
class glcycle_t : public cycle_t
{
public:
	// One switch shared by every glcycle_t; defined outside the class body.
	static bool active;

	// Forwards to cycle_t::Clock() only while profiling is switched on.
	void Clock()
	{
		if (!active)
		{
			return;
		}
		cycle_t::Clock();
	}

	// Forwards to cycle_t::Unclock() only while profiling is switched on.
	void Unclock()
	{
		if (!active)
		{
			return;
		}
		cycle_t::Unclock();
	}
};
class FStat
{
public:

View file

@ -64,7 +64,6 @@ extern "C" {
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
}
void gl_CalculateCPUSpeed();
extern int NewWidth, NewHeight, NewBits, DisplayBits;
// these get used before GLEW is initialized so we have to use separate pointers with different names
@ -162,9 +161,6 @@ public:
Win32GLVideo::Win32GLVideo(int parm) : m_Modes(NULL), m_IsFullscreen(false)
{
#ifdef _WIN32
gl_CalculateCPUSpeed();
#endif
I_SetWndProc();
m_DisplayWidth = vid_defwidth;
m_DisplayHeight = vid_defheight;