mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-25 13:31:37 +00:00
- Went back to using RDTSC for timing on Win32. Ironically,
QueryPerformanceCounter() is obviously using the TSC for its timing on my machine, yet the overhead it has to do to keep the timer sane is apparently noticeable on a few maps. I suppose I should at some time check clock_gettime() and see if it has similar issues on Linux. SVN r1460 (trunk)
This commit is contained in:
parent
3b36334704
commit
5eeaa130fd
4 changed files with 109 additions and 27 deletions
|
@ -1,4 +1,11 @@
|
|||
March 3, 2009 (Changes by Graf Zahl)
|
||||
March 4, 2009
|
||||
- Went back to using RDTSC for timing on Win32. Ironically,
|
||||
QueryPerformanceCounter() is obviously using the TSC for its timing on my
|
||||
machine, yet the overhead it has to do to keep the timer sane is apparently
|
||||
visible on a few maps. I suppose I should at some time check clock_gettime()
|
||||
and see if it has similar issues on Linux.
|
||||
|
||||
March 3, 2009 (Changes by Graf Zahl)
|
||||
- changed: If a monster with the BOSSDEATH flag is crushed A_BossDeath will
|
||||
be called now.
|
||||
- fixed: D'Sparil's second form was missing the BOSSDEATH flag.
|
||||
|
|
46
src/stats.h
46
src/stats.h
|
@ -103,8 +103,40 @@ private:
|
|||
#else
|
||||
|
||||
// Windows
|
||||
#include "x86.h"
|
||||
|
||||
extern double PerfToSec, PerfToMillisec;
|
||||
long long QueryPerfCounter();
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// Trying to include intrin.h here results in some bizarre errors, so I'm just
|
||||
// going to duplicate the function prototype instead.
|
||||
//#include <intrin.h>
|
||||
extern "C" unsigned __int64 __rdtsc(void);
|
||||
#pragma intrinsic(__rdtsc)
|
||||
// Reads the processor's time-stamp counter through the MSVC __rdtsc intrinsic.
//
// 32-bit builds consult CPU.bRDTSC first and report 0 when the flag is clear.
// x64 builds skip that check entirely (presumably because the instruction is
// always available there -- confirm against CheckCPUID).
inline unsigned __int64 rdtsc()
{
#ifdef _M_X64
	return __rdtsc();
#else
	return CPU.bRDTSC ? __rdtsc() : 0;
#endif
}
|
||||
#else
|
||||
// Reads the processor's time-stamp counter using GCC extended inline asm.
//
// Returns the full 64-bit counter value, or 0 on non-amd64 builds when
// CPU.bRDTSC is clear. amd64 builds skip the CPU.bRDTSC check.
//
// RDTSC delivers its result split across two registers: the low 32 bits in
// EAX and the high 32 bits in EDX. Both halves are therefore captured as
// explicit outputs ("=a" / "=d") and recombined, rather than binding a 64-bit
// variable to eax alone and listing the registers as clobbers. The same
// sequence is valid for 32-bit and 64-bit x86 targets.
//
// The return type is a plain unsigned long long; a volatile qualifier on a
// by-value return has no effect.
inline unsigned long long rdtsc()
{
#ifndef __amd64__
	if (CPU.bRDTSC)
#endif
	{
		unsigned int lo, hi;
		// volatile keeps the compiler from merging or hoisting repeated reads.
		asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
		return ((unsigned long long)hi << 32) | lo;
	}
	return 0;
}
|
||||
#endif
|
||||
|
||||
class cycle_t
|
||||
{
|
||||
|
@ -122,16 +154,13 @@ public:
|
|||
|
||||
void Clock()
|
||||
{
|
||||
// Not using QueryPerformanceCounter directly, so we don't need
|
||||
// to pull in the Windows headers for every single file that
|
||||
// wants to do some profiling.
|
||||
long long time = QueryPerfCounter();
|
||||
long long time = rdtsc();
|
||||
Counter -= time;
|
||||
}
|
||||
|
||||
void Unclock()
|
||||
{
|
||||
long long time = QueryPerfCounter();
|
||||
long long time = rdtsc();
|
||||
Counter += time;
|
||||
}
|
||||
|
||||
|
@ -145,6 +174,11 @@ public:
|
|||
return Counter * PerfToMillisec;
|
||||
}
|
||||
|
||||
long long GetRawCounter()
|
||||
{
|
||||
return Counter;
|
||||
}
|
||||
|
||||
private:
|
||||
long long Counter;
|
||||
};
|
||||
|
|
|
@ -372,21 +372,62 @@ void SetLanguageIDs ()
|
|||
}
|
||||
}
|
||||
|
||||
void CalculateCPUSpeed()
|
||||
{
|
||||
LARGE_INTEGER freq;
|
||||
|
||||
QueryPerformanceFrequency (&freq);
|
||||
|
||||
if (freq.QuadPart != 0 && CPU.bRDTSC)
|
||||
{
|
||||
LARGE_INTEGER count1, count2;
|
||||
cycle_t ClockCalibration;
|
||||
DWORD min_diff;
|
||||
|
||||
ClockCalibration.Reset();
|
||||
|
||||
// Count cycles for at least 55 milliseconds.
|
||||
// The performance counter may be very low resolution compared to CPU
|
||||
// speeds today, so the longer we count, the more accurate our estimate.
|
||||
// On the other hand, we don't want to count too long, because we don't
|
||||
// want the user to notice us spend time here, since most users will
|
||||
// probably never use the performance statistics.
|
||||
min_diff = freq.LowPart * 11 / 200;
|
||||
|
||||
// Minimize the chance of task switching during the testing by going very
|
||||
// high priority. This is another reason to avoid timing for too long.
|
||||
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
|
||||
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
|
||||
|
||||
// Make sure we start timing on a counter boundary.
|
||||
QueryPerformanceCounter(&count1);
|
||||
do { QueryPerformanceCounter(&count2); } while (count1.QuadPart == count2.QuadPart);
|
||||
|
||||
// Do the timing loop.
|
||||
ClockCalibration.Clock();
|
||||
do { QueryPerformanceCounter(&count1); } while ((count1.QuadPart - count2.QuadPart) < min_diff);
|
||||
ClockCalibration.Unclock();
|
||||
|
||||
SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
|
||||
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL);
|
||||
|
||||
PerfToSec = double(count1.QuadPart - count2.QuadPart) / (double(ClockCalibration.GetRawCounter()) * freq.QuadPart);
|
||||
PerfToMillisec = PerfToSec * 1000.0;
|
||||
}
|
||||
|
||||
Printf ("CPU Speed: %.0f MHz\n", 0.001 / PerfToMillisec);
|
||||
}
|
||||
|
||||
//
|
||||
// I_Init
|
||||
//
|
||||
|
||||
void I_Init (void)
|
||||
{
|
||||
LARGE_INTEGER perf_freq;
|
||||
|
||||
CheckCPUID(&CPU);
|
||||
CalculateCPUSpeed();
|
||||
DumpCPUInfo(&CPU);
|
||||
|
||||
QueryPerformanceFrequency(&perf_freq);
|
||||
PerfToSec = 1 / double(perf_freq.QuadPart);
|
||||
PerfToMillisec = 1000 / double(perf_freq.QuadPart);
|
||||
|
||||
// Use a timer event if possible
|
||||
NewTicArrived = CreateEvent (NULL, FALSE, FALSE, NULL);
|
||||
if (NewTicArrived)
|
||||
|
|
28
src/x86.h
28
src/x86.h
|
@ -1,6 +1,6 @@
|
|||
#ifndef X86_H
|
||||
#define X86_H
|
||||
|
||||
#ifndef X86_H
|
||||
#define X86_H
|
||||
|
||||
struct CPUInfo // 92 bytes
|
||||
{
|
||||
char VendorID[16];
|
||||
|
@ -17,11 +17,11 @@ struct CPUInfo // 92 bytes
|
|||
BYTE APICID;
|
||||
|
||||
DWORD bSSE3:1;
|
||||
DWORD DontCare1:8;
|
||||
DWORD bSSSE3:1;
|
||||
DWORD DontCare1a:9;
|
||||
DWORD bSSE41:1;
|
||||
DWORD bSSE42:1;
|
||||
DWORD DontCare1:8;
|
||||
DWORD bSSSE3:1;
|
||||
DWORD DontCare1a:9;
|
||||
DWORD bSSE41:1;
|
||||
DWORD bSSE42:1;
|
||||
DWORD DontCare2a:11;
|
||||
|
||||
DWORD bFPU:1;
|
||||
|
@ -77,10 +77,10 @@ struct CPUInfo // 92 bytes
|
|||
|
||||
|
||||
extern "C" CPUInfo CPU;
|
||||
|
||||
void CheckCPUID (CPUInfo *cpu);
|
||||
void DumpCPUInfo (const CPUInfo *cpu);
|
||||
|
||||
void CheckCPUID (CPUInfo *cpu);
|
||||
void DumpCPUInfo (const CPUInfo *cpu);
|
||||
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in a new issue