- Went back to using RDTSC for timing on Win32. Ironically,

QueryPerformanceCounter() is obviously using the TSC for its timing on my
  machine, yet the overhead it incurs to keep the timer sane is apparently
  noticeable on a few maps. I suppose I should at some point check
  clock_gettime() and see if it has similar issues on Linux.


SVN r1460 (trunk)
This commit is contained in:
Randy Heit 2009-03-04 06:06:31 +00:00
parent 3b36334704
commit 5eeaa130fd
4 changed files with 109 additions and 27 deletions

View File

@ -1,4 +1,11 @@
March 3, 2009 (Changes by Graf Zahl) March 4, 2009
- Went back to using RDTSC for timing on Win32. Ironically,
QueryPerformanceCounter() is obviously using the TSC for its timing on my
machine, yet the overhead it has to do to keep the timer sane is apparently
visible on a few maps. I suppose I should at some time check clock_gettime()
and see if it has similar issues on Linux.
March 3, 2009 (Changes by Graf Zahl)
- changed: If a monster with the BOSSDEATH flag is crushed A_BossDeath will - changed: If a monster with the BOSSDEATH flag is crushed A_BossDeath will
be called now. be called now.
- fixed: D'Sparil's second form was missing the BOSSDEATH flag. - fixed: D'Sparil's second form was missing the BOSSDEATH flag.

View File

@ -103,8 +103,40 @@ private:
#else #else
// Windows // Windows
#include "x86.h"
extern double PerfToSec, PerfToMillisec; extern double PerfToSec, PerfToMillisec;
long long QueryPerfCounter();
#ifdef _MSC_VER
// Trying to include intrin.h here results in some bizarre errors, so I'm just
// going to duplicate the function prototype instead.
//#include <intrin.h>
extern "C" unsigned __int64 __rdtsc(void);
#pragma intrinsic(__rdtsc)
// Reads the time-stamp counter through the MSVC __rdtsc intrinsic.
// In builds where _M_X64 is not defined, the intrinsic is only invoked when
// CPU.bRDTSC is set; otherwise the function yields 0.
inline unsigned __int64 rdtsc()
{
#ifndef _M_X64
	if (!CPU.bRDTSC)
	{
		// No RDTSC support reported: report a constant zero timestamp.
		return 0;
	}
#endif
	return __rdtsc();
}
#else
// Reads the CPU's time-stamp counter using GCC-style extended inline assembly.
//
// RDTSC delivers the 64-bit counter split across two registers: the low half
// in EAX and the high half in EDX. Both are captured as asm outputs and then
// recombined, so the compiler knows exactly which registers are written and
// no separate clobber list is needed.
//
// When __amd64__ is not defined, the instruction is only executed if
// CPU.bRDTSC is set; otherwise 0 is returned.
inline unsigned long long rdtsc()
{
#ifndef __amd64__
	if (!CPU.bRDTSC)
	{
		return 0;
	}
#endif
	unsigned int lo, hi;
	// volatile keeps the compiler from merging or hoisting successive reads.
	asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long)hi << 32) | lo;
}
#endif
class cycle_t class cycle_t
{ {
@ -122,16 +154,13 @@ public:
void Clock() void Clock()
{ {
// Not using QueryPerformanceCounter directly, so we don't need long long time = rdtsc();
// to pull in the Windows headers for every single file that
// wants to do some profiling.
long long time = QueryPerfCounter();
Counter -= time; Counter -= time;
} }
void Unclock() void Unclock()
{ {
long long time = QueryPerfCounter(); long long time = rdtsc();
Counter += time; Counter += time;
} }
@ -145,6 +174,11 @@ public:
return Counter * PerfToMillisec; return Counter * PerfToMillisec;
} }
// Returns the accumulated Counter value as-is, without multiplying it by
// any conversion factor.
long long GetRawCounter()
{
return Counter;
}
private: private:
long long Counter; long long Counter;
}; };

View File

@ -372,21 +372,62 @@ void SetLanguageIDs ()
} }
} }
void CalculateCPUSpeed()
{
LARGE_INTEGER freq;
QueryPerformanceFrequency (&freq);
if (freq.QuadPart != 0 && CPU.bRDTSC)
{
LARGE_INTEGER count1, count2;
cycle_t ClockCalibration;
DWORD min_diff;
ClockCalibration.Reset();
// Count cycles for at least 55 milliseconds.
// The performance counter may be very low resolution compared to CPU
// speeds today, so the longer we count, the more accurate our estimate.
// On the other hand, we don't want to count too long, because we don't
// want the user to notice us spend time here, since most users will
// probably never use the performance statistics.
min_diff = freq.LowPart * 11 / 200;
// Minimize the chance of task switching during the testing by going very
// high priority. This is another reason to avoid timing for too long.
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
// Make sure we start timing on a counter boundary.
QueryPerformanceCounter(&count1);
do { QueryPerformanceCounter(&count2); } while (count1.QuadPart == count2.QuadPart);
// Do the timing loop.
ClockCalibration.Clock();
do { QueryPerformanceCounter(&count1); } while ((count1.QuadPart - count2.QuadPart) < min_diff);
ClockCalibration.Unclock();
SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL);
PerfToSec = double(count1.QuadPart - count2.QuadPart) / (double(ClockCalibration.GetRawCounter()) * freq.QuadPart);
PerfToMillisec = PerfToSec * 1000.0;
}
Printf ("CPU Speed: %.0f MHz\n", 0.001 / PerfToMillisec);
}
// //
// I_Init // I_Init
// //
void I_Init (void) void I_Init (void)
{ {
LARGE_INTEGER perf_freq;
CheckCPUID(&CPU); CheckCPUID(&CPU);
CalculateCPUSpeed();
DumpCPUInfo(&CPU); DumpCPUInfo(&CPU);
QueryPerformanceFrequency(&perf_freq);
PerfToSec = 1 / double(perf_freq.QuadPart);
PerfToMillisec = 1000 / double(perf_freq.QuadPart);
// Use a timer event if possible // Use a timer event if possible
NewTicArrived = CreateEvent (NULL, FALSE, FALSE, NULL); NewTicArrived = CreateEvent (NULL, FALSE, FALSE, NULL);
if (NewTicArrived) if (NewTicArrived)

View File

@ -1,6 +1,6 @@
#ifndef X86_H #ifndef X86_H
#define X86_H #define X86_H
struct CPUInfo // 92 bytes struct CPUInfo // 92 bytes
{ {
char VendorID[16]; char VendorID[16];
@ -17,11 +17,11 @@ struct CPUInfo // 92 bytes
BYTE APICID; BYTE APICID;
DWORD bSSE3:1; DWORD bSSE3:1;
DWORD DontCare1:8; DWORD DontCare1:8;
DWORD bSSSE3:1; DWORD bSSSE3:1;
DWORD DontCare1a:9; DWORD DontCare1a:9;
DWORD bSSE41:1; DWORD bSSE41:1;
DWORD bSSE42:1; DWORD bSSE42:1;
DWORD DontCare2a:11; DWORD DontCare2a:11;
DWORD bFPU:1; DWORD bFPU:1;
@ -77,10 +77,10 @@ struct CPUInfo // 92 bytes
extern "C" CPUInfo CPU; extern "C" CPUInfo CPU;
void CheckCPUID (CPUInfo *cpu); void CheckCPUID (CPUInfo *cpu);
void DumpCPUInfo (const CPUInfo *cpu); void DumpCPUInfo (const CPUInfo *cpu);
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
#endif #endif