diff --git a/docs/rh-log.txt b/docs/rh-log.txt index dddba0dbdb..dd663c4b91 100644 --- a/docs/rh-log.txt +++ b/docs/rh-log.txt @@ -1,8 +1,9 @@ -August 10, 2008 -- Changed Linux to use clock_gettime for profiling instead of rdtsc. This - avoids potential erroneous results on multicore and variable speed - processors. - +August 10, 2008 +- Changed Windows to use the performance counter instead of rdtsc. +- Changed Linux to use clock_gettime for profiling instead of rdtsc. This + avoids potential erroneous results on multicore and variable speed + processors. + August 9, 2008 (Changes by Graf Zahl) - Converted the last of Hexen's inventory items to DECORATE so that I could export AInventory. diff --git a/src/d_main.cpp b/src/d_main.cpp index e56b761ad9..21e42ca659 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -479,8 +479,8 @@ void D_Display () if (nodrawers) return; // for comparative timing / profiling - cycle_t cycles; - + cycle_t cycles; + cycles.Reset(); cycles.Clock(); @@ -2653,7 +2653,7 @@ static double bestwallcycles = HUGE_VAL; ADD_STAT (wallcycles) { - FString out; + FString out; double cycles = WallCycles.Time(); if (cycles && cycles < bestwallcycles) bestwallcycles = cycles; @@ -2680,7 +2680,7 @@ static double bestscancycles = HUGE_VAL; ADD_STAT (scancycles) { - FString out; + FString out; double scancycles = WallScanCycles.Time(); if (scancycles && scancycles < bestscancycles) bestscancycles = scancycles; diff --git a/src/dobject.cpp b/src/dobject.cpp index c3644d8a98..fbaf60c6b1 100644 --- a/src/dobject.cpp +++ b/src/dobject.cpp @@ -525,4 +525,4 @@ void DObject::CheckIfSerialized () const StaticType()->TypeName.GetChars()); } } - + diff --git a/src/dthinker.cpp b/src/dthinker.cpp index 3deaf5da1d..ecac8933ff 100644 --- a/src/dthinker.cpp +++ b/src/dthinker.cpp @@ -42,7 +42,7 @@ static cycle_t ThinkCycles; extern cycle_t BotSupportCycles; -extern cycle_t BotWTG; +extern int BotWTG; IMPLEMENT_CLASS (DThinker) @@ -406,9 +406,9 @@ void 
DThinker::RunThinkers () { int i, count; - ThinkCycles.Reset(); - BotSupportCycles.Reset(); - BotWTG.Reset(); + ThinkCycles.Reset(); + BotSupportCycles.Reset(); + BotWTG = 0; ThinkCycles.Clock(); diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7a5707147a..88a081fddd 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -41,6 +41,7 @@ #include "gi.h" #include "stats.h" +#include "x86.h" #undef RANGECHECK diff --git a/src/stats.h b/src/stats.h index bec6ccc03f..179b6a996c 100644 --- a/src/stats.h +++ b/src/stats.h @@ -35,74 +35,120 @@ #define __STATS_H__ #include "zstring.h" - -#ifdef unix - -#ifdef NO_CLOCK_GETTIME -class cycle_t -{ -public: - cycle_t &operator= (const cycle_t &o) { return *this; } - void Reset() {} - void Clock() {} - void Unclock() {} - double Time() { return 0; } - double TimeMS() { return 0; } -}; - -#else - -#include - -class cycle_t -{ -public: - cycle_t &operator= (const cycle_t &o) - { - Sec = o.Sec; - return *this; - } - - void Reset() - { - Sec = 0; - } - - void Clock() - { - timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); - Sec -= ts.tv_sec + ts.tv_nsec * 1e-9; - } - - void Unclock() - { - timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); - Sec += ts.tv_sec + ts.tv_nsec * 1e-9; - } - - double Time() - { - return Sec; - } - - double TimeMS() - { - return Sec * 1e3; - } - -private: - double Sec; -}; - -#endif - -#else - -// Windows + +#ifdef unix + +#ifdef NO_CLOCK_GETTIME +class cycle_t +{ +public: + cycle_t &operator= (const cycle_t &o) { return *this; } + void Reset() {} + void Clock() {} + void Unclock() {} + double Time() { return 0; } + double TimeMS() { return 0; } +}; + +#else + +#include + +class cycle_t +{ +public: + cycle_t &operator= (const cycle_t &o) + { + Sec = o.Sec; + return *this; + } + + void Reset() + { + Sec = 0; + } + + void Clock() + { + timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + Sec -= ts.tv_sec + ts.tv_nsec * 1e-9; + } + + void Unclock() + { + timespec ts; + + clock_gettime(CLOCK_MONOTONIC, 
&ts); + Sec += ts.tv_sec + ts.tv_nsec * 1e-9; + } + + double Time() + { + return Sec; + } + + double TimeMS() + { + return Sec * 1e3; + } + +private: + double Sec; +}; + +#endif + +#else + +// Windows +extern double PerfToSec, PerfToMillisec; +long long QueryPerfCounter(); + +class cycle_t +{ +public: + cycle_t &operator= (const cycle_t &o) + { + Counter = o.Counter; + return *this; + } + + void Reset() + { + Counter = 0; + } + + void Clock() + { + // Not using QueryPerformanceCounter directly, so we don't need + // to pull in the Windows headers for every single file that + // wants to do some profiling. + long long time = QueryPerfCounter(); + Counter -= time; + } + + void Unclock() + { + long long time = QueryPerfCounter(); + Counter += time; + } + + double Time() + { + return Counter * PerfToSec; + } + + double TimeMS() + { + return Counter * PerfToMillisec; + } + +private: + long long Counter; +}; + #endif class FStat diff --git a/src/v_palette.cpp b/src/v_palette.cpp index b826e3a9f3..c2889bf2e8 100644 --- a/src/v_palette.cpp +++ b/src/v_palette.cpp @@ -54,6 +54,7 @@ #include "g_level.h" #include "st_stuff.h" #include "gi.h" +#include "x86.h" extern "C" { FDynamicColormap NormalLight; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index a92de0f021..77ff0ead53 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -866,8 +866,8 @@ void D3DFB::Update () UploadPalette(); } - BlitCycles = 0; - clock (BlitCycles); + BlitCycles.Reset(); + BlitCycles.Clock(); LockCount = 0; HRESULT hr = D3DDevice->TestCooperativeLevel(); @@ -885,7 +885,7 @@ void D3DFB::Update () InScene = false; } - unclock (BlitCycles); + BlitCycles.Unclock(); //LOG1 ("cycles = %d\n", BlitCycles); Buffer = NULL; diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index d4438ba4b5..2880005824 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -1095,8 +1095,8 @@ void DDrawFB::Update () } } - BlitCycles = 0; - clock (BlitCycles); + BlitCycles.Reset(); + 
BlitCycles.Clock(); if (BufferingNow) { @@ -1163,8 +1163,8 @@ void DDrawFB::Update () } } - unclock (BlitCycles); - LOG1 ("cycles = %llu\n", BlitCycles); + BlitCycles.Unclock(); + LOG1 ("cycles = %.1f ms\n", BlitCycles.TimeMS()); Buffer = NULL; LockCount = 0; @@ -1299,9 +1299,6 @@ void DDrawFB::Blank () ADD_STAT (blit) { FString out; - out.Format ( - "blit=%04.1f ms", - (double)BlitCycles * SecondsPerCycle * 1000 - ); + out.Format ("blit=%04.1f ms", BlitCycles.TimeMS()); return out; } diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp index d967917c94..0612d901f0 100644 --- a/src/win32/i_system.cpp +++ b/src/win32/i_system.cpp @@ -53,6 +53,7 @@ #include "i_music.h" #include "resource.h" #include "x86.h" +#include "stats.h" #include "d_main.h" #include "d_net.h" @@ -70,15 +71,11 @@ EXTERN_CVAR (String, language) extern void CheckCPUID(CPUInfo *cpu); -extern "C" -{ - double SecondsPerCycle = 1e-8; - double CyclesPerSecond = 1e8; // 100 MHz -} - extern HWND Window, ConWindow, GameTitleWindow; extern HINSTANCE g_hInst; +double PerfToSec, PerfToMillisec; + UINT TimerPeriod; UINT TimerEventID; UINT MillisecondsPerTic; @@ -345,9 +342,15 @@ void SetLanguageIDs () void I_Init (void) { + LARGE_INTEGER perf_freq; + CheckCPUID(&CPU); DumpCPUInfo(&CPU); + QueryPerformanceFrequency(&perf_freq); + PerfToSec = 1 / double(perf_freq.QuadPart); + PerfToMillisec = 1000 / double(perf_freq.QuadPart); + // Use a timer event if possible NewTicArrived = CreateEvent (NULL, FALSE, FALSE, NULL); if (NewTicArrived) @@ -737,3 +740,11 @@ FString I_GetSteamPath() path = ""; return path; } + +long long QueryPerfCounter() +{ + LARGE_INTEGER counter; + + QueryPerformanceCounter(&counter); + return counter.QuadPart; +} diff --git a/src/x86.cpp b/src/x86.cpp index 849ce85e23..99e39d553c 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -3,15 +3,15 @@ #endif #include #include - -#include "doomtype.h" + +#include "doomtype.h" #include "doomdef.h" #include "x86.h" - -extern "C" -{ - CPUInfo CPU; -} 
+ +extern "C" +{ + CPUInfo CPU; +} #ifdef __GNUC__ #define __cpuid(output, func) __asm__ __volatile__("cpuid" : "=a" ((output)[0]),\ @@ -102,9 +102,9 @@ haveid: if (cpu->Family == 15) { // Add extended family. - cpu->Family += (foo[0] >> 20) & 0xFF; - } - if (cpu->Family == 6 || cpu->Family == 15) + cpu->Family += (foo[0] >> 20) & 0xFF; + } + if (cpu->Family == 6 || cpu->Family == 15) { // Add extended model ID. cpu->Model |= (foo[0] >> 12) & 0xF0; } @@ -144,65 +144,65 @@ haveid: #endif } - -void DumpCPUInfo(const CPUInfo *cpu) -{ - char cpustring[4*4*3+1]; - - // Why does Intel right-justify this string (on P4s) - // or add extra spaces (on Cores)? - const char *f = cpu->CPUString; - char *t; - - // Skip extra whitespace at the beginning. - while (*f == ' ') - { - ++f; - } - - // Copy string to temp buffer, but condense consecutive - // spaces to a single space character. - for (t = cpustring; *f != '\0'; ++f) - { - if (*f == ' ' && *(f - 1) == ' ') - { - continue; - } - *t++ = *f; - } - *t = '\0'; - - if (cpu->VendorID[0]) - { - Printf("CPU Vendor ID: %s\n", cpu->VendorID); - if (cpustring[0]) - { - Printf(" Name: %s\n", cpustring); - } - if (cpu->bIsAMD) - { - Printf(" Family %d (%d), Model %d, Stepping %d\n", - cpu->Family, cpu->AMDFamily, cpu->AMDModel, cpu->AMDStepping); - } - else - { - Printf(" Family %d, Model %d, Stepping %d\n", - cpu->Family, cpu->Model, cpu->Stepping); - } - Printf(" Features:"); - if (cpu->bMMX) Printf(" MMX"); - if (cpu->bMMXPlus) Printf(" MMX+"); - if (cpu->bSSE) Printf(" SSE"); - if (cpu->bSSE2) Printf(" SSE2"); - if (cpu->bSSE3) Printf(" SSE3"); - if (cpu->bSSSE3) Printf(" SSSE3"); - if (cpu->bSSE41) Printf(" SSE4.1"); - if (cpu->bSSE42) Printf(" SSE4.2"); - if (cpu->b3DNow) Printf(" 3DNow!"); - if (cpu->b3DNowPlus) Printf(" 3DNow!+"); - Printf ("\n"); - } -} + +void DumpCPUInfo(const CPUInfo *cpu) +{ + char cpustring[4*4*3+1]; + + // Why does Intel right-justify this string (on P4s) + // or add extra spaces (on Cores)? 
+ const char *f = cpu->CPUString; + char *t; + + // Skip extra whitespace at the beginning. + while (*f == ' ') + { + ++f; + } + + // Copy string to temp buffer, but condense consecutive + // spaces to a single space character. + for (t = cpustring; *f != '\0'; ++f) + { + if (*f == ' ' && *(f - 1) == ' ') + { + continue; + } + *t++ = *f; + } + *t = '\0'; + + if (cpu->VendorID[0]) + { + Printf("CPU Vendor ID: %s\n", cpu->VendorID); + if (cpustring[0]) + { + Printf(" Name: %s\n", cpustring); + } + if (cpu->bIsAMD) + { + Printf(" Family %d (%d), Model %d, Stepping %d\n", + cpu->Family, cpu->AMDFamily, cpu->AMDModel, cpu->AMDStepping); + } + else + { + Printf(" Family %d, Model %d, Stepping %d\n", + cpu->Family, cpu->Model, cpu->Stepping); + } + Printf(" Features:"); + if (cpu->bMMX) Printf(" MMX"); + if (cpu->bMMXPlus) Printf(" MMX+"); + if (cpu->bSSE) Printf(" SSE"); + if (cpu->bSSE2) Printf(" SSE2"); + if (cpu->bSSE3) Printf(" SSE3"); + if (cpu->bSSSE3) Printf(" SSSE3"); + if (cpu->bSSE41) Printf(" SSE4.1"); + if (cpu->bSSE42) Printf(" SSE4.2"); + if (cpu->b3DNow) Printf(" 3DNow!"); + if (cpu->b3DNowPlus) Printf(" 3DNow!+"); + Printf ("\n"); + } +} #if 0 // Compiler output for this function is crap compared to the assembly diff --git a/zdoom.vcproj b/zdoom.vcproj index a2a7d9bf04..685a1d678f 100644 --- a/zdoom.vcproj +++ b/zdoom.vcproj @@ -1,7 +1,7 @@ - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - @@ -953,6 +943,16 @@ Outputs=""src/$(InputName).h"" /> + + + @@ -1529,6 +1529,10 @@ RelativePath=".\src\wi_stuff.h" > + + @@ -1554,16 +1558,6 @@ Outputs="$(IntDir)\$(InputName).obj" /> - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - @@ -1804,6 +1800,14 @@ Outputs="$(IntDir)/$(InputName).obj" /> + + + @@ -1969,6 +1973,14 @@ Outputs="$(IntDir)\$(InputName).obj" /> + + + @@ -1979,14 +1991,6 @@ Outputs="$(IntDir)\$(InputName).obj" /> - - - + + + @@ -2865,14 +2877,6 @@ AdditionalIncludeDirectories="src\win32;$(NoInherit)" 
/> - - - @@ -3147,7 +3151,7 @@ />