diff --git a/source/common/engine/stats.h b/source/common/engine/stats.h
index 735931cb5..7e3e05d8c 100644
--- a/source/common/engine/stats.h
+++ b/source/common/engine/stats.h
@@ -57,3 +57,43 @@ public:
+// [MK] try to use RDTSC on linux if possible
+// avoids excess latency of clock_gettime() on some platforms
+#ifdef __linux__
+extern bool PerfAvailable;
+extern double PerfToSec, PerfToMillisec;
+
+// Reads the CPU time-stamp counter (the time base register on PowerPC).
+// Returns 0 where no counter read is implemented, and on i386 when CPU.bRDTSC is unset.
+inline uint64_t rdtsc()
+{
+#ifdef __amd64__
+	uint64_t tsc;
+	asm volatile("rdtsc; shlq $32, %%rdx; orq %%rdx, %%rax":"=a"(tsc)::"%rdx");
+	return tsc;
+#elif defined __ppc__
+	unsigned int lower, upper, temp;
+	do
+	{
+		asm volatile ("mftbu %0 \n mftb %1 \n mftbu %2 \n"
+			: "=r"(upper), "=r"(lower), "=r"(temp));
+	}
+	while (upper != temp);
+	return (static_cast<uint64_t>(upper) << 32) | lower;
+#elif defined __aarch64__
+	// TODO: Implement and test on ARM64
+	return 0;
+#elif defined __i386__
+	if (CPU.bRDTSC)
+	{
+		uint64_t tsc;
+		asm volatile ("\trdtsc\n" : "=A" (tsc));
+		return tsc;
+	}
+	return 0;
+#else
+	return 0;
+#endif // __amd64__
+}
+#endif
+
 class cycle_t
 {
 public:
@@ -64,6 +104,14 @@ public:
 
 	void Clock()
 	{
+#ifdef __linux__
+		if ( PerfAvailable )
+		{
+			int64_t time = rdtsc();
+			Sec -= time * PerfToSec;
+			return;
+		}
+#endif
 		timespec ts;
 
 		clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -72,6 +120,14 @@ public:
 
 	void Unclock()
 	{
+#ifdef __linux__
+		if ( PerfAvailable )
+		{
+			int64_t time = rdtsc();
+			Sec += time * PerfToSec;
+			return;
+		}
+#endif
 		timespec ts;
 
 		clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -140,7 +196,7 @@ inline uint64_t rdtsc()
 #elif defined __aarch64__
 	// TODO: Implement and test on ARM64
 	return 0;
-#else // i386
+#elif defined __i386__ // i386
 	if (CPU.bRDTSC)
 	{
 		uint64_t tsc;
@@ -148,6 +204,8 @@ inline uint64_t rdtsc()
 		return tsc;
 	}
 	return 0;
+#else
+	return 0;
 #endif // __amd64__
 }
 #endif
diff --git a/source/common/platform/posix/sdl/i_system.cpp b/source/common/platform/posix/sdl/i_system.cpp
index 0bca72304..783d65efa 100644
--- a/source/common/platform/posix/sdl/i_system.cpp
+++ b/source/common/platform/posix/sdl/i_system.cpp
@@ -52 +52,8 @@
+#ifdef __linux__
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include "printf.h"
+#endif
+
 #include "version.h"
@@ -131,8 +138,55 @@ void I_ShowFatalError(const char *message)
 #endif
 }
 
+bool PerfAvailable;
+
 void CalculateCPUSpeed()
 {
+	PerfAvailable = false;
+	PerfToMillisec = PerfToSec = 0.;
+	// [MK] read from perf values if we can.
+	// Only attempted where rdtsc() (stats.h) reads a real counter: on other
+	// architectures it returns 0, so enabling PerfAvailable would zero every timer.
+#if defined(__linux__) && (defined(__amd64__) || defined(__i386__) || defined(__ppc__))
+	struct perf_event_attr pe;
+	memset(&pe,0,sizeof(struct perf_event_attr));
+	pe.type = PERF_TYPE_HARDWARE;
+	pe.size = sizeof(struct perf_event_attr);
+	pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+	pe.disabled = 1;
+	pe.exclude_kernel = 1;
+	pe.exclude_hv = 1;
+	int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
+	if (fd == -1)
+	{
+		return;
+	}
+	// mmap() reports failure with MAP_FAILED, never with a null pointer.
+	void *addr = mmap(nullptr, 4096, PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED)
+	{
+		close(fd);
+		return;
+	}
+	const struct perf_event_mmap_page *pc = (const struct perf_event_mmap_page *)addr;
+	// time_mult/time_shift scale counter ticks to nanoseconds (ticks * mult >> shift).
+	// Reject a zero multiplier or an oversized shift instead of dividing by zero
+	// or shifting past the width of the integer.
+	const bool usable = pc->cap_user_time == 1 && pc->time_mult != 0 && pc->time_shift < 64;
+	// 64-bit shift: 1000LU << time_shift overflows where unsigned long is 32 bits wide.
+	const double mhz = usable ? 1000. * (double)(1ULL << pc->time_shift) / (double)pc->time_mult : 0.;
+	// Everything needed has been copied out; release the page and the event.
+	munmap(addr, 4096);
+	close(fd);
+	if (!usable)
+	{
+		return;
+	}
+	PerfAvailable = true;
+	PerfToSec = .000001/mhz;
+	PerfToMillisec = PerfToSec*1000.;
+	if (!batchrun) Printf("CPU speed: %.0f MHz\n", mhz);
+#endif
 }
 
 void CleanProgressBar()