From 9578c23aa189a02716e7e154feece4ea61787570 Mon Sep 17 00:00:00 2001
From: Marisa Kirisame
Date: Tue, 8 Mar 2022 17:54:19 +0100
Subject: [PATCH] Use RDTSC on Linux if possible for stats.

---
Review notes (git am ignores everything between the "---" line and the diff):
 * This copy was rebuilt from a paste that had lost its line breaks, its
   indentation and every angle-bracketed token. Indentation is re-created
   with tabs; context "#include" lines are left exactly as received, so
   re-sync those hunks against the tree before applying. The author
   e-mail on the From: line is missing for the same reason.
 * Hunk line counts and the diffstat are recomputed for the changes below;
   the blob ids on the "index" lines are still those of the original commit.
 * rdtsc(): the i386 path is now selected explicitly and unknown
   architectures return 0 instead of assembling x86 code.
 * CalculateCPUSpeed(): mmap() failure is tested against MAP_FAILED, the
   mapped page is unmapped, the shift is done in 64 bits, and perf is only
   probed where rdtsc() is implemented.

 src/common/engine/stats.h                  | 59 ++++++++++++++++++++++
 src/common/platform/posix/sdl/i_system.cpp | 49 +++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/src/common/engine/stats.h b/src/common/engine/stats.h
index 64b6e9d75c..3530d18834 100644
--- a/src/common/engine/stats.h
+++ b/src/common/engine/stats.h
@@ -54,6 +54,49 @@ public:
 
 #include
 
+// [MK] try to use RDTSC on linux if possible
+// avoids excess latency of clock_gettime() on some platforms
+#ifdef __linux__
+extern bool PerfAvailable;
+extern double PerfToSec, PerfToMillisec;
+
+// Reads the hardware timestamp counter (TSC on x86, time base on PowerPC).
+// Returns 0 on architectures, or i386 CPUs, without a usable counter.
+inline uint64_t rdtsc()
+{
+#ifdef __amd64__
+	uint64_t tsc;
+	asm volatile("rdtsc; shlq $32, %%rdx; orq %%rdx, %%rax":"=a"(tsc)::"%rdx");
+	return tsc;
+#elif defined __ppc__
+	unsigned int lower, upper, temp;
+	// Re-read the upper half until it is stable so a carry out of the
+	// lower half between the two reads cannot produce a torn value.
+	do
+	{
+		asm volatile ("mftbu %0 \n mftb %1 \n mftbu %2 \n"
+					  : "=r"(upper), "=r"(lower), "=r"(temp));
+	}
+	while (upper != temp);
+	return (static_cast<uint64_t>(upper) << 32) | lower;
+#elif defined __aarch64__
+	// TODO: Implement and test on ARM64
+	return 0;
+#elif defined __i386__
+	if (CPU.bRDTSC)
+	{
+		uint64_t tsc;
+		asm volatile ("\trdtsc\n" : "=A" (tsc));
+		return tsc;
+	}
+	return 0;
+#else
+	// No known counter: keep the header compiling on other architectures.
+	return 0;
+#endif // __amd64__
+}
+#endif
+
 class cycle_t
 {
 public:
@@ -64,6 +107,14 @@ public:
 
 	void Clock()
 	{
+#ifdef __linux__
+		if ( PerfAvailable )
+		{
+			int64_t time = rdtsc();
+			Sec -= time * PerfToSec;
+			return;
+		}
+#endif
 		timespec ts;
 
 		clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -72,6 +123,14 @@ public:
 
 	void Unclock()
 	{
+#ifdef __linux__
+		if ( PerfAvailable )
+		{
+			int64_t time = rdtsc();
+			Sec += time * PerfToSec;
+			return;
+		}
+#endif
 		timespec ts;
 
 		clock_gettime(CLOCK_MONOTONIC, &ts);
diff --git a/src/common/platform/posix/sdl/i_system.cpp b/src/common/platform/posix/sdl/i_system.cpp
index 0bca723049..243c64edc0 100644
--- a/src/common/platform/posix/sdl/i_system.cpp
+++ b/src/common/platform/posix/sdl/i_system.cpp
@@ -47,6 +47,13 @@
 #include
 #include
 
+#ifdef __linux__
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include "printf.h"
+#endif
+
 #include
 
 #include "version.h"
@@ -131,8 +138,50 @@ void I_ShowFatalError(const char *message)
 #endif
 }
 
+bool PerfAvailable;
+
 void CalculateCPUSpeed()
 {
+	PerfAvailable = false;
+	PerfToMillisec = PerfToSec = 0.;
+#if defined __linux__ && (defined __amd64__ || defined __i386__ || defined __ppc__)
+	// [MK] read from perf values if we can
+	// Only probed where rdtsc() in stats.h reads a real counter; on other
+	// architectures it returns 0 and every timing would come out as zero.
+	struct perf_event_attr pe = {};
+	pe.type = PERF_TYPE_HARDWARE;
+	pe.size = sizeof(struct perf_event_attr);
+	pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+	pe.disabled = 1;
+	pe.exclude_kernel = 1;
+	pe.exclude_hv = 1;
+	int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
+	if (fd == -1)
+	{
+		return;
+	}
+	// mmap() signals failure with MAP_FAILED, not with a null pointer.
+	void *addr = mmap(nullptr, 4096, PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED)
+	{
+		close(fd);
+		return;
+	}
+	const struct perf_event_mmap_page *pc = (const struct perf_event_mmap_page *)addr;
+	if (pc->cap_user_time == 1 && pc->time_mult != 0)
+	{
+		// Shift in 64 bits: unsigned long is only 32 bits wide on i386.
+		double mhz = (1000ULL << pc->time_shift) / (double)pc->time_mult;
+		PerfAvailable = true;
+		PerfToSec = .000001/mhz;
+		PerfToMillisec = PerfToSec*1000.;
+		if (!batchrun) Printf("CPU speed: %.0f MHz\n", mhz);
+	}
+	// The conversion factors are cached by now, so release the page and
+	// the event descriptor on every path that got this far.
+	munmap(addr, 4096);
+	close(fd);
+#endif
 }
 
 void CleanProgressBar()