Use RDTSC on Linux if possible for stats.

This commit is contained in:
Marisa Kirisame 2022-03-08 17:54:19 +01:00 committed by Rachael Alexanderson
parent d348bad823
commit 9578c23aa1
2 changed files with 98 additions and 0 deletions

View File

@ -54,6 +54,42 @@ public:
#include <time.h>
// [MK] Try to read the CPU's cycle counter directly (RDTSC or the platform's
// equivalent) on Linux where possible; this avoids the extra latency of
// clock_gettime() on some platforms.
#ifdef __linux__
// True when the raw counter returned by rdtsc() may be used for timing;
// Clock()/Unclock() fall back to clock_gettime() otherwise.
extern bool PerfAvailable;
// Scale factors that convert a raw counter value to seconds and to
// milliseconds (PerfToMillisec is PerfToSec * 1000).
extern double PerfToSec, PerfToMillisec;
// Reads the CPU's free-running hardware counter.
//
// Returns the raw 64-bit counter value. The unit is platform specific; callers
// scale it with PerfToSec. Returns 0 when no counter can be read (32-bit x86
// without RDTSC support).
inline uint64_t rdtsc()
{
#ifdef __amd64__
	uint64_t tsc;
	// RDTSC leaves the low half in EAX and the high half in EDX; fold both into RAX.
	asm volatile("rdtsc; shlq $32, %%rdx; orq %%rdx, %%rax":"=a"(tsc)::"%rdx");
	return tsc;
#elif defined __ppc__
	unsigned int lower, upper, temp;
	// The 64-bit time base is read as two 32-bit halves. Re-read the upper half
	// and retry if it changed, i.e. the lower half wrapped between the reads.
	do
	{
		asm volatile ("mftbu %0 \n mftb %1 \n mftbu %2 \n"
			: "=r"(upper), "=r"(lower), "=r"(temp));
	}
	while (upper != temp);
	return (static_cast<unsigned long long>(upper) << 32) | lower;
#elif defined __aarch64__
	// Read the generic timer's virtual count register instead of returning a
	// constant 0, which made every measurement collapse to zero whenever
	// PerfAvailable was set on this architecture.
	// NOTE(review): presumably this is the counter the perf time_mult/time_shift
	// scale refers to on ARM64 - confirm on real hardware.
	uint64_t vct;
	asm volatile ("mrs %0, cntvct_el0" : "=r"(vct));
	return vct;
#else // i386
	if (CPU.bRDTSC)
	{
		uint64_t tsc;
		// "=A" binds the 64-bit result to the EDX:EAX register pair.
		asm volatile ("\trdtsc\n" : "=A" (tsc));
		return tsc;
	}
	return 0;
#endif // __amd64__
}
#endif
class cycle_t
{
public:
@ -64,6 +100,14 @@ public:
void Clock()
{
#ifdef __linux__
if ( PerfAvailable )
{
int64_t time = rdtsc();
Sec -= time * PerfToSec;
return;
}
#endif
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
@ -72,6 +116,14 @@ public:
void Unclock()
{
#ifdef __linux__
if ( PerfAvailable )
{
int64_t time = rdtsc();
Sec += time * PerfToSec;
return;
}
#endif
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);

View File

@ -47,6 +47,13 @@
#include <unistd.h>
#include <sys/ioctl.h>
#ifdef __linux__
#include <asm/unistd.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
#include "printf.h"
#endif
#include <SDL.h>
#include "version.h"
@ -131,8 +138,47 @@ void I_ShowFatalError(const char *message)
#endif
}
// Set by CalculateCPUSpeed(): true once the counter-to-time scale has been
// read successfully from the perf metadata page, false otherwise.
bool PerfAvailable;
// Determines whether the raw CPU counter can be used for timing and, if so,
// derives the factors that convert counter values to time.
//
// On Linux a disabled perf event is opened only so that its metadata page can
// be mapped; that page's time_mult/time_shift fields describe the kernel's
// cycles-to-nanoseconds conversion. On success PerfAvailable is set to true and
// PerfToSec / PerfToMillisec hold the duration of one counter tick. On any
// failure, and on non-Linux builds, they are left at false / 0.
void CalculateCPUSpeed()
{
	PerfAvailable = false;
	PerfToMillisec = PerfToSec = 0.;
#ifdef __linux__
	// [MK] read from perf values if we can
	struct perf_event_attr pe =
	{
		.type = PERF_TYPE_HARDWARE,
		.size = sizeof(struct perf_event_attr),
		.config = PERF_COUNT_HW_INSTRUCTIONS,
		.disabled = 1,
		.exclude_kernel = 1,
		.exclude_hv = 1
	};
	int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
	if (fd == -1)
	{
		return;
	}
	// Only the metadata page is needed, not the sample ring buffer.
	const size_t maplen = 4096;
	void *addr = mmap(nullptr, maplen, PROT_READ, MAP_SHARED, fd, 0);
	// mmap() reports failure with MAP_FAILED, never with a null pointer.
	if (addr == MAP_FAILED)
	{
		close(fd);
		return;
	}
	const struct perf_event_mmap_page *pc = (const struct perf_event_mmap_page *)addr;
	// The scale is only meaningful when the kernel advertises cap_user_time.
	// Also reject a zero multiplier (division by zero) and a shift that would
	// not fit a 64-bit value.
	const bool usable = pc->cap_user_time == 1 && pc->time_mult != 0 && pc->time_shift < 64;
	double mhz = 0.;
	if (usable)
	{
		// ns = (cycles * time_mult) >> time_shift, hence
		// cycles per microsecond (MHz) = 1000 * 2^time_shift / time_mult.
		// The shift is done on a 64-bit 1 and the rest in floating point so that
		// it cannot overflow where unsigned long is only 32 bits wide.
		mhz = 1000. * (double)(1ULL << pc->time_shift) / (double)pc->time_mult;
	}
	// Everything needed has been copied out; release the mapping and the event.
	munmap(addr, maplen);
	close(fd);
	if (!usable)
	{
		return;
	}
	PerfAvailable = true;
	PerfToSec = .000001/mhz;
	PerfToMillisec = PerfToSec*1000.;
	if (!batchrun) Printf("CPU speed: %.0f MHz\n", mhz);
#endif
}
void CleanProgressBar()