Refactor timing code even more

The system layer is greatly simplified, and the framecap
logic has been moved out of it into the engine's main
loop. I_Sleep now takes a sleep duration in milliseconds,
and I_SleepDuration generically implements a precise
sleep that finishes with a spin loop.
This commit is contained in:
Eidolon 2022-05-01 00:32:46 -05:00
parent 7d7564b7f9
commit f0d7d8467f
19 changed files with 176 additions and 152 deletions

View file

@ -82,7 +82,17 @@ INT64 current_time_in_ps() {
return (t.tv_sec * (INT64)1000000) + t.tv_usec;
}
void I_Sleep(void){}
void I_Sleep(UINT32 ms){}
// Stub implementation: always reports a precise time of 0.
// NOTE(review): the value never advances, so a loop that waits on this counter
// (such as the one in I_SleepDuration) cannot finish for a nonzero duration --
// confirm nothing on this backend depends on it.
precise_t I_GetPreciseTime(void)
{
return 0;
}
// Stub implementation: reports a fixed precision of 1000000 precise_t units
// per second.
UINT64 I_GetPrecisePrecision(void)
{
return 1000000;
}
void I_GetEvent(void){}

View file

@ -2442,7 +2442,10 @@ static boolean CL_ServerConnectionTicker(const char *tmpsave, tic_t *oldtic, tic
#endif
}
else
I_Sleep();
{
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
return true;
}

View file

@ -696,15 +696,15 @@ tic_t rendergametic;
void D_SRB2Loop(void)
{
tic_t realtics = 0, rendertimeout = INFTICS;
tic_t entertic = 0, oldentertics = 0, realtics = 0, rendertimeout = INFTICS;
double deltatics = 0.0;
double deltasecs = 0.0;
static lumpnum_t gstartuplumpnum;
boolean interp = false;
boolean doDisplay = false;
boolean screenUpdate = false;
double frameEnd = 0.0;
if (dedicated)
server = true;
@ -716,6 +716,7 @@ void D_SRB2Loop(void)
#endif
I_UpdateTime(cv_timescale.value);
oldentertics = I_GetTime();
// end of loading screen: CONS_Printf() will no more call FinishUpdate()
con_refresh = false;
@ -755,20 +756,30 @@ void D_SRB2Loop(void)
for (;;)
{
frameEnd = I_GetFrameTime();
// capbudget is the minimum precise_t duration of a single loop iteration
precise_t capbudget;
precise_t enterprecise = I_GetPreciseTime();
precise_t finishprecise = enterprecise;
{
// Casting the return value of a function is bad practice (apparently)
double budget = round((1.0 / R_GetFramerateCap()) * I_GetPrecisePrecision());
capbudget = (precise_t) budget;
}
I_UpdateTime(cv_timescale.value);
// Can't guarantee that I_UpdateTime won't be called inside TryRunTics
// so capture the realtics for later use
realtics = g_time.realtics;
if (lastwipetic)
{
// oldentertics = lastwipetic;
oldentertics = lastwipetic;
lastwipetic = 0;
}
// get real tics
entertic = I_GetTime();
realtics = entertic - oldentertics;
oldentertics = entertic;
if (demoplayback && gamestate == GS_LEVEL)
{
// Nicer place to put this.
@ -803,11 +814,11 @@ void D_SRB2Loop(void)
if (lastdraw || singletics || gametic > rendergametic)
{
rendergametic = gametic;
rendertimeout = g_time.time + TICRATE/17;
rendertimeout = entertic + TICRATE/17;
doDisplay = true;
}
else if (rendertimeout < g_time.time) // in case the server hang or netsplit
else if (rendertimeout < entertic) // in case the server hang or netsplit
{
// Lagless camera! Yay!
if (gamestate == GS_LEVEL && netgame)
@ -839,9 +850,9 @@ void D_SRB2Loop(void)
// I looked at the possibility of putting in a float drawer for
// perfstats and it's very complicated, so we'll just do this instead...
ps_interp_frac.value.p = (precise_t)((FIXED_TO_FLOAT(g_time.timefrac)) * 1000.0f);
ps_interp_lag.value.p = (precise_t)((FIXED_TO_FLOAT(g_time.deltaseconds)) * 1000.0f);
ps_interp_lag.value.p = (precise_t)((deltasecs) * 1000.0f);
renderdeltatics = g_time.deltatics;
renderdeltatics = FLOAT_TO_FIXED(deltatics);
if (!(paused || P_AutoPause()))
{
@ -873,12 +884,6 @@ void D_SRB2Loop(void)
LUA_Step();
// Fully completed frame made.
if (!singletics)
{
I_FrameCapSleep(frameEnd);
}
// I_FinishUpdate is now here instead of D_Display,
// because it synchronizes it more closely with the frame counter.
if (screenUpdate == true)
@ -888,6 +893,21 @@ void D_SRB2Loop(void)
PS_STOP_TIMING(ps_swaptime);
}
// Fully completed frame made.
finishprecise = I_GetPreciseTime();
if (!singletics)
{
INT64 elapsed = (INT64)(finishprecise - enterprecise);
if (elapsed > 0 && (INT64)capbudget > elapsed)
{
I_SleepDuration(capbudget - (finishprecise - enterprecise));
}
}
// Capture the time once more to get the real delta time.
finishprecise = I_GetPreciseTime();
deltasecs = (double)((INT64)(finishprecise - enterprecise)) / I_GetPrecisePrecision();
deltatics = deltasecs * NEWTICRATE;
// Only take screenshots after drawing.
if (moviemode)
M_SaveFrame();

View file

@ -615,7 +615,10 @@ void Net_WaitAllAckReceived(UINT32 timeout)
while (timeout > I_GetTime() && !Net_AllAcksReceived())
{
while (tictac == I_GetTime())
I_Sleep();
{
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
tictac = I_GetTime();
HGetPacket();
Net_AckTicker();

View file

@ -11,7 +11,15 @@ UINT32 I_GetFreeMem(UINT32 *total)
return 0;
}
void I_Sleep(void){}
void I_Sleep(UINT32 ms){}
// Stub implementation: always reports a precise time of 0.
// NOTE(review): the value never advances, so a loop that waits on this counter
// (such as the one in I_SleepDuration) cannot finish for a nonzero duration --
// confirm nothing on this backend depends on it.
precise_t I_GetPreciseTime(void) {
return 0;
}
// Stub implementation: reports a fixed precision of 1000000 precise_t units
// per second.
UINT64 I_GetPrecisePrecision(void) {
return 1000000;
}
void I_GetEvent(void){}

View file

@ -915,7 +915,10 @@ void F_IntroTicker(void)
while (quittime > nowtime)
{
while (!((nowtime = I_GetTime()) - lasttime))
I_Sleep();
{
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
lasttime = nowtime;
I_OsPolling();

View file

@ -556,7 +556,10 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu)
// wait loop
while (!((nowtime = I_GetTime()) - lastwipetic))
I_Sleep();
{
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
lastwipetic = nowtime;
// Wipe styles

View file

@ -1905,7 +1905,10 @@ void G_PreLevelTitleCard(void)
{
// draw loop
while (!((nowtime = I_GetTime()) - lasttime))
I_Sleep();
{
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
lasttime = nowtime;
ST_runTitleCard();

View file

@ -42,33 +42,32 @@ extern UINT8 keyboard_started;
*/
UINT32 I_GetFreeMem(UINT32 *total);
/** \brief Returns precise time value for performance measurement.
/** \brief Returns precise time value for performance measurement. The precise
time should be a monotonically increasing counter, and will wrap.
precise_t is internally represented as an unsigned integer and
integer arithmetic may be used directly between values of precise_t.
*/
precise_t I_GetPreciseTime(void);
/** \brief Converts a precise_t to microseconds and casts it to a 32 bit integer.
/** \brief Get the precision of precise_t in units per second. Invocations of
this function for the program's duration MUST return the same value.
*/
int I_PreciseToMicros(precise_t d);
/** \brief Calculates the elapsed microseconds between two precise_t.
*/
double I_PreciseElapsedSeconds(precise_t before, precise_t after);
UINT64 I_GetPrecisePrecision(void);
/** \brief Get the current time in rendering tics, including fractions.
*/
double I_GetFrameTime(void);
/** \brief Sleeps by the value of cv_sleep
/** \brief Sleeps for the given duration in milliseconds. Depending on the
operating system's scheduler, the calling thread may give up its
time slice for a longer duration. The implementation should give a
best effort to sleep for the given duration, without spin-locking.
Calling code should check the current precise time after sleeping
and not assume the thread has slept for the expected duration.
\return void
*/
void I_Sleep(void);
/** \brief Sleeps for a variable amount of time, depending on how much time the last frame took.
\return void
*/
boolean I_FrameCapSleep(const double frameStart);
void I_Sleep(UINT32 ms);
/** \brief Get events

View file

@ -13,8 +13,11 @@
#include "i_time.h"
#include <math.h>
#include "command.h"
#include "doomtype.h"
#include "d_netcmd.h"
#include "m_fixed.h"
#include "i_system.h"
@ -36,9 +39,6 @@ void I_InitializeTime(void)
{
g_time.time = 0;
g_time.timefrac = 0;
g_time.realtics = 0;
g_time.deltaseconds = 0;
g_time.ticrate = FLOAT_TO_FIXED(TICRATE);
enterprecise = 0;
oldenterprecise = 0;
@ -61,7 +61,7 @@ void I_UpdateTime(fixed_t timescale)
ticratescaled = (double)TICRATE * FIXED_TO_FLOAT(timescale);
enterprecise = I_GetPreciseTime();
elapsedseconds = I_PreciseElapsedSeconds(oldenterprecise, enterprecise);
elapsedseconds = (double)(enterprecise - oldenterprecise) / I_GetPrecisePrecision();
tictimer += elapsedseconds;
while (tictimer > 1.0/ticratescaled)
{
@ -79,8 +79,39 @@ void I_UpdateTime(fixed_t timescale)
fractional = modf(tictimer * ticratescaled, &integral);
g_time.timefrac = FLOAT_TO_FIXED(fractional);
}
g_time.realtics = realtics;
g_time.deltatics = FLOAT_TO_FIXED(elapsedseconds * ticratescaled);
g_time.deltaseconds = FLOAT_TO_FIXED(elapsedseconds);
g_time.ticrate = FLOAT_TO_FIXED(ticratescaled);
}
/** \brief Block until at least `duration` precise-time units have passed.

	While the time remaining is larger than the sleep granularity, the
	thread is put to sleep through I_Sleep in steps of cv_sleep
	milliseconds. The final stretch is a busy-wait on I_GetPreciseTime so
	that the wake-up does not overshoot the target.

	\param duration  how long to block, in precise_t units
	                 (I_GetPrecisePrecision units per second)
*/
void I_SleepDuration(precise_t duration)
{
	const UINT64 precision = I_GetPrecisePrecision();
	const INT32 sleepvalue = cv_sleep.value;
	UINT64 delaygranularity = 0;
	precise_t cur;
	precise_t dest;

	// The granularity is only needed when sleeping is enabled. Computing it
	// for a negative cv_sleep would convert a negative double to an unsigned
	// integer, which is undefined behaviour.
	if (sleepvalue > 0)
	{
		// One sleep step expressed in precise_t units, padded by 2.1x as a
		// safety margin against the OS sleeping longer than requested.
		// Dividing in floating point keeps the sub-millisecond part of the
		// precision instead of truncating it.
		double gran = round(((double)precision / 1000.0) * sleepvalue * 2.1);
		delaygranularity = (UINT64)gran;
	}

	cur = I_GetPreciseTime();
	dest = cur + duration;

	// the reason this is not dest > cur is because the precise counter may wrap
	// two's complement arithmetic is our friend here, though!
	// e.g. cur 0xFFFFFFFFFFFFFFFE = -2, dest 0x0000000000000001 = 1
	// 0x0000000000000001 - 0xFFFFFFFFFFFFFFFE = 3
	while ((INT64)(dest - cur) > 0)
	{
		// If the remaining duration still exceeds the padded sleep step,
		// use the hard sleep function.
		if (sleepvalue > 0 && (dest - cur) > delaygranularity)
		{
			I_Sleep(sleepvalue);
		}

		// Otherwise, this is a spinloop.
		cur = I_GetPreciseTime();
	}
}

View file

@ -25,10 +25,6 @@ extern "C" {
typedef struct timestate_s {
tic_t time;
fixed_t timefrac;
fixed_t realtics;
fixed_t deltatics;
fixed_t deltaseconds;
fixed_t ticrate;
} timestate_t;
extern timestate_t g_time;
@ -44,6 +40,13 @@ void I_InitializeTime(void);
void I_UpdateTime(fixed_t timescale);
/** \brief Block for at minimum the duration specified. This function makes a
best effort not to oversleep, and will spinloop if sleeping would
take too long. However, callers should still check the current time
after this returns.
*/
void I_SleepDuration(precise_t duration);
#ifdef __cplusplus
} // extern "C"
#endif

View file

@ -31,7 +31,7 @@
#include "m_misc.h" // M_MapNumber
#include "b_bot.h" // B_UpdateBotleader
#include "d_clisrv.h" // CL_RemovePlayer
#include "i_system.h" // I_GetPreciseTime, I_PreciseToMicros
#include "i_system.h" // I_GetPreciseTime, I_GetPrecisePrecision
#include "lua_script.h"
#include "lua_libs.h"
@ -3917,7 +3917,7 @@ static int lib_gTicsToMilliseconds(lua_State *L)
// Lua binding: pushes the current precise time, converted to microseconds,
// and returns one result.
static int lib_getTimeMicros(lua_State *L)
{
	const UINT64 precision = I_GetPrecisePrecision();
	const UINT64 now = (UINT64)I_GetPreciseTime();
	UINT64 micros = 0;

	// Convert whole seconds and the sub-second remainder separately.
	// Dividing by (precision / 1000000) would truncate the divisor for
	// frequencies that are not a multiple of 1 MHz and would divide by zero
	// for frequencies below 1 MHz.
	if (precision > 0)
		micros = (now / precision) * 1000000 + ((now % precision) * 1000000) / precision;

	lua_pushinteger(L, micros);
	return 1;
}

View file

@ -608,7 +608,7 @@ static void GIF_framewrite(void)
{
// golden's attempt at creating a "dynamic delay"
UINT16 mingifdelay = 10; // minimum gif delay in milliseconds (keep at 10 because gifs can't get more precise).
gif_delayus += I_PreciseToMicros(I_GetPreciseTime() - gif_prevframetime); // increase delay by how much time was spent between last measurement
gif_delayus += (I_GetPreciseTime() - gif_prevframetime) / (I_GetPrecisePrecision() / 1000000); // increase delay by how much time was spent between last measurement
if (gif_delayus/1000 >= mingifdelay) // delay is big enough to be able to effect gif frame delay?
{
@ -621,7 +621,7 @@ static void GIF_framewrite(void)
{
float delayf = ceil(100.0f/NEWTICRATE);
delay = (UINT16)I_PreciseToMicros((I_GetPreciseTime() - gif_prevframetime))/10/1000;
delay = (UINT16)((I_GetPreciseTime() - gif_prevframetime)) / (I_GetPrecisePrecision() / 1000000) /10/1000;
if (delay < (UINT16)(delayf))
delay = (UINT16)(delayf);

View file

@ -13569,7 +13569,8 @@ void M_QuitResponse(INT32 ch)
{
V_DrawScaledPatch(0, 0, 0, W_CachePatchName("GAMEQUIT", PU_PATCH)); // Demo 3 Quit Screen Tails 06-16-2001
I_FinishUpdate(); // Update the screen with the image Tails 06-19-2001
I_Sleep();
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
}
I_Quit();

View file

@ -268,7 +268,7 @@ static INT32 PS_GetMetricAverage(ps_metric_t *metric, boolean time_metric)
for (i = 0; i < cv_ps_samplesize.value; i++)
{
if (time_metric)
sum += I_PreciseToMicros(*((precise_t*)history_read_pos));
sum += (*((precise_t*)history_read_pos)) / (I_GetPrecisePrecision() / 1000000);
else
sum += *((INT32*)history_read_pos);
history_read_pos += value_size;
@ -288,7 +288,7 @@ static INT32 PS_GetMetricMinOrMax(ps_metric_t *metric, boolean time_metric, bool
{
INT32 value;
if (time_metric)
value = I_PreciseToMicros(*((precise_t*)history_read_pos));
value = (*((precise_t*)history_read_pos)) / (I_GetPrecisePrecision() / 1000000);
else
value = *((INT32*)history_read_pos);
@ -316,7 +316,7 @@ static INT32 PS_GetMetricSD(ps_metric_t *metric, boolean time_metric)
{
INT64 value;
if (time_metric)
value = I_PreciseToMicros(*((precise_t*)history_read_pos));
value = (*((precise_t*)history_read_pos)) / (I_GetPrecisePrecision() / 1000000);
else
value = *((INT32*)history_read_pos);
@ -346,7 +346,7 @@ static INT32 PS_GetMetricScreenValue(ps_metric_t *metric, boolean time_metric)
else
{
if (time_metric)
return I_PreciseToMicros(metric->value.p);
return (metric->value.p) / (I_GetPrecisePrecision() / 1000000);
else
return metric->value.i;
}

View file

@ -4072,7 +4072,10 @@ static void P_RunSpecialStageWipe(void)
{
// wait loop
while (!((nowtime = I_GetTime()) - lastwipetic))
I_Sleep();
{
I_Sleep(cv_sleep.value);
I_UpdateTime(cv_timescale.value);
}
lastwipetic = nowtime;
if (moviemode) // make sure we save frames for the white hold too
M_SaveFrame();

View file

@ -479,12 +479,12 @@ void SCR_CalculateFPS(void)
return;
}
updateElapsed = I_PreciseToMicros(endTime - updateTime);
updateElapsed = (endTime - updateTime) / (I_GetPrecisePrecision() / 1000000);
if (updateElapsed >= FPS_SAMPLE_RATE)
{
static int sampleIndex = 0;
int frameElapsed = I_PreciseToMicros(endTime - startTime);
int frameElapsed = (endTime - startTime) / (I_GetPrecisePrecision() / 1000000);
fps_samples[sampleIndex] = frameElapsed / 1000.0f;

View file

@ -2144,36 +2144,16 @@ ticcmd_t *I_BaseTiccmd2(void)
static Uint64 timer_frequency;
//
// I_GetPreciseTime
// returns time in precise_t
//
precise_t I_GetPreciseTime(void)
{
// The precise time on this backend is SDL's performance counter value.
return SDL_GetPerformanceCounter();
}
int I_PreciseToMicros(precise_t d)
UINT64 I_GetPrecisePrecision(void)
{
// d is going to be converted into a double. So remove the highest bits
// to avoid loss of precision in the lower bits, for the (probably rare) case
// that the higher bits are actually used.
d &= ((precise_t)1 << 53) - 1; // The mantissa of a double can handle 53 bits at most.
// The resulting double from the calculation is converted first to UINT64 to avoid overflow,
// which is undefined behaviour when converting floating point values to integers.
return (int)(UINT64)(d / (timer_frequency / 1000000.0));
return SDL_GetPerformanceFrequency();
}
double I_PreciseElapsedSeconds(precise_t before, precise_t after)
{
return (after - before) / (double)timer_frequency;
}
//
// I_GetFrameTime
// returns time in 1/fpscap second tics
//
static UINT32 frame_rate;
static double frame_frequency;
@ -2233,68 +2213,9 @@ void I_StartupTimer(void)
elapsed_frames = 0.0;
}
//
// I_Sleep
// Sleeps by the value of cv_sleep
//
void I_Sleep(void)
void I_Sleep(UINT32 ms)
{
if (cv_sleep.value > 0)
SDL_Delay(cv_sleep.value);
// I_Sleep is still called in a number of places
// we need to update the internal time state to make this work
I_UpdateTime(cv_timescale.value);
}
//
// I_FrameCapSleep
// Sleeps for a variable amount of time, depending on how much time the frame took.
//
boolean I_FrameCapSleep(const double t)
{
// SDL_Delay(1) gives me a range of around 1.95ms to 2.05ms.
// Has a bit extra to be totally safe.
const double delayGranularity = 2.1;
double frameMS = 0.0;
double curTime = 0.0;
double destTime = 0.0;
double sleepTime = 0.0;
if (frame_rate == 0)
{
// We don't want to cap.
return false;
}
curTime = I_GetFrameTime();
destTime = floor(t) + 1.0;
if (curTime >= destTime)
{
// We're already behind schedule.
return false;
}
frameMS = frame_rate * 0.001; // 1ms as frame time
sleepTime = destTime - (delayGranularity * frameMS);
while (curTime < destTime)
{
if (curTime < sleepTime && cv_sleep.value > 0)
{
// Wait 1ms at a time (on default settings)
// until we're close enough.
SDL_Delay(cv_sleep.value);
}
// This part will spin-lock the rest.
curTime = I_GetFrameTime();
}
// We took our nap.
return true;
SDL_Delay(ms);
}
#ifdef NEWSIGNALHANDLER

View file

@ -263,12 +263,25 @@ tic_t I_GetTime(void)
return newtics;
}
void I_Sleep(void)
precise_t I_GetPreciseTime(void)
{
if (cv_sleep.value != -1)
Sleep(cv_sleep.value);
LARGE_INTEGER time;
BOOL res = QueryPerformanceCounter(&time);
if (!res) I_Error("QueryPerformanceCounter error"); // if this happens, you've gone back to the 90s
return (precise_t) time.QuadPart;
}
I_UpdateTime(cv_timescale.value);
UINT64 I_GetPrecisePrecision(void)
{
LARGE_INTEGER time;
BOOL res = QueryPerformanceFrequency(&time);
if (!res) I_Error("QueryPerformanceFrequency error"); // if this happens, you've gone back to the 90s
return (precise_t) time.QuadPart;
}
// Suspends the calling thread through Sleep() for the requested number of
// milliseconds.
void I_Sleep(UINT32 ms)
{
Sleep(ms);
}
// should move to i_video