- Use std::fma() in the interpolation code.

* From everything I've read, this is more accurate, can be ~5% faster, and is heavily optimised on CPUs from the last 10-12 years.
* This homegrown solution can be replaced with `std::lerp()` once we move to C++20.
This commit is contained in:
Mitchell Richters 2022-11-07 19:28:34 +11:00
parent 88e92a15a6
commit ff7e0afa6f
2 changed files with 22 additions and 9 deletions

View file

@ -41,7 +41,7 @@
#define VECTORS_H
#include <cstddef>
#include <math.h>
#include <cmath>
#include <float.h>
#include <string.h>
#include "xs_Float.h"
@ -1548,15 +1548,27 @@ inline TVector2<T> clamp(const TVector2<T> &vec, const TVector2<T> &min, const T
}
template<class T>
inline TAngle<T> interpolatedvalue(const TAngle<T> &oang, const TAngle<T> &ang, const double interpfrac)
inline T interpolatedvalue(const T oval, const T val, const double interpfrac)
{
return oang + (deltaangle(oang, ang) * interpfrac);
// Two chained fused multiply-adds: the inner one yields (oval - interpfrac * oval),
// the outer one adds interpfrac * val to it, i.e. a lerp from oval to val.
return std::fma(interpfrac, val, std::fma(-interpfrac, oval, oval));
}
template <class T>
inline T interpolatedvalue(const T& oval, const T& val, const double interpfrac)
template<class T>
inline TVector2<T> interpolatedvalue(const TVector2<T>& oval, const TVector2<T>& val, const double interpfrac)
{
return T(oval + (val - oval) * interpfrac);
// Interpolates the X and Y members independently by calling interpolatedvalue() on each pair.
return { interpolatedvalue(oval.X, val.X, interpfrac), interpolatedvalue(oval.Y, val.Y, interpfrac) };
}
// Interpolates a TVector3 from oval to val by interpfrac, one component at a time.
// Each of X, Y and Z is passed through interpolatedvalue() on its own and the
// three results are assembled into the returned vector.
template<class T>
inline TVector3<T> interpolatedvalue(const TVector3<T>& oval, const TVector3<T>& val, const double interpfrac)
{
	const auto lerpedX = interpolatedvalue(oval.X, val.X, interpfrac);
	const auto lerpedY = interpolatedvalue(oval.Y, val.Y, interpfrac);
	const auto lerpedZ = interpolatedvalue(oval.Z, val.Z, interpfrac);
	return { lerpedX, lerpedY, lerpedZ };
}
// Interpolates between two angles by interpfrac.
// The end point is expressed as oang plus deltaangle(oang, ang) rather than ang
// itself; both end points are converted to degrees, interpolated as plain
// values, and the result is wrapped back into a TAngle via fromDeg().
template<class T>
inline TAngle<T> interpolatedvalue(const TAngle<T> oang, const TAngle<T> ang, const double interpfrac)
{
	const auto startDeg = oang.Degrees();
	const auto endDeg = (oang + deltaangle(oang, ang)).Degrees();
	return TAngle<T>::fromDeg(interpolatedvalue(startDeg, endDeg, interpfrac));
}
// Much of this is copied from TVector3. Is all that functionality really appropriate?

View file

@ -194,13 +194,14 @@ void QAV::Draw(int ticks, int stat, int shade, int palnum, bool to3dview, double
if (prevTile)
{
double prevAlpha = ((stat | prevTile->stat) & RS_TRANS1) ? glblend[0].def[!!((stat | prevTile->stat) & RS_TRANS2)].alpha : 1.f;
double thisAlpha = (tileStat & RS_TRANS1) ? glblend[0].def[!!(tileStat & RS_TRANS2)].alpha : 1.f;
tileX = interpolatedvalue<double>(prevTile->x, thisTile->x, interpfrac);
tileY = interpolatedvalue<double>(prevTile->y, thisTile->y, interpfrac);
tileZ = interpolatedvalue<double>(prevTile->z, thisTile->z, interpfrac);
tileA = interpolatedvalue(prevTile->angle, thisTile->angle, interpfrac);
tileShade = interpolatedvalue(prevTile->shade, thisTile->shade, interpfrac) + shade;
auto prevAlpha = ((stat | prevTile->stat) & RS_TRANS1) ? glblend[0].def[!!((stat | prevTile->stat) & RS_TRANS2)].alpha : 1.f;
auto thisAlpha = (tileStat & RS_TRANS1) ? glblend[0].def[!!(tileStat & RS_TRANS2)].alpha : 1.f;
tileShade = (int)interpolatedvalue<double>(prevTile->shade, thisTile->shade, interpfrac) + shade;
tileAlpha = interpolatedvalue(prevAlpha, thisAlpha, interpfrac);
}
else