mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-11 15:21:51 +00:00
Switch to the lower-precision rsqrtss instruction, as it is faster, especially on older Intel architectures and on AMD.
This commit is contained in:
parent
fa66ca214e
commit
9a529192b0
4 changed files with 17 additions and 2 deletions
|
@ -256,7 +256,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
|
|||
// attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||
SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z
|
||||
SSAFloat Lx = light_x - viewpos_x;
|
||||
SSAFloat dist = SSAFloat::sqrt(Lyz2 + Lx * Lx);
|
||||
SSAFloat dist = SSAFloat::fastsqrt(Lyz2 + Lx * Lx);
|
||||
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
|
||||
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
|
||||
|
||||
|
|
|
@ -244,7 +244,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
|
|||
// attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||
SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y
|
||||
SSAFloat Lz = light_z - z;
|
||||
SSAFloat dist = SSAFloat::sqrt(Lxy2 + Lz * Lz);
|
||||
SSAFloat dist = SSAFloat::fastsqrt(Lxy2 + Lz * Lz);
|
||||
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
|
||||
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "ssa_int.h"
|
||||
#include "ssa_scope.h"
|
||||
#include "ssa_bool.h"
|
||||
#include "ssa_vec4f.h"
|
||||
|
||||
SSAFloat::SSAFloat()
|
||||
: v(0)
|
||||
|
@ -60,6 +61,18 @@ SSAFloat SSAFloat::sqrt(SSAFloat f)
|
|||
return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint()));
|
||||
}
|
||||
|
||||
// Fast, reduced-precision square root, computed as f * (1 / sqrt(f)).
//
// The result is built on rsqrt(), so it carries that approximation's error
// instead of the precision of SSAFloat::sqrt(). Intended for f >= 0.
SSAFloat SSAFloat::fastsqrt(SSAFloat f)
{
	// rsqrt(0) is +inf and 0 * inf is NaN, while a square root of 0 must be 0.
	// Biasing only the rsqrt argument by a tiny normal float keeps it finite for
	// f == 0 (0 * large == 0); for any f of meaningful magnitude the bias is
	// lost to float rounding and the result is unchanged.
	return f * rsqrt(f + SSAFloat(1.0e-30f));
}
|
||||
|
||||
// Emits an approximate reciprocal square root of f through the x86 SSE
// rsqrt_ss intrinsic. The intrinsic is called on an SSAVec4f-typed value, so
// the scalar is inserted at element 0 of an undefined vector and the result
// is extracted from element 0 again.
SSAFloat SSAFloat::rsqrt(SSAFloat f)
{
	// Pack the scalar into element 0; the remaining elements stay undefined.
	llvm::Value *laneZero = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0));
	llvm::Value *undefVec = llvm::UndefValue::get(SSAVec4f::llvm_type());
	llvm::Value *packed = SSAScope::builder().CreateInsertElement(undefVec, f.v, laneZero);

	// Run the approximation on the packed vector.
	llvm::Value *approx = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), packed, SSAScope::hint());

	// Unpack element 0 back into a scalar SSAFloat.
	llvm::Value *scalar = SSAScope::builder().CreateExtractElement(approx, SSAInt(0).v, SSAScope::hint());
	return SSAFloat::from_llvm(scalar);
}
|
||||
|
||||
SSAFloat SSAFloat::sin(SSAFloat val)
|
||||
{
|
||||
std::vector<llvm::Type *> params;
|
||||
|
|
|
@ -37,6 +37,8 @@ public:
|
|||
static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
|
||||
static llvm::Type *llvm_type();
|
||||
static SSAFloat sqrt(SSAFloat f);
|
||||
// Approximate square root built on rsqrt(); lower precision than sqrt().
static SSAFloat fastsqrt(SSAFloat f);
|
||||
// Approximate reciprocal square root (1 / sqrt(f)), emitted via the x86 SSE rsqrt_ss intrinsic.
static SSAFloat rsqrt(SSAFloat f);
|
||||
static SSAFloat sin(SSAFloat val);
|
||||
static SSAFloat cos(SSAFloat val);
|
||||
static SSAFloat pow(SSAFloat val, SSAFloat power);
|
||||
|
|
Loading…
Reference in a new issue