diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp
index c35198ba8..2272acf8d 100644
--- a/tools/drawergen/fixedfunction/drawspancodegen.cpp
+++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp
@@ -256,7 +256,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
 		// attenuation = 1 - MIN(dist * (1/radius), 1)
 		SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z
 		SSAFloat Lx = light_x - viewpos_x;
-		SSAFloat dist = SSAFloat::sqrt(Lyz2 + Lx * Lx);
+		SSAFloat dist = SSAFloat::fastsqrt(Lyz2 + Lx * Lx);
 		SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
 		SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
 
diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp
index 5e7f9000d..898aebdbb 100644
--- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp
+++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp
@@ -244,7 +244,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
 		// attenuation = 1 - MIN(dist * (1/radius), 1)
 		SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y
 		SSAFloat Lz = light_z - z;
-		SSAFloat dist = SSAFloat::sqrt(Lxy2 + Lz * Lz);
+		SSAFloat dist = SSAFloat::fastsqrt(Lxy2 + Lz * Lz);
 		SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
 		SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
 
diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp
index f587d7b5a..f537792fc 100644
--- a/tools/drawergen/ssa/ssa_float.cpp
+++ b/tools/drawergen/ssa/ssa_float.cpp
@@ -25,6 +25,7 @@
 #include "ssa_int.h"
 #include "ssa_scope.h"
 #include "ssa_bool.h"
+#include "ssa_vec4f.h"
 
 SSAFloat::SSAFloat()
 : v(0)
@@ -60,6 +61,27 @@ SSAFloat SSAFloat::sqrt(SSAFloat f)
 	return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint()));
 }
 
+// Approximate sqrt(f), computed as f * rsqrt(f).
+// rsqrt(0) is +inf and 0 * inf is NaN, so an input of exactly zero is
+// selected to 0 explicitly instead of taking the product.
+SSAFloat SSAFloat::fastsqrt(SSAFloat f)
+{
+	llvm::Value *zero = SSAFloat(0.0f).v;
+	llvm::Value *is_zero = SSAScope::builder().CreateFCmpOEQ(f.v, zero, SSAScope::hint());
+	SSAFloat approx = f * rsqrt(f);
+	return SSAFloat::from_llvm(SSAScope::builder().CreateSelect(is_zero, zero, approx.v, SSAScope::hint()));
+}
+
+// Approximate 1/sqrt(f) using the x86 SSE rsqrt_ss intrinsic (x86 targets only).
+// The scalar is inserted into element 0 of an otherwise undefined vector of
+// SSAVec4f's LLVM type, the intrinsic is applied, and element 0 is extracted.
+SSAFloat SSAFloat::rsqrt(SSAFloat f)
+{
+	llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0)));
+	f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint());
+	return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint()));
+}
+
 SSAFloat SSAFloat::sin(SSAFloat val)
 {
 	std::vector params;
diff --git a/tools/drawergen/ssa/ssa_float.h b/tools/drawergen/ssa/ssa_float.h
index 69fb81a75..f7e2b93ef 100644
--- a/tools/drawergen/ssa/ssa_float.h
+++ b/tools/drawergen/ssa/ssa_float.h
@@ -37,6 +37,8 @@ public:
 	static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
 	static llvm::Type *llvm_type();
 	static SSAFloat sqrt(SSAFloat f);
+	static SSAFloat fastsqrt(SSAFloat f);
+	static SSAFloat rsqrt(SSAFloat f);
 	static SSAFloat sin(SSAFloat val);
 	static SSAFloat cos(SSAFloat val);
 	static SSAFloat pow(SSAFloat val, SSAFloat power);