Switch to the lower-precision rsqrtss instruction, as it is faster, especially on older Intel architectures and on AMD

This commit is contained in:
Magnus Norddahl 2016-12-22 21:21:57 +01:00
parent fa66ca214e
commit 9a529192b0
4 changed files with 17 additions and 2 deletions

View file

@ -256,7 +256,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
// attenuation = 1 - MIN(dist * (1/radius), 1)
SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z
SSAFloat Lx = light_x - viewpos_x;
SSAFloat dist = SSAFloat::sqrt(Lyz2 + Lx * Lx);
SSAFloat dist = SSAFloat::fastsqrt(Lyz2 + Lx * Lx);
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;

View file

@ -244,7 +244,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
// attenuation = 1 - MIN(dist * (1/radius), 1)
SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y
SSAFloat Lz = light_z - z;
SSAFloat dist = SSAFloat::sqrt(Lxy2 + Lz * Lz);
SSAFloat dist = SSAFloat::fastsqrt(Lxy2 + Lz * Lz);
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;

View file

@ -25,6 +25,7 @@
#include "ssa_int.h"
#include "ssa_scope.h"
#include "ssa_bool.h"
#include "ssa_vec4f.h"
SSAFloat::SSAFloat()
: v(0)
@ -60,6 +61,18 @@ SSAFloat SSAFloat::sqrt(SSAFloat f)
return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint()));
}
// Approximate square root built on rsqrt(): sqrt(f) is emitted as
// f * (1 / sqrt(f)), trading precision for speed compared to sqrt().
//
// NOTE(review): if rsqrt() yields +inf for f == 0 (as the hardware rsqrtss
// instruction is documented to), the product 0 * inf would be NaN - confirm
// that callers never pass an exact zero, or that NaN is handled downstream.
SSAFloat SSAFloat::fastsqrt(SSAFloat f)
{
	SSAFloat inv_root = rsqrt(f);
	return f * inv_root;
}
// Emits a reciprocal square root of f via the x86 SSE rsqrt_ss intrinsic.
//
// The intrinsic is called on a vector of SSAVec4f's LLVM type, so the scalar
// is inserted into element 0 of an otherwise undefined vector, passed through
// the intrinsic, and element 0 of the result is extracted again.
SSAFloat SSAFloat::rsqrt(SSAFloat f)
{
	// Constant 32-bit index 0: the element that carries the scalar.
	llvm::Value *lane_zero = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, static_cast<uint64_t>(0)));

	// Remaining elements are left undefined; only element 0 is read back.
	llvm::Value *undef_vec = llvm::UndefValue::get(SSAVec4f::llvm_type());
	llvm::Value *packed = SSAScope::builder().CreateInsertElement(undef_vec, f.v, lane_zero);

	llvm::Value *approx = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), packed, SSAScope::hint());

	llvm::Value *scalar = SSAScope::builder().CreateExtractElement(approx, SSAInt(0).v, SSAScope::hint());
	return SSAFloat::from_llvm(scalar);
}
SSAFloat SSAFloat::sin(SSAFloat val)
{
std::vector<llvm::Type *> params;

View file

@ -37,6 +37,8 @@ public:
static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
static llvm::Type *llvm_type();
static SSAFloat sqrt(SSAFloat f);
static SSAFloat fastsqrt(SSAFloat f);
static SSAFloat rsqrt(SSAFloat f);
static SSAFloat sin(SSAFloat val);
static SSAFloat cos(SSAFloat val);
static SSAFloat pow(SSAFloat val, SSAFloat power);