mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-29 07:22:07 +00:00
Switch to the lower-precision rsqrtss instruction, as it is faster, especially on older Intel architectures and on AMD.
This commit is contained in:
parent
fa66ca214e
commit
9a529192b0
4 changed files with 17 additions and 2 deletions
|
@ -256,7 +256,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
|
||||||
// attenuation = 1 - MIN(dist * (1/radius), 1)
|
// attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||||
SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z
|
SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z
|
||||||
SSAFloat Lx = light_x - viewpos_x;
|
SSAFloat Lx = light_x - viewpos_x;
|
||||||
SSAFloat dist = SSAFloat::sqrt(Lyz2 + Lx * Lx);
|
SSAFloat dist = SSAFloat::fastsqrt(Lyz2 + Lx * Lx);
|
||||||
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
|
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
|
||||||
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
|
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
|
||||||
|
|
||||||
|
|
|
@ -244,7 +244,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
|
||||||
// attenuation = 1 - MIN(dist * (1/radius), 1)
|
// attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||||
SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y
|
SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y
|
||||||
SSAFloat Lz = light_z - z;
|
SSAFloat Lz = light_z - z;
|
||||||
SSAFloat dist = SSAFloat::sqrt(Lxy2 + Lz * Lz);
|
SSAFloat dist = SSAFloat::fastsqrt(Lxy2 + Lz * Lz);
|
||||||
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
|
SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true);
|
||||||
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
|
SSAVec4i contribution = (light_color * fg * attenuation) >> 16;
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "ssa_int.h"
|
#include "ssa_int.h"
|
||||||
#include "ssa_scope.h"
|
#include "ssa_scope.h"
|
||||||
#include "ssa_bool.h"
|
#include "ssa_bool.h"
|
||||||
|
#include "ssa_vec4f.h"
|
||||||
|
|
||||||
SSAFloat::SSAFloat()
|
SSAFloat::SSAFloat()
|
||||||
: v(0)
|
: v(0)
|
||||||
|
@ -60,6 +61,18 @@ SSAFloat SSAFloat::sqrt(SSAFloat f)
|
||||||
return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint()));
|
return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Approximate square root of f, computed as f * (1 / sqrt(f)) using the
// reduced-precision rsqrt() helper rather than the exact sqrt intrinsic.
//
// NOTE(review): if f is exactly 0 and rsqrt() produces +inf (which is what the
// hardware rsqrtss instruction does), the product 0 * inf is NaN - confirm
// that callers never pass an exact zero.
SSAFloat SSAFloat::fastsqrt(SSAFloat f)
{
	SSAFloat inv_root = rsqrt(f);
	return f * inv_root;
}
|
||||||
|
|
||||||
|
// Approximate reciprocal square root of f, emitted through the x86 SSE
// rsqrt_ss intrinsic. The scalar is wrapped in a vector for the intrinsic call
// and the result is read back from the same lane.
SSAFloat SSAFloat::rsqrt(SSAFloat f)
{
	// Place f in element 0 of an otherwise undefined SSAVec4f-typed vector;
	// only that element is read back below.
	llvm::Value *lane0 = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0));
	llvm::Value *undef_vec = llvm::UndefValue::get(SSAVec4f::llvm_type());
	llvm::Value *packed = SSAScope::builder().CreateInsertElement(undef_vec, f.v, lane0);

	// Emit the intrinsic call on the packed vector.
	llvm::Value *approx = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), packed, SSAScope::hint());

	// Pull the scalar result back out of element 0.
	return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(approx, SSAInt(0).v, SSAScope::hint()));
}
|
||||||
|
|
||||||
SSAFloat SSAFloat::sin(SSAFloat val)
|
SSAFloat SSAFloat::sin(SSAFloat val)
|
||||||
{
|
{
|
||||||
std::vector<llvm::Type *> params;
|
std::vector<llvm::Type *> params;
|
||||||
|
|
|
@ -37,6 +37,8 @@ public:
|
||||||
static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
|
static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
|
||||||
static llvm::Type *llvm_type();
|
static llvm::Type *llvm_type();
|
||||||
static SSAFloat sqrt(SSAFloat f);
|
static SSAFloat sqrt(SSAFloat f);
|
||||||
|
static SSAFloat fastsqrt(SSAFloat f);
|
||||||
|
static SSAFloat rsqrt(SSAFloat f);
|
||||||
static SSAFloat sin(SSAFloat val);
|
static SSAFloat sin(SSAFloat val);
|
||||||
static SSAFloat cos(SSAFloat val);
|
static SSAFloat cos(SSAFloat val);
|
||||||
static SSAFloat pow(SSAFloat val, SSAFloat power);
|
static SSAFloat pow(SSAFloat val, SSAFloat power);
|
||||||
|
|
Loading…
Reference in a new issue