mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-14 16:40:56 +00:00
Stop using intrinsics on ARM until they have actually got gdb running on it.
It is all a waste of time anyhow, because when my computer is literally a factor of 100 faster at building, what frame rate can one really expect? What should it run at? 320x200?
This commit is contained in:
parent
3487be2c40
commit
2f64bfa5af
4 changed files with 15 additions and 2 deletions
|
@ -89,7 +89,7 @@ int main(int argc, char **argv)
|
||||||
std::cout << "Target triple is " << triple << std::endl;
|
std::cout << "Target triple is " << triple << std::endl;
|
||||||
|
|
||||||
#ifdef __arm__
|
#ifdef __arm__
|
||||||
std::string cpuName = "armv8";
|
std::string cpuName = llvm::sys::getHostCPUName(); // "armv8";
|
||||||
#else
|
#else
|
||||||
std::string cpuName = "pentium4";
|
std::string cpuName = "pentium4";
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -57,7 +57,8 @@ llvm::Type *SSAFloat::llvm_type()
|
||||||
SSAFloat SSAFloat::rsqrt(SSAFloat f)
|
SSAFloat SSAFloat::rsqrt(SSAFloat f)
|
||||||
{
|
{
|
||||||
#ifdef ARM_TARGET
|
#ifdef ARM_TARGET
|
||||||
return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint()));
|
//return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint()));
|
||||||
|
return SSAFloat(1.0f) / (f * SSAFloat(0.01f));
|
||||||
#else
|
#else
|
||||||
llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0)));
|
llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0)));
|
||||||
f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint());
|
f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint());
|
||||||
|
|
|
@ -73,9 +73,15 @@ SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1)
|
||||||
: v(0)
|
: v(0)
|
||||||
{
|
{
|
||||||
#ifdef ARM_TARGET
|
#ifdef ARM_TARGET
|
||||||
|
/*
|
||||||
llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint());
|
llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint());
|
||||||
llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint());
|
llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint());
|
||||||
v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v;
|
v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v;
|
||||||
|
*/
|
||||||
|
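// TODO: clamp each lane to the unsigned 8-bit range [0, 255] before truncating; CreateTrunc discards the high bits instead of saturating like vqmovnsu/packuswb.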
|
||||||
|
llvm::Value *int8x8_i0 = SSAScope::builder().CreateTrunc(s0.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8));
|
||||||
|
llvm::Value *int8x8_i1 = SSAScope::builder().CreateTrunc(s1.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8));
|
||||||
|
v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v;
|
||||||
#else
|
#else
|
||||||
llvm::Value *values[2] = { s0.v, s1.v };
|
llvm::Value *values[2] = { s0.v, s1.v };
|
||||||
v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint());
|
v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint());
|
||||||
|
|
|
@ -63,9 +63,15 @@ SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1)
|
||||||
: v(0)
|
: v(0)
|
||||||
{
|
{
|
||||||
#ifdef ARM_TARGET
|
#ifdef ARM_TARGET
|
||||||
|
/*
|
||||||
llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint());
|
llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint());
|
||||||
llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint());
|
llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint());
|
||||||
v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v;
|
v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v;
|
||||||
|
*/
|
||||||
|
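// TODO: clamp each lane to the signed 16-bit range [-32768, 32767] before truncating; CreateTrunc discards the high bits instead of saturating like vqmovns/packssdw.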
|
||||||
|
llvm::Value *int16x4_i0 = SSAScope::builder().CreateTrunc(i0.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4));
|
||||||
|
llvm::Value *int16x4_i1 = SSAScope::builder().CreateTrunc(i1.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4));
|
||||||
|
v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v;
|
||||||
#else
|
#else
|
||||||
llvm::Value *values[2] = { i0.v, i1.v };
|
llvm::Value *values[2] = { i0.v, i1.v };
|
||||||
v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint());
|
v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint());
|
||||||
|
|
Loading…
Reference in a new issue