mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-11 15:21:51 +00:00
Add arm neon intrinsics
This commit is contained in:
parent
b91e88a9a6
commit
0de30ebdd9
6 changed files with 30 additions and 0 deletions
|
@ -3,3 +3,9 @@
|
|||
|
||||
#include "llvm_include.h"
|
||||
#include "../../src/swrenderer/drawers/r_drawers.h"
|
||||
|
||||
// Choose which SIMD intrinsic family the SSA helpers emit.
// ARM_TARGET is defined when the compiler predefines __arm__; every other
// configuration is treated as X86_TARGET.
// NOTE(review): 64-bit ARM compilers normally predefine __aarch64__ rather
// than __arm__, so such builds would land in the X86_TARGET branch - confirm
// whether that target needs its own case.
#if defined(__arm__)
#  define ARM_TARGET
#else
#  define X86_TARGET
#endif
|
||||
|
|
|
@ -56,9 +56,13 @@ llvm::Type *SSAFloat::llvm_type()
|
|||
|
||||
// Emits IR computing an approximate reciprocal square root of the scalar f.
//
// Both target intrinsics used here operate on a 4-float vector, so f is
// inserted into lane 0 of an otherwise undefined vector, the estimate is
// computed on that vector, and lane 0 is extracted as the result.
SSAFloat SSAFloat::rsqrt(SSAFloat f)
{
    llvm::Type *vec4f_type = SSAVec4f::llvm_type();
    llvm::Value *lanes = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(vec4f_type), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0)));
#ifdef ARM_TARGET
    // Use the reciprocal square root *estimate* (vrsqrte). The previous code
    // requested aarch64_neon_frsqrts, which is the two-operand Newton-Raphson
    // step and belongs to the AArch64 intrinsic set, while ARM_TARGET is
    // selected by __arm__ and the other ARM paths use arm_neon_* intrinsics.
    // vrsqrte is overloaded on its vector type, so that type must be supplied.
    // NOTE(review): assumes SSAScope::intrinsic accepts the overload type list
    // as its second argument - confirm against its declaration.
    // NOTE(review): the NEON estimate is coarser than the SSE one; add a
    // vrsqrts refinement step if callers need matching precision.
    llvm::Type *overload_types[] = { vec4f_type };
    lanes = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vrsqrte, overload_types), lanes, SSAScope::hint());
#else
    lanes = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), lanes, SSAScope::hint());
#endif
    return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(lanes, SSAInt(0).v, SSAScope::hint()));
}
|
||||
|
||||
SSAFloat SSAFloat::MIN(SSAFloat a, SSAFloat b)
|
||||
|
|
|
@ -72,8 +72,14 @@ SSAVec16ub::SSAVec16ub(llvm::Value *v)
|
|||
// Packs two vectors of signed 16-bit lanes (s0, s1) into one vector of
// sixteen unsigned 8-bit lanes using unsigned saturation.
// The result is stored in v.
SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1)
: v(nullptr)
{
#ifdef ARM_TARGET
    // vqmovnsu narrows each signed 16-bit lane to an unsigned 8-bit lane with
    // saturation. It is overloaded on its (narrow) result type.
    // NOTE(review): assumes SSAScope::intrinsic accepts the overload type list
    // as its second argument - confirm against its declaration.
    llvm::Type *overload_types[] = { llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8) };
    auto *narrow = SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu, overload_types);
    llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(narrow, s0.v, SSAScope::hint());
    llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(narrow, s1.v, SSAScope::hint());
    // Join the two 8-lane halves into 16 lanes. A constructor cannot return a
    // value, so the shuffled result is stored in v instead.
    v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v;
#else
    // SSE2 packuswb takes both inputs and produces the packed vector directly.
    llvm::Value *values[2] = { s0.v, s1.v };
    v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint());
#endif
}
|
||||
|
||||
llvm::Type *SSAVec16ub::llvm_type()
|
||||
|
|
|
@ -81,9 +81,13 @@ SSAVec4f::SSAVec4f(llvm::Value *v)
|
|||
// Builds a float vector by converting the signed 32-bit integer lanes of i32.
SSAVec4f::SSAVec4f(SSAVec4i i32)
: v(nullptr)
{
    auto &&ir = SSAScope::builder();
#ifdef ARM_TARGET
    // Target-independent signed-int-to-float conversion instruction.
    llvm::Value *as_float = ir.CreateSIToFP(i32.v, llvm_type(), SSAScope::hint());
#else
    // SSE2 cvtdq2ps intrinsic.
    llvm::Value *as_float = ir.CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint());
#endif
    v = as_float;
}
|
||||
|
||||
llvm::Type *SSAVec4f::llvm_type()
|
||||
|
|
|
@ -93,7 +93,11 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
|
|||
// Builds an integer vector by converting the float lanes of f32 to signed
// 32-bit integers.
SSAVec4i::SSAVec4i(SSAVec4f f32)
: v(nullptr)
{
    auto &&ir = SSAScope::builder();
#ifdef ARM_TARGET
    // Target-independent float-to-signed-int conversion instruction.
    llvm::Value *as_int = ir.CreateFPToSI(f32.v, llvm_type(), SSAScope::hint());
#else
    // SSE2 cvttps2dq intrinsic.
    llvm::Value *as_int = ir.CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint());
#endif
    v = as_int;
}
|
||||
*/
|
||||
|
||||
|
|
|
@ -62,8 +62,14 @@ SSAVec8s::SSAVec8s(llvm::Value *v)
|
|||
// Packs two vectors of signed 32-bit lanes (i0, i1) into one vector of eight
// signed 16-bit lanes using signed saturation.
// The result is stored in v.
SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1)
: v(nullptr)
{
#ifdef ARM_TARGET
    // vqmovns narrows each signed 32-bit lane to a signed 16-bit lane with
    // saturation. It is overloaded on its (narrow) result type.
    // NOTE(review): assumes SSAScope::intrinsic accepts the overload type list
    // as its second argument - confirm against its declaration.
    llvm::Type *overload_types[] = { llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4) };
    auto *narrow = SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns, overload_types);
    llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(narrow, i0.v, SSAScope::hint());
    llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(narrow, i1.v, SSAScope::hint());
    // Join the two 4-lane halves into 8 lanes. A constructor cannot return a
    // value, so the shuffled result is stored in v instead.
    v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v;
#else
    // SSE2 packssdw takes both inputs and produces the packed vector directly.
    llvm::Value *values[2] = { i0.v, i1.v };
    v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint());
#endif
}
|
||||
|
||||
llvm::Type *SSAVec8s::llvm_type()
|
||||
|
|
Loading…
Reference in a new issue