From 0de30ebdd9521d2cafaaa9d529eca0206103102a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 12:11:44 +0100 Subject: [PATCH] Add arm neon intrinsics --- tools/drawergen/precomp.h | 6 ++++++ tools/drawergen/ssa/ssa_float.cpp | 4 ++++ tools/drawergen/ssa/ssa_vec16ub.cpp | 6 ++++++ tools/drawergen/ssa/ssa_vec4f.cpp | 4 ++++ tools/drawergen/ssa/ssa_vec4i.cpp | 4 ++++ tools/drawergen/ssa/ssa_vec8s.cpp | 6 ++++++ 6 files changed, 30 insertions(+) diff --git a/tools/drawergen/precomp.h b/tools/drawergen/precomp.h index f2bf67b70..bb4f818df 100644 --- a/tools/drawergen/precomp.h +++ b/tools/drawergen/precomp.h @@ -3,3 +3,9 @@ #include "llvm_include.h" #include "../../src/swrenderer/drawers/r_drawers.h" + +#ifdef __arm__ +#define ARM_TARGET +#else +#define X86_TARGET +#endif diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp index 4f93379c8..19a6fcd0a 100644 --- a/tools/drawergen/ssa/ssa_float.cpp +++ b/tools/drawergen/ssa/ssa_float.cpp @@ -56,9 +56,13 @@ llvm::Type *SSAFloat::llvm_type() SSAFloat SSAFloat::rsqrt(SSAFloat f) { +#ifdef ARM_TARGET + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrte), f.v, SSAScope::hint())); +#else llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint()); return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint())); +#endif } SSAFloat SSAFloat::MIN(SSAFloat a, SSAFloat b) diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp b/tools/drawergen/ssa/ssa_vec16ub.cpp index 0fceab8e8..cdf1e465c 100644 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ b/tools/drawergen/ssa/ssa_vec16ub.cpp @@ -72,8 +72,14 @@ SSAVec16ub::SSAVec16ub(llvm::Value *v) 
SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) : v(0) { +#ifdef ARM_TARGET + llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint()); + llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint()); + v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; +#else llvm::Value *values[2] = { s0.v, s1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); +#endif } llvm::Type *SSAVec16ub::llvm_type() diff --git a/tools/drawergen/ssa/ssa_vec4f.cpp b/tools/drawergen/ssa/ssa_vec4f.cpp index e58f167de..d59400ea9 100644 --- a/tools/drawergen/ssa/ssa_vec4f.cpp +++ b/tools/drawergen/ssa/ssa_vec4f.cpp @@ -81,9 +81,13 @@ SSAVec4f::SSAVec4f(llvm::Value *v) SSAVec4f::SSAVec4f(SSAVec4i i32) : v(0) { +#ifdef ARM_TARGET + v = SSAScope::builder().CreateSIToFP(i32.v, llvm_type(), SSAScope::hint()); +#else //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4); //return builder->CreateSIToFP(i32.v, m128type); v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint()); +#endif } llvm::Type *SSAVec4f::llvm_type() diff --git a/tools/drawergen/ssa/ssa_vec4i.cpp b/tools/drawergen/ssa/ssa_vec4i.cpp index 0e42124e9..4b0d7766f 100644 --- a/tools/drawergen/ssa/ssa_vec4i.cpp +++ b/tools/drawergen/ssa/ssa_vec4i.cpp @@ -93,7 +93,11 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) SSAVec4i::SSAVec4i(SSAVec4f f32) : v(0) { +#ifdef ARM_TARGET + v = SSAScope::builder().CreateFPToSI(f32.v, llvm_type(), SSAScope::hint()); +#else v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); +#endif } */ diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp 
b/tools/drawergen/ssa/ssa_vec8s.cpp index 552b8d69b..f78c1dd25 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -62,8 +62,14 @@ SSAVec8s::SSAVec8s(llvm::Value *v) SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) : v(0) { +#ifdef ARM_TARGET + llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint()); + llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint()); + v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; +#else llvm::Value *values[2] = { i0.v, i1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); +#endif } llvm::Type *SSAVec8s::llvm_type()