diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index e753d61971..dcb039a404 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -89,7 +89,7 @@ int main(int argc, char **argv) std::cout << "Target triple is " << triple << std::endl; #ifdef __arm__ - std::string cpuName = "armv8"; + std::string cpuName = llvm::sys::getHostCPUName(); // "armv8"; #else std::string cpuName = "pentium4"; #endif diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp index 19a6fcd0a3..6c597dc1c4 100644 --- a/tools/drawergen/ssa/ssa_float.cpp +++ b/tools/drawergen/ssa/ssa_float.cpp @@ -57,7 +57,8 @@ llvm::Type *SSAFloat::llvm_type() SSAFloat SSAFloat::rsqrt(SSAFloat f) { #ifdef ARM_TARGET - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint())); + //return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint())); + return SSAFloat(1.0f) / (f * SSAFloat(0.01f)); // NOTE(review): stand-in for the disabled intrinsic call; as written this is 1.0 / (f * 0.01) with no square root, confirm this is intended for rsqrt #else llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint()); diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp b/tools/drawergen/ssa/ssa_vec16ub.cpp index 10a0a9d187..e64fe8e851 100644 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ b/tools/drawergen/ssa/ssa_vec16ub.cpp @@ -73,9 +73,15 @@ SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) : v(0) { #ifdef ARM_TARGET + /* llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint()); llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint()); v = 
shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; + */ + // TODO: add clamping here. NOTE(review): the CreateTrunc calls below replace the commented-out vqmovnsu calls; trunc presumably discards the high bits instead of saturating, confirm + llvm::Value *int8x8_i0 = SSAScope::builder().CreateTrunc(s0.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8)); + llvm::Value *int8x8_i1 = SSAScope::builder().CreateTrunc(s1.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8)); + v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; #else llvm::Value *values[2] = { s0.v, s1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp index 80d8817d1e..795194ca5b 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -63,9 +63,15 @@ SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) : v(0) { #ifdef ARM_TARGET + /* llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint()); llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint()); v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; + */ + // TODO: add clamping here. NOTE(review): the CreateTrunc calls below replace the commented-out vqmovns calls; trunc presumably discards the high bits instead of saturating, confirm + llvm::Value *int16x4_i0 = SSAScope::builder().CreateTrunc(i0.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4)); + llvm::Value *int16x4_i1 = SSAScope::builder().CreateTrunc(i1.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4)); + v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; #else llvm::Value *values[2] = { i0.v, i1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint());