raze/source/common/scripting/jit/jit_math.cpp
Christoph Oelckers 8806fb930b - Backend update from GZDoom
* Vector 4 in zscript
* garbage collector fixes
2022-11-12 10:45:39 +01:00

1683 lines
37 KiB
C++

#include "jitintern.h"
#include "basics.h"
/////////////////////////////////////////////////////////////////////////////
// String instructions.
// Runtime helper called from JIT code: stores *first + *second into *to.
static void ConcatString(FString* to, FString* first, FString* second)
{
    FString& destination = *to;
    destination = *first + *second;
}
// CONCAT: emits a call to ConcatString(regS[A], regS[B], <operand for C>).
void JitCompiler::EmitCONCAT()
{
    // The C operand is obtained through CheckRegS(C, A) (defined elsewhere).
    auto secondOperand = CheckRegS(C, A);
    auto concatCall = CreateCall<void, FString*, FString*, FString*>(ConcatString);
    concatCall->setArg(0, regS[A]);
    concatCall->setArg(1, regS[B]);
    concatCall->setArg(2, secondOperand);
}
// Runtime helper called from JIT code: returns str->Len() converted to int.
static int StringLength(FString* str)
{
    const auto length = str->Len();
    return static_cast<int>(length);
}
// LENS: emits a call to StringLength(regS[B]) and moves the result into regD[A].
void JitCompiler::EmitLENS()
{
    auto length = newResultInt32();
    auto lengthCall = CreateCall<int, FString*>(StringLength);
    lengthCall->setRet(0, length);
    lengthCall->setArg(0, regS[B]);
    cc.mov(regD[A], length);
}
// Runtime helper called from JIT code: forwards to FString::CompareNoCase.
static int StringCompareNoCase(FString* first, FString* second)
{
    FString& lhs = *first;
    return lhs.CompareNoCase(*second);
}
// Runtime helper called from JIT code: forwards to FString::Compare.
static int StringCompare(FString* first, FString* second)
{
    FString& lhs = *first;
    return lhs.Compare(*second);
}
// CMPS: emits a call to a string comparison helper for operands B and C and
// then a conditional jump to `fail` based on the helper's int result.
// Bits of A select the variant:
//  - CMP_APPROX: use StringCompareNoCase instead of StringCompare.
//  - CMP_BK / CMP_CK: pass the address of konsts[B] / konsts[C] instead of
//    regS[B] / regS[C].
//  - A & CMP_METHOD_MASK: CMP_EQ tests the result against zero, CMP_LT uses
//    the jl/jnl pair, anything else uses the jle/jnle pair.
void JitCompiler::EmitCMPS()
{
EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
auto call = CreateCall<int, FString*, FString*>(static_cast<bool>(A & CMP_APPROX) ? StringCompareNoCase : StringCompare);
auto result = newResultInt32();
call->setRet(0, result);
// First argument: constant string or string register.
if (static_cast<bool>(A & CMP_BK)) call->setArg(0, asmjit::imm_ptr(&konsts[B]));
else call->setArg(0, regS[B]);
// Second argument: constant string or string register.
if (static_cast<bool>(A & CMP_CK)) call->setArg(1, asmjit::imm_ptr(&konsts[C]));
else call->setArg(1, regS[C]);
int method = A & CMP_METHOD_MASK;
if (method == CMP_EQ) {
// result == 0 means the helper reported equality.
cc.test(result, result);
if (check) cc.jz(fail);
else cc.jnz(fail);
}
else if (method == CMP_LT) {
// result < 0
cc.cmp(result, 0);
if (check) cc.jl(fail);
else cc.jnl(fail);
}
else {
// result <= 0
cc.cmp(result, 0);
if (check) cc.jle(fail);
else cc.jnle(fail);
}
});
}
/////////////////////////////////////////////////////////////////////////////
// Integer math.
// SLL_RR: regD[A] = regD[B] << regD[C] (emits shl).
void JitCompiler::EmitSLL_RR()
{
    // Shift count operand obtained through CheckRegD(C, A) before regD[A] is written.
    auto shiftCount = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.shl(dst, shiftCount);
}
// SLL_RI: regD[A] = regD[B] << C, with C used as an immediate shift count.
void JitCompiler::EmitSLL_RI()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.shl(dst, C);
}
// SLL_KR: regD[A] = konstd[B] << regD[C] (emits shl).
void JitCompiler::EmitSLL_KR()
{
    auto shiftCount = CheckRegD(C, A);
    const auto dst = regD[A];
    cc.mov(dst, konstd[B]);
    cc.shl(dst, shiftCount);
}
// SRL_RR: regD[A] = regD[B] >> regD[C] using shr (logical right shift).
void JitCompiler::EmitSRL_RR()
{
    auto shiftCount = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.shr(dst, shiftCount);
}
// SRL_RI: regD[A] = regD[B] >> C using shr, with C as an immediate shift count.
void JitCompiler::EmitSRL_RI()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.shr(dst, C);
}
// SRL_KR: regD[A] = konstd[B] >> regD[C] using shr (logical right shift).
void JitCompiler::EmitSRL_KR()
{
    auto shiftCount = CheckRegD(C, A);
    const auto dst = regD[A];
    cc.mov(dst, konstd[B]);
    cc.shr(dst, shiftCount);
}
// SRA_RR: regD[A] = regD[B] >> regD[C] using sar (arithmetic right shift).
void JitCompiler::EmitSRA_RR()
{
    auto shiftCount = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.sar(dst, shiftCount);
}
// SRA_RI: regD[A] = regD[B] >> C using sar, with C as an immediate shift count.
void JitCompiler::EmitSRA_RI()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.sar(dst, C);
}
// SRA_KR: regD[A] = konstd[B] >> regD[C] using sar (arithmetic right shift).
void JitCompiler::EmitSRA_KR()
{
    auto shiftCount = CheckRegD(C, A);
    const auto dst = regD[A];
    cc.mov(dst, konstd[B]);
    cc.sar(dst, shiftCount);
}
// ADD_RR: regD[A] = regD[B] + regD[C].
void JitCompiler::EmitADD_RR()
{
    // Right-hand operand obtained through CheckRegD(C, A) before regD[A] is written.
    auto addend = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.add(dst, addend);
}
// ADD_RK: regD[A] = regD[B] + konstd[C].
void JitCompiler::EmitADD_RK()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.add(dst, konstd[C]);
}
// ADDI: regD[A] = regD[B] + Cs, with Cs used as an immediate.
void JitCompiler::EmitADDI()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.add(dst, Cs);
}
// SUB_RR: regD[A] = regD[B] - regD[C].
void JitCompiler::EmitSUB_RR()
{
    auto subtrahend = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.sub(dst, subtrahend);
}
// SUB_RK: regD[A] = regD[B] - konstd[C].
void JitCompiler::EmitSUB_RK()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.sub(dst, konstd[C]);
}
// SUB_KR: regD[A] = konstd[B] - regD[C].
void JitCompiler::EmitSUB_KR()
{
    auto subtrahend = CheckRegD(C, A);
    const auto dst = regD[A];
    cc.mov(dst, konstd[B]);
    cc.sub(dst, subtrahend);
}
// MUL_RR: regD[A] = regD[B] * regD[C] (emits imul).
void JitCompiler::EmitMUL_RR()
{
    auto factor = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.imul(dst, factor);
}
// MUL_RK: regD[A] = regD[B] * konstd[C] (emits imul).
void JitCompiler::EmitMUL_RK()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.imul(dst, konstd[C]);
}
// DIV_RR: signed regD[A] = regD[B] / regD[C].
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitDIV_RR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // sign extension in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, regD[B]);
    cc.cdq(highPart, lowPart);
    cc.idiv(highPart, lowPart, divisor);
    cc.mov(regD[A], lowPart);
}
// DIV_RK: signed regD[A] = regD[B] / konstd[C].
// A zero constant is detected at JIT-compile time and turned into an
// unconditional X_DIVISION_BY_ZERO throw.
void JitCompiler::EmitDIV_RK()
{
    if (konstd[C] == 0)
    {
        EmitThrowException(X_DIVISION_BY_ZERO);
        return;
    }

    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // sign extension in, remainder out
    auto divisorAddr = newTempIntPtr();
    cc.mov(lowPart, regD[B]);
    cc.cdq(highPart, lowPart);
    // The divisor is read from memory at the constant's address.
    cc.mov(divisorAddr, asmjit::imm_ptr(&konstd[C]));
    cc.idiv(highPart, lowPart, asmjit::x86::ptr(divisorAddr));
    cc.mov(regD[A], lowPart);
}
// DIV_KR: signed regD[A] = konstd[B] / regD[C].
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitDIV_KR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // sign extension in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, konstd[B]);
    cc.cdq(highPart, lowPart);
    cc.idiv(highPart, lowPart, divisor);
    cc.mov(regD[A], lowPart);
}
// DIVU_RR: unsigned regD[A] = regD[B] / regD[C].
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitDIVU_RR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // zeroed upper half in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, regD[B]);
    cc.mov(highPart, 0);
    cc.div(highPart, lowPart, divisor);
    cc.mov(regD[A], lowPart);
}
// DIVU_RK: unsigned regD[A] = regD[B] / konstd[C].
// A zero constant is detected at JIT-compile time and turned into an
// unconditional X_DIVISION_BY_ZERO throw.
void JitCompiler::EmitDIVU_RK()
{
    if (konstd[C] == 0)
    {
        EmitThrowException(X_DIVISION_BY_ZERO);
        return;
    }

    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // zeroed upper half in, remainder out
    auto divisorAddr = newTempIntPtr();
    cc.mov(lowPart, regD[B]);
    cc.mov(highPart, 0);
    // The divisor is read from memory at the constant's address.
    cc.mov(divisorAddr, asmjit::imm_ptr(&konstd[C]));
    cc.div(highPart, lowPart, asmjit::x86::ptr(divisorAddr));
    cc.mov(regD[A], lowPart);
}
// DIVU_KR: unsigned regD[A] = konstd[B] / regD[C].
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitDIVU_KR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // zeroed upper half in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, konstd[B]);
    cc.mov(highPart, 0);
    cc.div(highPart, lowPart, divisor);
    cc.mov(regD[A], lowPart);
}
// MOD_RR: signed regD[A] = regD[B] % regD[C] (remainder of idiv).
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitMOD_RR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // sign extension in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, regD[B]);
    cc.cdq(highPart, lowPart);
    cc.idiv(highPart, lowPart, divisor);
    cc.mov(regD[A], highPart);
}
// MOD_RK: signed regD[A] = regD[B] % konstd[C] (remainder of idiv).
// A zero constant is detected at JIT-compile time and turned into an
// unconditional X_DIVISION_BY_ZERO throw.
void JitCompiler::EmitMOD_RK()
{
    if (konstd[C] == 0)
    {
        EmitThrowException(X_DIVISION_BY_ZERO);
        return;
    }

    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // sign extension in, remainder out
    auto divisorAddr = newTempIntPtr();
    cc.mov(lowPart, regD[B]);
    cc.cdq(highPart, lowPart);
    // The divisor is read from memory at the constant's address.
    cc.mov(divisorAddr, asmjit::imm_ptr(&konstd[C]));
    cc.idiv(highPart, lowPart, asmjit::x86::ptr(divisorAddr));
    cc.mov(regD[A], highPart);
}
// MOD_KR: signed regD[A] = konstd[B] % regD[C] (remainder of idiv).
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitMOD_KR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // sign extension in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, konstd[B]);
    cc.cdq(highPart, lowPart);
    cc.idiv(highPart, lowPart, divisor);
    cc.mov(regD[A], highPart);
}
// MODU_RR: unsigned regD[A] = regD[B] % regD[C] (remainder of div).
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitMODU_RR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // zeroed upper half in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, regD[B]);
    cc.mov(highPart, 0);
    cc.div(highPart, lowPart, divisor);
    cc.mov(regD[A], highPart);
}
// MODU_RK: unsigned regD[A] = regD[B] % konstd[C] (remainder of div).
// A zero constant is detected at JIT-compile time and turned into an
// unconditional X_DIVISION_BY_ZERO throw.
void JitCompiler::EmitMODU_RK()
{
    if (konstd[C] == 0)
    {
        EmitThrowException(X_DIVISION_BY_ZERO);
        return;
    }

    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // zeroed upper half in, remainder out
    auto divisorAddr = newTempIntPtr();
    cc.mov(lowPart, regD[B]);
    cc.mov(highPart, 0);
    // The divisor is read from memory at the constant's address.
    cc.mov(divisorAddr, asmjit::imm_ptr(&konstd[C]));
    cc.div(highPart, lowPart, asmjit::x86::ptr(divisorAddr));
    cc.mov(regD[A], highPart);
}
// MODU_KR: unsigned regD[A] = konstd[B] % regD[C] (remainder of div).
// Jumps to the X_DIVISION_BY_ZERO exception label when regD[C] is zero.
void JitCompiler::EmitMODU_KR()
{
    auto lowPart = newTempInt32();   // dividend in, quotient out
    auto highPart = newTempInt32();  // zeroed upper half in, remainder out
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    const auto divisor = regD[C];

    cc.test(divisor, divisor);
    cc.je(zeroDivisor);

    cc.mov(lowPart, konstd[B]);
    cc.mov(highPart, 0);
    cc.div(highPart, lowPart, divisor);
    cc.mov(regD[A], highPart);
}
// AND_RR: regD[A] = regD[B] & regD[C].
void JitCompiler::EmitAND_RR()
{
    auto rhs = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.and_(dst, rhs);
}
// AND_RK: regD[A] = regD[B] & konstd[C].
void JitCompiler::EmitAND_RK()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.and_(dst, konstd[C]);
}
// OR_RR: regD[A] = regD[B] | regD[C].
void JitCompiler::EmitOR_RR()
{
    auto rhs = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.or_(dst, rhs);
}
// OR_RK: regD[A] = regD[B] | konstd[C].
void JitCompiler::EmitOR_RK()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.or_(dst, konstd[C]);
}
// XOR_RR: regD[A] = regD[B] ^ regD[C].
void JitCompiler::EmitXOR_RR()
{
    auto rhs = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.xor_(dst, rhs);
}
// XOR_RK: regD[A] = regD[B] ^ konstd[C].
void JitCompiler::EmitXOR_RK()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.xor_(dst, konstd[C]);
}
// MIN_RR: signed regD[A] = min(regD[B], regD[C]).
void JitCompiler::EmitMIN_RR()
{
    auto candidate = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // Replace dst only when the candidate is smaller (signed compare).
    cc.cmp(candidate, dst);
    cc.cmovl(dst, candidate);
}
// MIN_RK: signed regD[A] = min(regD[B], konstd[C]).
void JitCompiler::EmitMIN_RK()
{
    auto candidate = newTempInt32();
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // The constant is loaded into a temporary so it can feed cmov.
    cc.mov(candidate, asmjit::imm(konstd[C]));
    cc.cmp(candidate, dst);
    cc.cmovl(dst, candidate);
}
// MAX_RR: signed regD[A] = max(regD[B], regD[C]).
void JitCompiler::EmitMAX_RR()
{
    auto candidate = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // Replace dst only when the candidate is greater (signed compare).
    cc.cmp(candidate, dst);
    cc.cmovg(dst, candidate);
}
// MAX_RK: signed regD[A] = max(regD[B], konstd[C]).
void JitCompiler::EmitMAX_RK()
{
    auto candidate = newTempInt32();
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // The constant is loaded into a temporary so it can feed cmov.
    cc.mov(candidate, asmjit::imm(konstd[C]));
    cc.cmp(candidate, dst);
    cc.cmovg(dst, candidate);
}
// MINU_RR: unsigned regD[A] = min(regD[B], regD[C]).
void JitCompiler::EmitMINU_RR()
{
    auto candidate = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // Replace dst only when the candidate is below it (unsigned compare).
    cc.cmp(candidate, dst);
    cc.cmovb(dst, candidate);
}
// MINU_RK: unsigned regD[A] = min(regD[B], konstd[C]).
void JitCompiler::EmitMINU_RK()
{
    auto candidate = newTempInt32();
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // The constant is loaded into a temporary so it can feed cmov.
    cc.mov(candidate, asmjit::imm(konstd[C]));
    cc.cmp(candidate, dst);
    cc.cmovb(dst, candidate);
}
// MAXU_RR: unsigned regD[A] = max(regD[B], regD[C]).
void JitCompiler::EmitMAXU_RR()
{
    auto candidate = CheckRegD(C, A);
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // Replace dst only when the candidate is above it (unsigned compare).
    cc.cmp(candidate, dst);
    cc.cmova(dst, candidate);
}
// MAXU_RK: unsigned regD[A] = max(regD[B], konstd[C]).
void JitCompiler::EmitMAXU_RK()
{
    auto candidate = newTempInt32();
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    // The constant is loaded into a temporary so it can feed cmov.
    cc.mov(candidate, asmjit::imm(konstd[C]));
    cc.cmp(candidate, dst);
    cc.cmova(dst, candidate);
}
// ABS: regD[A] = |regD[B]|, computed branch-free as (mask ^ value) - mask
// where mask = value >> 31 (arithmetic shift).
void JitCompiler::EmitABS()
{
    auto value = CheckRegD(B, A);
    auto signMask = newTempInt32();
    const auto dst = regD[A];

    cc.mov(signMask, regD[B]);
    cc.sar(signMask, 31);
    cc.mov(dst, signMask);
    cc.xor_(dst, value);
    cc.sub(dst, signMask);
}
// NEG: regD[A] = 0 - regD[B].
void JitCompiler::EmitNEG()
{
    auto value = CheckRegD(B, A);
    const auto dst = regD[A];
    cc.xor_(dst, dst);  // dst = 0
    cc.sub(dst, value);
}
// NOT: regD[A] = ~regD[B] (bitwise complement).
void JitCompiler::EmitNOT()
{
    const auto dst = regD[A];
    if (A != B)
    {
        cc.mov(dst, regD[B]);
    }
    cc.not_(dst);
}
// EQ_R: compares regD[B] with regD[C]. With `check` set the jump to `fail`
// is taken on equality, otherwise on inequality.
void JitCompiler::EmitEQ_R()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], regD[C]);
        if (check)
        {
            cc.je(fail);
        }
        else
        {
            cc.jne(fail);
        }
    });
}
// EQ_K: compares regD[B] with konstd[C]. With `check` set the jump to `fail`
// is taken on equality, otherwise on inequality.
void JitCompiler::EmitEQ_K()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], konstd[C]);
        if (check)
        {
            cc.je(fail);
        }
        else
        {
            cc.jne(fail);
        }
    });
}
// LT_RR: signed compare of regD[B] with regD[C]. With `check` set the jump to
// `fail` is taken when B < C, otherwise when B >= C.
void JitCompiler::EmitLT_RR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], regD[C]);
        if (check)
        {
            cc.jl(fail);
        }
        else
        {
            cc.jnl(fail);
        }
    });
}
// LT_RK: signed compare of regD[B] with konstd[C]. With `check` set the jump
// to `fail` is taken when B < C, otherwise when B >= C.
void JitCompiler::EmitLT_RK()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], konstd[C]);
        if (check)
        {
            cc.jl(fail);
        }
        else
        {
            cc.jnl(fail);
        }
    });
}
// LT_KR: signed compare of konstd[B] (read from memory) with regD[C]. With
// `check` set the jump to `fail` is taken when B < C, otherwise when B >= C.
void JitCompiler::EmitLT_KR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        auto konstAddr = newTempIntPtr();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstd[B]));
        cc.cmp(asmjit::x86::ptr(konstAddr), regD[C]);
        if (check)
        {
            cc.jl(fail);
        }
        else
        {
            cc.jnl(fail);
        }
    });
}
// LE_RR: signed compare of regD[B] with regD[C]. With `check` set the jump to
// `fail` is taken when B <= C, otherwise when B > C.
void JitCompiler::EmitLE_RR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], regD[C]);
        if (check)
        {
            cc.jle(fail);
        }
        else
        {
            cc.jnle(fail);
        }
    });
}
// LE_RK: signed compare of regD[B] with konstd[C]. With `check` set the jump
// to `fail` is taken when B <= C, otherwise when B > C.
void JitCompiler::EmitLE_RK()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], konstd[C]);
        if (check)
        {
            cc.jle(fail);
        }
        else
        {
            cc.jnle(fail);
        }
    });
}
// LE_KR: signed compare of konstd[B] (read from memory) with regD[C]. With
// `check` set the jump to `fail` is taken when B <= C, otherwise when B > C.
void JitCompiler::EmitLE_KR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        auto konstAddr = newTempIntPtr();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstd[B]));
        cc.cmp(asmjit::x86::ptr(konstAddr), regD[C]);
        if (check)
        {
            cc.jle(fail);
        }
        else
        {
            cc.jnle(fail);
        }
    });
}
// LTU_RR: unsigned compare of regD[B] with regD[C]. With `check` set the jump
// to `fail` is taken when B is below C, otherwise when it is not below.
void JitCompiler::EmitLTU_RR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], regD[C]);
        if (check)
        {
            cc.jb(fail);
        }
        else
        {
            cc.jnb(fail);
        }
    });
}
// LTU_RK: unsigned compare of regD[B] with konstd[C]. With `check` set the
// jump to `fail` is taken when B is below C, otherwise when it is not below.
void JitCompiler::EmitLTU_RK()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], konstd[C]);
        if (check)
        {
            cc.jb(fail);
        }
        else
        {
            cc.jnb(fail);
        }
    });
}
// LTU_KR: unsigned compare of konstd[B] (read from memory) with regD[C]. With
// `check` set the jump to `fail` is taken when B is below C, otherwise when
// it is not below.
void JitCompiler::EmitLTU_KR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        auto konstAddr = newTempIntPtr();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstd[B]));
        cc.cmp(asmjit::x86::ptr(konstAddr), regD[C]);
        if (check)
        {
            cc.jb(fail);
        }
        else
        {
            cc.jnb(fail);
        }
    });
}
// LEU_RR: unsigned compare of regD[B] with regD[C]. With `check` set the jump
// to `fail` is taken when B is below or equal to C, otherwise when B is above.
void JitCompiler::EmitLEU_RR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], regD[C]);
        if (check)
        {
            cc.jbe(fail);
        }
        else
        {
            cc.jnbe(fail);
        }
    });
}
// LEU_RK: unsigned compare of regD[B] with konstd[C]. With `check` set the
// jump to `fail` is taken when B is below or equal to C, otherwise when B is
// above.
void JitCompiler::EmitLEU_RK()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        cc.cmp(regD[B], konstd[C]);
        if (check)
        {
            cc.jbe(fail);
        }
        else
        {
            cc.jnbe(fail);
        }
    });
}
// LEU_KR: unsigned compare of konstd[B] (read from memory) with regD[C]. With
// `check` set the jump to `fail` is taken when B is below or equal to C,
// otherwise when B is above.
void JitCompiler::EmitLEU_KR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        auto konstAddr = newTempIntPtr();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstd[B]));
        cc.cmp(asmjit::x86::ptr(konstAddr), regD[C]);
        if (check)
        {
            cc.jbe(fail);
        }
        else
        {
            cc.jnbe(fail);
        }
    });
}
/////////////////////////////////////////////////////////////////////////////
// Double-precision floating point math.
// ADDF_RR: regF[A] = regF[B] + regF[C].
void JitCompiler::EmitADDF_RR()
{
    // Right-hand operand obtained through CheckRegF(C, A) before regF[A] is written.
    auto addend = CheckRegF(C, A);
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    cc.addsd(dst, addend);
}
// ADDF_RK: regF[A] = regF[B] + konstf[C] (constant read from memory).
void JitCompiler::EmitADDF_RK()
{
    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.addsd(dst, asmjit::x86::qword_ptr(konstAddr));
}
// SUBF_RR: regF[A] = regF[B] - regF[C].
void JitCompiler::EmitSUBF_RR()
{
    auto subtrahend = CheckRegF(C, A);
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    cc.subsd(dst, subtrahend);
}
// SUBF_RK: regF[A] = regF[B] - konstf[C] (constant read from memory).
void JitCompiler::EmitSUBF_RK()
{
    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.subsd(dst, asmjit::x86::qword_ptr(konstAddr));
}
// SUBF_KR: regF[A] = konstf[B] - regF[C] (constant read from memory).
void JitCompiler::EmitSUBF_KR()
{
    auto subtrahend = CheckRegF(C, A);
    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[B]));
    cc.movsd(dst, asmjit::x86::qword_ptr(konstAddr));
    cc.subsd(dst, subtrahend);
}
// MULF_RR: regF[A] = regF[B] * regF[C].
void JitCompiler::EmitMULF_RR()
{
    auto factor = CheckRegF(C, A);
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    cc.mulsd(dst, factor);
}
// MULF_RK: regF[A] = regF[B] * konstf[C] (constant read from memory).
void JitCompiler::EmitMULF_RK()
{
    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.mulsd(dst, asmjit::x86::qword_ptr(konstAddr));
}
// DIVF_RR: regF[A] = regF[B] / regF[C].
// regF[C] is first compared against 0.0; the je after ucomisd jumps to the
// X_DIVISION_BY_ZERO exception label.
void JitCompiler::EmitDIVF_RR()
{
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    auto zeroXmm = newTempXmmSd();
    cc.xorpd(zeroXmm, zeroXmm);  // zeroXmm = 0.0
    cc.ucomisd(regF[C], zeroXmm);
    cc.je(zeroDivisor);

    auto divisor = CheckRegF(C, A);
    const auto dst = regF[A];
    cc.movsd(dst, regF[B]);
    cc.divsd(dst, divisor);
}
// DIVF_RK: regF[A] = regF[B] / konstf[C].
// A zero constant is detected at JIT-compile time and turned into an
// unconditional X_DIVISION_BY_ZERO throw.
void JitCompiler::EmitDIVF_RK()
{
    if (konstf[C] == 0.)
    {
        EmitThrowException(X_DIVISION_BY_ZERO);
        return;
    }

    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    cc.movsd(dst, regF[B]);
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.divsd(dst, asmjit::x86::qword_ptr(konstAddr));
}
void JitCompiler::EmitDIVF_KR()
{
auto rc = CheckRegF(C, A);
auto tmp = newTempIntPtr();
cc.mov(tmp, asmjit::imm_ptr(&konstf[B]));
cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp));
cc.divsd(regF[A], rc);
}
// Floored modulo used by the MODF opcodes: a - floor(a / b) * b.
static double DoubleModF(double a, double b)
{
    const double wholeQuotient = floor(a / b);
    return a - wholeQuotient * b;
}
// MODF_RR: regF[A] = DoubleModF(regF[B], regF[C]).
// regF[C] is first compared against 0.0; the je after ucomisd jumps to the
// X_DIVISION_BY_ZERO exception label.
void JitCompiler::EmitMODF_RR()
{
    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    auto zeroXmm = newTempXmmSd();
    cc.xorpd(zeroXmm, zeroXmm);  // zeroXmm = 0.0
    cc.ucomisd(regF[C], zeroXmm);
    cc.je(zeroDivisor);

    auto remainder = newResultXmmSd();
    auto modCall = CreateCall<double, double, double>(DoubleModF);
    modCall->setRet(0, remainder);
    modCall->setArg(0, regF[B]);
    modCall->setArg(1, regF[C]);
    cc.movsd(regF[A], remainder);
}
// MODF_RK: regF[A] = DoubleModF(regF[B], konstf[C]).
// A zero constant is detected at JIT-compile time and turned into an
// unconditional X_DIVISION_BY_ZERO throw.
void JitCompiler::EmitMODF_RK()
{
    if (konstf[C] == 0.)
    {
        EmitThrowException(X_DIVISION_BY_ZERO);
        return;
    }

    // Load the constant divisor into an XMM temporary for the call.
    auto konstAddr = newTempIntPtr();
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    auto divisor = newTempXmmSd();
    cc.movsd(divisor, asmjit::x86::qword_ptr(konstAddr));

    auto remainder = newResultXmmSd();
    auto modCall = CreateCall<double, double, double>(DoubleModF);
    modCall->setRet(0, remainder);
    modCall->setArg(0, regF[B]);
    modCall->setArg(1, divisor);
    cc.movsd(regF[A], remainder);
}
// MODF_KR: regF[A] = DoubleModF(konstf[B], regF[C]).
// regF[C] is first compared against 0.0; the je after ucomisd jumps to the
// X_DIVISION_BY_ZERO exception label.
void JitCompiler::EmitMODF_KR()
{
    using namespace asmjit;

    auto zeroDivisor = EmitThrowExceptionLabel(X_DIVISION_BY_ZERO);
    auto zeroXmm = newTempXmmSd();
    cc.xorpd(zeroXmm, zeroXmm);  // zeroXmm = 0.0
    cc.ucomisd(regF[C], zeroXmm);
    cc.je(zeroDivisor);

    // Load the constant dividend straight from its address.
    auto dividend = newTempXmmSd();
    cc.movsd(dividend, x86::ptr(ToMemAddress(&konstf[B])));

    auto remainder = newResultXmmSd();
    auto modCall = CreateCall<double, double, double>(DoubleModF);
    modCall->setRet(0, remainder);
    modCall->setArg(0, dividend);
    modCall->setArg(1, regF[C]);
    cc.movsd(regF[A], remainder);
}
// POWF_RR: regF[A] = g_pow(regF[B], regF[C]).
void JitCompiler::EmitPOWF_RR()
{
    auto power = newResultXmmSd();
    auto powCall = CreateCall<double, double, double>(g_pow);
    powCall->setRet(0, power);
    powCall->setArg(0, regF[B]);
    powCall->setArg(1, regF[C]);
    cc.movsd(regF[A], power);
}
// POWF_RK: regF[A] = g_pow(regF[B], konstf[C]).
void JitCompiler::EmitPOWF_RK()
{
    // Load the constant exponent into an XMM temporary for the call.
    auto konstAddr = newTempIntPtr();
    auto exponent = newTempXmmSd();
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.movsd(exponent, asmjit::x86::qword_ptr(konstAddr));

    auto power = newResultXmmSd();
    auto powCall = CreateCall<double, double, double>(g_pow);
    powCall->setRet(0, power);
    powCall->setArg(0, regF[B]);
    powCall->setArg(1, exponent);
    cc.movsd(regF[A], power);
}
// POWF_KR: regF[A] = g_pow(konstf[B], regF[C]).
void JitCompiler::EmitPOWF_KR()
{
    // Load the constant base into an XMM temporary for the call.
    auto konstAddr = newTempIntPtr();
    auto base = newTempXmmSd();
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[B]));
    cc.movsd(base, asmjit::x86::qword_ptr(konstAddr));

    auto power = newResultXmmSd();
    auto powCall = CreateCall<double, double, double>(g_pow);
    powCall->setRet(0, power);
    powCall->setArg(0, base);
    powCall->setArg(1, regF[C]);
    cc.movsd(regF[A], power);
}
// MINF_RR: regF[A] = min(regF[B], regF[C]) via the packed minpd instruction.
void JitCompiler::EmitMINF_RR()
{
    auto other = CheckRegF(C, A);
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    // Original remark: "minsd requires SSE 4.1".
    // NOTE(review): the Intel SDM lists MINSD under SSE2 - confirm why the packed form is used.
    cc.minpd(dst, other);
}
// MINF_RK: regF[A] = min(konstf[C], regF[B]) via the packed minpd instruction.
void JitCompiler::EmitMINF_RK()
{
    // regF[A] is loaded with the constant first, so the B operand is obtained
    // through CheckRegF(B, A) beforehand.
    auto other = CheckRegF(B, A);
    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.movsd(dst, asmjit::x86::qword_ptr(konstAddr));
    // Original remark: "minsd requires SSE 4.1".
    // NOTE(review): the Intel SDM lists MINSD under SSE2 - confirm why the packed form is used.
    cc.minpd(dst, other);
}
// MAXF_RR: regF[A] = max(regF[B], regF[C]) via the packed maxpd instruction.
void JitCompiler::EmitMAXF_RR()
{
    auto other = CheckRegF(C, A);
    const auto dst = regF[A];
    if (A != B)
    {
        cc.movsd(dst, regF[B]);
    }
    // Original remark: "maxsd requires SSE 4.1".
    // NOTE(review): the Intel SDM lists MAXSD under SSE2 - confirm why the packed form is used.
    cc.maxpd(dst, other);
}
// MAXF_RK: regF[A] = max(konstf[C], regF[B]) via the packed maxpd instruction.
void JitCompiler::EmitMAXF_RK()
{
    // regF[A] is loaded with the constant first, so the B operand is obtained
    // through CheckRegF(B, A) beforehand.
    auto other = CheckRegF(B, A);
    auto konstAddr = newTempIntPtr();
    const auto dst = regF[A];
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    cc.movsd(dst, asmjit::x86::qword_ptr(konstAddr));
    // Original remark: "maxsd requires SSE 4.1".
    // NOTE(review): the Intel SDM lists MAXSD under SSE2 - confirm why the packed form is used.
    cc.maxpd(dst, other);
}
// ATAN2: regF[A] = g_atan2(regF[B], regF[C]) * (180 / M_PI).
void JitCompiler::EmitATAN2()
{
    auto angle = newResultXmmSd();
    auto atanCall = CreateCall<double, double, double>(g_atan2);
    atanCall->setRet(0, angle);
    atanCall->setArg(0, regF[B]);
    atanCall->setArg(1, regF[C]);
    cc.movsd(regF[A], angle);

    // Static storage: the generated code reads the factor through its address.
    static const double radToDeg = 180 / M_PI;
    auto factorAddr = newTempIntPtr();
    cc.mov(factorAddr, asmjit::imm_ptr(&radToDeg));
    cc.mulsd(regF[A], asmjit::x86::qword_ptr(factorAddr));
}
// FLOP: unary floating point operation regF[A] = op(regF[B]), with the
// operation selected at JIT-compile time by C.
//  - FLOP_NEG is emitted inline by XOR-ing the value with -0.0.
//  - Every other sub-function is emitted as a call to a double(double)
//    function picked by the switch below.
//  - FLOP_TAN_DEG multiplies the input by M_PI / 180 before the call.
//  - FLOP_ACOS_DEG / FLOP_ASIN_DEG / FLOP_ATAN_DEG multiply the result by
//    180 / M_PI after the call.
//  - FLOP_COS_DEG / FLOP_SIN_DEG call g_cosdeg / g_sindeg with no scaling
//    emitted here.
// An unknown value of C reports "Unknown OP_FLOP subfunction" via I_Error.
void JitCompiler::EmitFLOP()
{
if (C == FLOP_NEG)
{
// -0.0 has only the sign bit set, so the xorpd below flips the sign.
auto mask = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0);
auto maskXmm = newTempXmmSd();
cc.movsd(maskXmm, mask);
if (A != B)
cc.movsd(regF[A], regF[B]);
cc.xorpd(regF[A], maskXmm);
}
else
{
// Work on a copy of the input so any pre-scaling does not modify regF[B].
auto v = newTempXmmSd();
cc.movsd(v, regF[B]);
if (C == FLOP_TAN_DEG)
{
// Scale the argument by M_PI / 180 before calling g_tan.
static const double constant = M_PI / 180;
auto tmp = newTempIntPtr();
cc.mov(tmp, asmjit::imm_ptr(&constant));
cc.mulsd(v, asmjit::x86::qword_ptr(tmp));
}
typedef double(*FuncPtr)(double);
FuncPtr func = nullptr;
switch (C)
{
default: I_Error("Unknown OP_FLOP subfunction"); break;
case FLOP_ABS: func = fabs; break;
case FLOP_EXP: func = g_exp; break;
case FLOP_LOG: func = g_log; break;
case FLOP_LOG10: func = g_log10; break;
case FLOP_SQRT: func = g_sqrt; break;
case FLOP_CEIL: func = ceil; break;
case FLOP_FLOOR: func = floor; break;
case FLOP_ACOS: func = g_acos; break;
case FLOP_ASIN: func = g_asin; break;
case FLOP_ATAN: func = g_atan; break;
case FLOP_COS: func = g_cos; break;
case FLOP_SIN: func = g_sin; break;
case FLOP_TAN: func = g_tan; break;
case FLOP_ACOS_DEG: func = g_acos; break;
case FLOP_ASIN_DEG: func = g_asin; break;
case FLOP_ATAN_DEG: func = g_atan; break;
case FLOP_COS_DEG: func = g_cosdeg; break;
case FLOP_SIN_DEG: func = g_sindeg; break;
case FLOP_TAN_DEG: func = g_tan; break;
case FLOP_COSH: func = g_cosh; break;
case FLOP_SINH: func = g_sinh; break;
case FLOP_TANH: func = g_tanh; break;
case FLOP_ROUND: func = round; break;
}
// Emit the call and store its result in regF[A].
auto result = newResultXmmSd();
auto call = CreateCall<double, double>(func);
call->setRet(0, result);
call->setArg(0, v);
cc.movsd(regF[A], result);
if (C == FLOP_ACOS_DEG || C == FLOP_ASIN_DEG || C == FLOP_ATAN_DEG)
{
// Scale the result by 180 / M_PI.
static const double constant = 180 / M_PI;
auto tmp = newTempIntPtr();
cc.mov(tmp, asmjit::imm_ptr(&constant));
cc.mulsd(regF[A], asmjit::x86::qword_ptr(tmp));
}
}
}
// EQF_R: floating point equality test between regF[B] and regF[C].
// Without CMP_APPROX in A the values are compared directly with ucomisd.
// With CMP_APPROX the code computes |regF[B] - regF[C]| and compares it with
// VM_EPSILON; the jump to `fail` is taken when VM_EPSILON is above the
// difference (check set) or when it is not above (check clear).
void JitCompiler::EmitEQF_R()
{
EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
bool approx = static_cast<bool>(A & CMP_APPROX);
if (!approx)
{
cc.ucomisd(regF[B], regF[C]);
// The jp branches are emitted ahead of the equality jumps.
// NOTE(review): presumably they route the unordered (NaN) outcome, which
// ucomisd reports through the parity flag - confirm against the ISA manual.
if (check) {
cc.jp(success);
cc.je(fail);
}
else {
cc.jp(fail);
cc.jne(fail);
}
}
else
{
auto tmp = newTempXmmSd();
// All bits except the sign bit; AND-ing with it yields the absolute value.
// NOTE(review): the reinterpret_cast below reads an int64_t through a
// double reference - confirm this type punning is acceptable for the
// supported compilers.
const int64_t absMaskInt = 0x7FFFFFFFFFFFFFFF;
auto absMask = cc.newDoubleConst(asmjit::kConstScopeLocal, reinterpret_cast<const double&>(absMaskInt));
auto absMaskXmm = newTempXmmPd();
auto epsilon = cc.newDoubleConst(asmjit::kConstScopeLocal, VM_EPSILON);
auto epsilonXmm = newTempXmmSd();
// tmp = |regF[B] - regF[C]|
cc.movsd(tmp, regF[B]);
cc.subsd(tmp, regF[C]);
cc.movsd(absMaskXmm, absMask);
cc.andpd(tmp, absMaskXmm);
// Compare VM_EPSILON against the absolute difference.
cc.movsd(epsilonXmm, epsilon);
cc.ucomisd(epsilonXmm, tmp);
if (check) cc.ja(fail);
else cc.jna(fail);
}
});
}
// EQF_K: floating point equality test between regF[B] and the constant
// konstf[C], which is read from memory through its address.
// Without CMP_APPROX in A the values are compared directly with ucomisd.
// With CMP_APPROX the code computes |regF[B] - konstf[C]| and compares it with
// VM_EPSILON; the jump to `fail` is taken when VM_EPSILON is above the
// difference (check set) or when it is not above (check clear).
void JitCompiler::EmitEQF_K()
{
using namespace asmjit;
EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
bool approx = static_cast<bool>(A & CMP_APPROX);
if (!approx) {
auto konstTmp = newTempIntPtr();
cc.mov(konstTmp, asmjit::imm_ptr(&konstf[C]));
cc.ucomisd(regF[B], x86::qword_ptr(konstTmp));
// The jp branches are emitted ahead of the equality jumps.
// NOTE(review): presumably they route the unordered (NaN) outcome, which
// ucomisd reports through the parity flag - confirm against the ISA manual.
if (check) {
cc.jp(success);
cc.je(fail);
}
else {
cc.jp(fail);
cc.jne(fail);
}
}
else {
auto konstTmp = newTempIntPtr();
auto subTmp = newTempXmmSd();
// All bits except the sign bit; AND-ing with it yields the absolute value.
// NOTE(review): the reinterpret_cast below reads an int64_t through a
// double reference - confirm this type punning is acceptable for the
// supported compilers.
const int64_t absMaskInt = 0x7FFFFFFFFFFFFFFF;
auto absMask = cc.newDoubleConst(kConstScopeLocal, reinterpret_cast<const double&>(absMaskInt));
auto absMaskXmm = newTempXmmPd();
auto epsilon = cc.newDoubleConst(kConstScopeLocal, VM_EPSILON);
auto epsilonXmm = newTempXmmSd();
// subTmp = |regF[B] - konstf[C]|
cc.mov(konstTmp, asmjit::imm_ptr(&konstf[C]));
cc.movsd(subTmp, regF[B]);
cc.subsd(subTmp, x86::qword_ptr(konstTmp));
cc.movsd(absMaskXmm, absMask);
cc.andpd(subTmp, absMaskXmm);
// Compare VM_EPSILON against the absolute difference.
cc.movsd(epsilonXmm, epsilon);
cc.ucomisd(epsilonXmm, subTmp);
if (check) cc.ja(fail);
else cc.jna(fail);
}
});
}
// LTF_RR: compares regF[C] against regF[B] with ucomisd. With `check` set the
// jump to `fail` is taken when C is above B, otherwise when it is not above.
// CMP_APPROX is rejected with an error at JIT-compile time.
void JitCompiler::EmitLTF_RR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        const bool approx = static_cast<bool>(A & CMP_APPROX);
        if (approx)
        {
            I_Error("CMP_APPROX not implemented for LTF_RR.\n");
        }

        cc.ucomisd(regF[C], regF[B]);
        if (check)
        {
            cc.ja(fail);
        }
        else
        {
            cc.jna(fail);
        }
    });
}
// LTF_RK: compares konstf[C] (loaded into a temporary) against regF[B] with
// ucomisd. With `check` set the jump to `fail` is taken when the constant is
// above B, otherwise when it is not above.
// CMP_APPROX is rejected with an error at JIT-compile time.
void JitCompiler::EmitLTF_RK()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        const bool approx = static_cast<bool>(A & CMP_APPROX);
        if (approx)
        {
            I_Error("CMP_APPROX not implemented for LTF_RK.\n");
        }

        auto konstAddr = newTempIntPtr();
        auto konstValue = newTempXmmSd();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
        cc.movsd(konstValue, asmjit::x86::qword_ptr(konstAddr));
        cc.ucomisd(konstValue, regF[B]);
        if (check)
        {
            cc.ja(fail);
        }
        else
        {
            cc.jna(fail);
        }
    });
}
// LTF_KR: compares regF[C] against konstf[B] (read from memory) with ucomisd.
// With `check` set the jump to `fail` is taken when C is above the constant,
// otherwise when it is not above.
// CMP_APPROX is rejected with an error at JIT-compile time.
void JitCompiler::EmitLTF_KR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        const bool approx = static_cast<bool>(A & CMP_APPROX);
        if (approx)
        {
            I_Error("CMP_APPROX not implemented for LTF_KR.\n");
        }

        auto konstAddr = newTempIntPtr();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstf[B]));
        cc.ucomisd(regF[C], asmjit::x86::qword_ptr(konstAddr));
        if (check)
        {
            cc.ja(fail);
        }
        else
        {
            cc.jna(fail);
        }
    });
}
// LEF_RR: compares regF[C] against regF[B] with ucomisd. With `check` set the
// jump to `fail` is taken when C is above or equal to B, otherwise when it is
// not. CMP_APPROX is rejected with an error at JIT-compile time.
void JitCompiler::EmitLEF_RR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        const bool approx = static_cast<bool>(A & CMP_APPROX);
        if (approx)
        {
            I_Error("CMP_APPROX not implemented for LEF_RR.\n");
        }

        cc.ucomisd(regF[C], regF[B]);
        if (check)
        {
            cc.jae(fail);
        }
        else
        {
            cc.jnae(fail);
        }
    });
}
// LEF_RK: compares konstf[C] (loaded into a temporary) against regF[B] with
// ucomisd. With `check` set the jump to `fail` is taken when the constant is
// above or equal to B, otherwise when it is not.
// CMP_APPROX is rejected with an error at JIT-compile time.
void JitCompiler::EmitLEF_RK()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        const bool approx = static_cast<bool>(A & CMP_APPROX);
        if (approx)
        {
            I_Error("CMP_APPROX not implemented for LEF_RK.\n");
        }

        auto konstAddr = newTempIntPtr();
        auto konstValue = newTempXmmSd();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
        cc.movsd(konstValue, asmjit::x86::qword_ptr(konstAddr));
        cc.ucomisd(konstValue, regF[B]);
        if (check)
        {
            cc.jae(fail);
        }
        else
        {
            cc.jnae(fail);
        }
    });
}
// LEF_KR: compares regF[C] against konstf[B] (read from memory) with ucomisd.
// With `check` set the jump to `fail` is taken when C is above or equal to the
// constant, otherwise when it is not.
// CMP_APPROX is rejected with an error at JIT-compile time.
void JitCompiler::EmitLEF_KR()
{
    EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        const bool approx = static_cast<bool>(A & CMP_APPROX);
        if (approx)
        {
            I_Error("CMP_APPROX not implemented for LEF_KR.\n");
        }

        auto konstAddr = newTempIntPtr();
        cc.mov(konstAddr, asmjit::imm_ptr(&konstf[B]));
        cc.ucomisd(regF[C], asmjit::x86::qword_ptr(konstAddr));
        if (check)
        {
            cc.jae(fail);
        }
        else
        {
            cc.jnae(fail);
        }
    });
}
/////////////////////////////////////////////////////////////////////////////
// Vector math. (2D)
// NEGV2: regF[A..A+1] = -regF[B..B+1], flipping the sign bit by XOR with -0.0.
void JitCompiler::EmitNEGV2()
{
    auto signBit = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0);
    auto signBitXmm = newTempXmmSd();
    cc.movsd(signBitXmm, signBit);

    for (int lane = 0; lane < 2; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
        cc.xorpd(regF[A + lane], signBitXmm);
    }
}
// ADDV2_RR: regF[A..A+1] = regF[B..B+1] + regF[C..C+1], component-wise.
void JitCompiler::EmitADDV2_RR()
{
    // Right-hand components obtained through CheckRegF before any destination is written.
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A + 1);

    cc.movsd(regF[A], regF[B]);
    cc.addsd(regF[A], rhsX);

    cc.movsd(regF[A + 1], regF[B + 1]);
    cc.addsd(regF[A + 1], rhsY);
}
// SUBV2_RR: regF[A..A+1] = regF[B..B+1] - regF[C..C+1], component-wise.
void JitCompiler::EmitSUBV2_RR()
{
    // Right-hand components obtained through CheckRegF before any destination is written.
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A + 1);

    cc.movsd(regF[A], regF[B]);
    cc.subsd(regF[A], rhsX);

    cc.movsd(regF[A + 1], regF[B + 1]);
    cc.subsd(regF[A + 1], rhsY);
}
// DOTV2_RR: regF[A] = regF[B] * regF[C] + regF[B+1] * regF[C+1].
// regF[A] is written by the first product, so every operand that is read
// afterwards is obtained through CheckRegF(..., A) beforehand. The B+1
// operand was previously read directly from regF[B + 1] after regF[A] had
// been overwritten; it now goes through CheckRegF like in EmitDOTV3_RR and
// EmitDOTV4_RR.
// NOTE(review): CheckRegF is defined elsewhere; presumably it returns an
// operand that stays valid when the two register indices alias - confirm.
void JitCompiler::EmitDOTV2_RR()
{
    auto rb1 = CheckRegF(B + 1, A);
    auto rc0 = CheckRegF(C, A);
    auto rc1 = CheckRegF(C + 1, A);
    auto tmp = newTempXmmSd();
    // regF[A] = b0 * c0
    cc.movsd(regF[A], regF[B]);
    cc.mulsd(regF[A], rc0);
    // regF[A] += b1 * c1
    cc.movsd(tmp, rb1);
    cc.mulsd(tmp, rc1);
    cc.addsd(regF[A], tmp);
}
// MULVF2_RR: regF[A..A+1] = regF[B..B+1] * regF[C] (vector times scalar).
void JitCompiler::EmitMULVF2_RR()
{
    // Scalar operand obtained through CheckRegF against both destination lanes.
    auto scalar = CheckRegF(C, A, A + 1);
    for (int lane = 0; lane < 2; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    for (int lane = 0; lane < 2; lane++)
    {
        cc.mulsd(regF[A + lane], scalar);
    }
}
// MULVF2_RK: regF[A..A+1] = regF[B..B+1] * konstf[C] (constant read from memory).
void JitCompiler::EmitMULVF2_RK()
{
    auto konstAddr = newTempIntPtr();
    for (int lane = 0; lane < 2; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    for (int lane = 0; lane < 2; lane++)
    {
        cc.mulsd(regF[A + lane], asmjit::x86::qword_ptr(konstAddr));
    }
}
// DIVVF2_RR: regF[A..A+1] = regF[B..B+1] / regF[C] (vector divided by scalar).
// No zero check is emitted for the divisor here.
void JitCompiler::EmitDIVVF2_RR()
{
    // Scalar operand obtained through CheckRegF against both destination lanes.
    auto scalar = CheckRegF(C, A, A + 1);
    for (int lane = 0; lane < 2; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    for (int lane = 0; lane < 2; lane++)
    {
        cc.divsd(regF[A + lane], scalar);
    }
}
// DIVVF2_RK: regF[A..A+1] = regF[B..B+1] / konstf[C] (constant read from memory).
// No zero check is emitted for the divisor here.
void JitCompiler::EmitDIVVF2_RK()
{
    auto konstAddr = newTempIntPtr();
    for (int lane = 0; lane < 2; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    for (int lane = 0; lane < 2; lane++)
    {
        cc.divsd(regF[A + lane], asmjit::x86::qword_ptr(konstAddr));
    }
}
// LENV2: regF[A] = sqrt(regF[B]^2 + regF[B+1]^2), using CallSqrt for the root.
void JitCompiler::EmitLENV2()
{
    // Both components are obtained through CheckRegF(..., A) before regF[A] is written.
    auto compX = CheckRegF(B, A);
    auto compY = CheckRegF(B + 1, A);
    auto squareY = newTempXmmSd();
    const auto dst = regF[A];

    cc.movsd(dst, regF[B]);
    cc.mulsd(dst, compX);

    cc.movsd(squareY, compY);
    cc.mulsd(squareY, compY);

    cc.addsd(dst, squareY);
    CallSqrt(dst, dst);
}
// EQV2_R: delegates to EmitVectorComparison<2> inside the comparison wrapper.
void JitCompiler::EmitEQV2_R()
{
    auto emitCompare = [&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        EmitVectorComparison<2>(check, fail, success);
    };
    EmitComparisonOpcode(emitCompare);
}
// EQV2_K: not supported by the JIT; reports an error at JIT-compile time.
void JitCompiler::EmitEQV2_K()
{
    I_Error("EQV2_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Vector math. (3D)
// NEGV3: regF[A..A+2] = -regF[B..B+2], flipping the sign bit by XOR with -0.0.
void JitCompiler::EmitNEGV3()
{
    auto signBit = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0);
    auto signBitXmm = newTempXmmSd();
    cc.movsd(signBitXmm, signBit);

    for (int lane = 0; lane < 3; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
        cc.xorpd(regF[A + lane], signBitXmm);
    }
}
// ADDV3_RR: regF[A..A+2] = regF[B..B+2] + regF[C..C+2], component-wise.
void JitCompiler::EmitADDV3_RR()
{
    // Right-hand components obtained through CheckRegF before any destination is written.
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A + 1);
    auto rhsZ = CheckRegF(C + 2, A + 2);

    cc.movsd(regF[A], regF[B]);
    cc.addsd(regF[A], rhsX);

    cc.movsd(regF[A + 1], regF[B + 1]);
    cc.addsd(regF[A + 1], rhsY);

    cc.movsd(regF[A + 2], regF[B + 2]);
    cc.addsd(regF[A + 2], rhsZ);
}
// SUBV3_RR: regF[A..A+2] = regF[B..B+2] - regF[C..C+2], component-wise.
void JitCompiler::EmitSUBV3_RR()
{
    // Right-hand components obtained through CheckRegF before any destination is written.
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A + 1);
    auto rhsZ = CheckRegF(C + 2, A + 2);

    cc.movsd(regF[A], regF[B]);
    cc.subsd(regF[A], rhsX);

    cc.movsd(regF[A + 1], regF[B + 1]);
    cc.subsd(regF[A + 1], rhsY);

    cc.movsd(regF[A + 2], regF[B + 2]);
    cc.subsd(regF[A + 2], rhsZ);
}
// DOTV3_RR: regF[A] = sum over i of regF[B+i] * regF[C+i], i = 0..2.
void JitCompiler::EmitDOTV3_RR()
{
    // Every operand read after regF[A] is written goes through CheckRegF(..., A).
    auto lhsY = CheckRegF(B + 1, A);
    auto lhsZ = CheckRegF(B + 2, A);
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A);
    auto rhsZ = CheckRegF(C + 2, A);
    auto product = newTempXmmSd();
    const auto dst = regF[A];

    // dst = x * x'
    cc.movsd(dst, regF[B]);
    cc.mulsd(dst, rhsX);

    // dst += y * y'
    cc.movsd(product, lhsY);
    cc.mulsd(product, rhsY);
    cc.addsd(dst, product);

    // dst += z * z'
    cc.movsd(product, lhsZ);
    cc.mulsd(product, rhsZ);
    cc.addsd(dst, product);
}
// CROSSV_RR: regF[A..A+2] = regF[B..B+2] x regF[C..C+2] (3D cross product).
// Operands are named a0..a2 (from B) and b0..b2 (from C); each is obtained
// through CheckRegF against one destination register before any destination
// is written.
// NOTE(review): a1/b1 are only checked against A+1 and a2/b2 against A+2,
// yet a1 and b0 are read after regF[A] and regF[A + 1] have been written -
// confirm the register allocator never produces a partial overlap such as
// B + 1 == A.
void JitCompiler::EmitCROSSV_RR()
{
auto tmp = newTempXmmSd();
auto a0 = CheckRegF(B, A);
auto a1 = CheckRegF(B + 1, A + 1);
auto a2 = CheckRegF(B + 2, A + 2);
auto b0 = CheckRegF(C, A);
auto b1 = CheckRegF(C + 1, A + 1);
auto b2 = CheckRegF(C + 2, A + 2);
// r0 = a1b2 - a2b1
cc.movsd(regF[A], a1);
cc.mulsd(regF[A], b2);
cc.movsd(tmp, a2);
cc.mulsd(tmp, b1);
cc.subsd(regF[A], tmp);
// r1 = a2b0 - a0b2
cc.movsd(regF[A + 1], a2);
cc.mulsd(regF[A + 1], b0);
cc.movsd(tmp, a0);
cc.mulsd(tmp, b2);
cc.subsd(regF[A + 1], tmp);
// r2 = a0b1 - a1b0
cc.movsd(regF[A + 2], a0);
cc.mulsd(regF[A + 2], b1);
cc.movsd(tmp, a1);
cc.mulsd(tmp, b0);
cc.subsd(regF[A + 2], tmp);
}
// MULVF3_RR: regF[A..A+2] = regF[B..B+2] * regF[C] (vector times scalar).
void JitCompiler::EmitMULVF3_RR()
{
    // Scalar operand obtained through CheckRegF against all destination lanes.
    auto scalar = CheckRegF(C, A, A + 1, A + 2);
    for (int lane = 0; lane < 3; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    for (int lane = 0; lane < 3; lane++)
    {
        cc.mulsd(regF[A + lane], scalar);
    }
}
// MULVF3_RK: regF[A..A+2] = regF[B..B+2] * konstf[C] (constant read from memory).
void JitCompiler::EmitMULVF3_RK()
{
    auto konstAddr = newTempIntPtr();
    for (int lane = 0; lane < 3; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    for (int lane = 0; lane < 3; lane++)
    {
        cc.mulsd(regF[A + lane], asmjit::x86::qword_ptr(konstAddr));
    }
}
// DIVVF3_RR: regF[A..A+2] = regF[B..B+2] / regF[C] (vector divided by scalar).
// No zero check is emitted for the divisor here.
void JitCompiler::EmitDIVVF3_RR()
{
    // Scalar operand obtained through CheckRegF against all destination lanes.
    auto scalar = CheckRegF(C, A, A + 1, A + 2);
    for (int lane = 0; lane < 3; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    for (int lane = 0; lane < 3; lane++)
    {
        cc.divsd(regF[A + lane], scalar);
    }
}
// DIVVF3_RK: regF[A..A+2] = regF[B..B+2] / konstf[C] (constant read from memory).
// No zero check is emitted for the divisor here.
void JitCompiler::EmitDIVVF3_RK()
{
    auto konstAddr = newTempIntPtr();
    for (int lane = 0; lane < 3; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
    }
    cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
    for (int lane = 0; lane < 3; lane++)
    {
        cc.divsd(regF[A + lane], asmjit::x86::qword_ptr(konstAddr));
    }
}
// LENV3: regF[A] = sqrt(regF[B]^2 + regF[B+1]^2 + regF[B+2]^2), using
// CallSqrt for the root.
void JitCompiler::EmitLENV3()
{
    // The second and third components are obtained through CheckRegF(..., A)
    // because they are read after regF[A] has been written.
    auto compY = CheckRegF(B + 1, A);
    auto compZ = CheckRegF(B + 2, A);
    auto square = newTempXmmSd();
    const auto dst = regF[A];

    // dst = x * x
    cc.movsd(dst, regF[B]);
    cc.mulsd(dst, regF[B]);

    // dst += y * y
    cc.movsd(square, compY);
    cc.mulsd(square, compY);
    cc.addsd(dst, square);

    // dst += z * z
    cc.movsd(square, compZ);
    cc.mulsd(square, compZ);
    cc.addsd(dst, square);

    CallSqrt(dst, dst);
}
// EQV3_R: delegates to EmitVectorComparison<3> inside the comparison wrapper.
void JitCompiler::EmitEQV3_R()
{
    auto emitCompare = [&](bool check, asmjit::Label& fail, asmjit::Label& success) {
        EmitVectorComparison<3>(check, fail, success);
    };
    EmitComparisonOpcode(emitCompare);
}
// EQV3_K: not supported by the JIT; reports an error at JIT-compile time.
void JitCompiler::EmitEQV3_K()
{
    I_Error("EQV3_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Vector math. (4D/Quaternion)
// NEGV4: regF[A..A+3] = -regF[B..B+3], flipping the sign bit by XOR with -0.0.
void JitCompiler::EmitNEGV4()
{
    auto signBit = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0);
    auto signBitXmm = newTempXmmSd();
    cc.movsd(signBitXmm, signBit);

    for (int lane = 0; lane < 4; lane++)
    {
        cc.movsd(regF[A + lane], regF[B + lane]);
        cc.xorpd(regF[A + lane], signBitXmm);
    }
}
// ADDV4_RR: regF[A..A+3] = regF[B..B+3] + regF[C..C+3], component-wise.
void JitCompiler::EmitADDV4_RR()
{
    // Right-hand components obtained through CheckRegF before any destination is written.
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A + 1);
    auto rhsZ = CheckRegF(C + 2, A + 2);
    auto rhsW = CheckRegF(C + 3, A + 3);

    cc.movsd(regF[A], regF[B]);
    cc.addsd(regF[A], rhsX);

    cc.movsd(regF[A + 1], regF[B + 1]);
    cc.addsd(regF[A + 1], rhsY);

    cc.movsd(regF[A + 2], regF[B + 2]);
    cc.addsd(regF[A + 2], rhsZ);

    cc.movsd(regF[A + 3], regF[B + 3]);
    cc.addsd(regF[A + 3], rhsW);
}
// SUBV4_RR: regF[A..A+3] = regF[B..B+3] - regF[C..C+3], component-wise.
void JitCompiler::EmitSUBV4_RR()
{
    // Right-hand components obtained through CheckRegF before any destination is written.
    auto rhsX = CheckRegF(C, A);
    auto rhsY = CheckRegF(C + 1, A + 1);
    auto rhsZ = CheckRegF(C + 2, A + 2);
    auto rhsW = CheckRegF(C + 3, A + 3);

    cc.movsd(regF[A], regF[B]);
    cc.subsd(regF[A], rhsX);

    cc.movsd(regF[A + 1], regF[B + 1]);
    cc.subsd(regF[A + 1], rhsY);

    cc.movsd(regF[A + 2], regF[B + 2]);
    cc.subsd(regF[A + 2], rhsZ);

    cc.movsd(regF[A + 3], regF[B + 3]);
    cc.subsd(regF[A + 3], rhsW);
}
void JitCompiler::EmitDOTV4_RR()
{
auto rb1 = CheckRegF(B + 1, A);
auto rb2 = CheckRegF(B + 2, A);
auto rb3 = CheckRegF(B + 3, A);
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A);
auto rc2 = CheckRegF(C + 2, A);
auto rc3 = CheckRegF(C + 3, A);
auto tmp = newTempXmmSd();
cc.movsd(regF[A], regF[B]);
cc.mulsd(regF[A], rc0);
cc.movsd(tmp, rb1);
cc.mulsd(tmp, rc1);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb2);
cc.mulsd(tmp, rc2);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb3);
cc.mulsd(tmp, rc3);
cc.addsd(regF[A], tmp);
}
// Emitter for MULVF4_RR: regF[A..A+3] = regF[B..B+3] * regF[C].
// The scalar is fetched through CheckRegF against all four destination
// registers before any of them is written.
void JitCompiler::EmitMULVF4_RR()
{
	auto scalar = CheckRegF(C, A, A + 1, A + 2, A + 3);

	// Copy the whole vector first, then scale it in place.
	for (int lane = 0; lane < 4; lane++)
		cc.movsd(regF[A + lane], regF[B + lane]);

	for (int lane = 0; lane < 4; lane++)
		cc.mulsd(regF[A + lane], scalar);
}
// Emitter for MULVF4_RK: regF[A..A+3] = regF[B..B+3] * konstf[C].
// The constant is read from memory through a pointer-sized temporary that
// holds the address of konstf[C].
void JitCompiler::EmitMULVF4_RK()
{
	auto factorAddr = newTempIntPtr();

	for (int lane = 0; lane < 4; lane++)
		cc.movsd(regF[A + lane], regF[B + lane]);

	cc.mov(factorAddr, asmjit::imm_ptr(&konstf[C]));
	const auto factor = asmjit::x86::qword_ptr(factorAddr);

	for (int lane = 0; lane < 4; lane++)
		cc.mulsd(regF[A + lane], factor);
}
// Emitter for DIVVF4_RR: regF[A..A+3] = regF[B..B+3] / regF[C].
// The divisor is fetched through CheckRegF against all four destination
// registers before any of them is written.
void JitCompiler::EmitDIVVF4_RR()
{
	auto divisor = CheckRegF(C, A, A + 1, A + 2, A + 3);

	// Copy the whole vector first, then divide it in place.
	for (int lane = 0; lane < 4; lane++)
		cc.movsd(regF[A + lane], regF[B + lane]);

	for (int lane = 0; lane < 4; lane++)
		cc.divsd(regF[A + lane], divisor);
}
// Emitter for DIVVF4_RK: regF[A..A+3] = regF[B..B+3] / konstf[C].
// The constant is read from memory through a pointer-sized temporary that
// holds the address of konstf[C].
void JitCompiler::EmitDIVVF4_RK()
{
	auto divisorAddr = newTempIntPtr();

	for (int lane = 0; lane < 4; lane++)
		cc.movsd(regF[A + lane], regF[B + lane]);

	cc.mov(divisorAddr, asmjit::imm_ptr(&konstf[C]));
	const auto divisor = asmjit::x86::qword_ptr(divisorAddr);

	for (int lane = 0; lane < 4; lane++)
		cc.divsd(regF[A + lane], divisor);
}
void JitCompiler::EmitLENV4()
{
auto rb1 = CheckRegF(B + 1, A);
auto rb2 = CheckRegF(B + 2, A);
auto rb3 = CheckRegF(B + 3, A);
auto tmp = newTempXmmSd();
cc.movsd(regF[A], regF[B]);
cc.mulsd(regF[A], regF[B]);
cc.movsd(tmp, rb1);
cc.mulsd(tmp, rb1);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb2);
cc.mulsd(tmp, rb2);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb3);
cc.mulsd(tmp, rb3);
cc.addsd(regF[A], tmp);
CallSqrt(regF[A], regF[A]);
}
// Emitter for EQV4_R: wraps the 4-component vector comparison in the common
// comparison-opcode scaffolding provided by EmitComparisonOpcode.
void JitCompiler::EmitEQV4_R()
{
	auto compareVec4 = [&](bool check, asmjit::Label& fail, asmjit::Label& success)
	{
		EmitVectorComparison<4>(check, fail, success);
	};
	EmitComparisonOpcode(compareVec4);
}
// Emitter for EQV4_K. No code is generated for this opcode: reaching this
// emitter is reported through I_Error, since the opcode is not expected to occur.
void JitCompiler::EmitEQV4_K()
{
I_Error("EQV4_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Pointer math.
// Emitter for ADDA_RR: regA[A] = regA[B] + regD[C], except that a zero (null)
// base pointer is stored to the destination unchanged.
void JitCompiler::EmitADDA_RR()
{
	auto address = newTempIntPtr();
	auto skipAdd = cc.newLabel();

	cc.mov(address, regA[B]);

	// A null base must stay null, so the addition is jumped over in that case.
	cc.cmp(address, 0);
	cc.je(skipAdd);

	// NOTE(review): the offset comes from the integer register file and is moved
	// into a pointer-sized temp with a plain mov - confirm how negative offsets
	// are extended on 64-bit targets.
	auto offset = newTempIntPtr();
	cc.mov(offset, regD[C]);
	cc.add(address, offset);

	cc.bind(skipAdd);
	cc.mov(regA[A], address);
}
// Emitter for ADDA_RK: regA[A] = regA[B] + konstd[C], except that a zero (null)
// base pointer is stored to the destination unchanged.
void JitCompiler::EmitADDA_RK()
{
	auto address = newTempIntPtr();
	auto skipAdd = cc.newLabel();

	cc.mov(address, regA[B]);

	// A null base must stay null, so the addition is jumped over in that case.
	cc.cmp(address, 0);
	cc.je(skipAdd);

	// The constant offset is encoded directly as an immediate operand.
	cc.add(address, konstd[C]);

	cc.bind(skipAdd);
	cc.mov(regA[A], address);
}
// Emitter for SUBA: regA[A] = regA[B] - regD[C], computed in a pointer-sized
// temporary so the source register is left untouched.
// NOTE(review): the subtrahend is read from the integer register file and the
// result is written to the address register file - confirm that this matches
// the interpreter's definition of SUBA.
void JitCompiler::EmitSUBA()
{
	auto difference = newTempIntPtr();

	cc.mov(difference, regA[B]);
	cc.sub(difference, regD[C]);
	cc.mov(regA[A], difference);
}
// Emitter for EQA_R: compares the address registers regA[B] and regA[C].
// With check set, control jumps to `fail` when the pointers are equal;
// otherwise it jumps to `fail` when they differ.
void JitCompiler::EmitEQA_R()
{
	auto comparePointers = [&](bool check, asmjit::Label& fail, asmjit::Label& success)
	{
		cc.cmp(regA[B], regA[C]);
		if (check)
		{
			cc.je(fail);
		}
		else
		{
			cc.jne(fail);
		}
	};
	EmitComparisonOpcode(comparePointers);
}
// Emitter for EQA_K: compares the address register regA[B] with the constant
// pointer konsta[C].v, which is first loaded into a pointer-sized temporary.
// With check set, control jumps to `fail` when the pointers are equal;
// otherwise it jumps to `fail` when they differ.
void JitCompiler::EmitEQA_K()
{
	auto compareWithConstant = [&](bool check, asmjit::Label& fail, asmjit::Label& success)
	{
		auto constantPtr = newTempIntPtr();
		cc.mov(constantPtr, asmjit::imm_ptr(konsta[C].v));
		cc.cmp(regA[B], constantPtr);
		if (check)
		{
			cc.je(fail);
		}
		else
		{
			cc.jne(fail);
		}
	};
	EmitComparisonOpcode(compareWithConstant);
}
// Emits a call to g_sqrt with `b` as its single double argument and stores
// the returned double in `a`. `a` and `b` may be the same register; the
// result is first received in a dedicated result register and then copied.
void JitCompiler::CallSqrt(const asmjit::X86Xmm &a, const asmjit::X86Xmm &b)
{
	auto root = newResultXmmSd();
	auto sqrtCall = CreateCall<double, double>(g_sqrt);

	sqrtCall->setRet(0, root);
	sqrtCall->setArg(0, b);

	cc.movsd(a, root);
}