Merge pull request #629 from Doom2fan/asmjit

Added support for CMP_APPROX to EQV_R and moved the code to a template.
This commit is contained in:
Magnus Norddahl 2018-11-14 13:35:19 +01:00 committed by GitHub
commit f76109e479
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 52 deletions

View File

@ -1219,27 +1219,7 @@ void JitCompiler::EmitLENV2()
// Emits the EQV2_R opcode: an equality test between the two-component float
// vectors starting at regF[B] and regF[C].
//
// All of the comparison code (both the exact form and the CMP_APPROX form) is
// produced by EmitVectorComparison<2>, so this opcode no longer rejects
// CMP_APPROX and no longer emits its own hand-unrolled copy of the compare.
void JitCompiler::EmitEQV2_R()
{
	EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
		EmitVectorComparison<2>(check, fail, success);
	});
}
@ -1406,37 +1386,7 @@ void JitCompiler::EmitLENV3()
// Emits the EQV3_R opcode: an equality test between the three-component float
// vectors starting at regF[B] and regF[C].
//
// All of the comparison code (both the exact form and the CMP_APPROX form) is
// produced by EmitVectorComparison<3>, so this opcode no longer rejects
// CMP_APPROX and no longer emits its own hand-unrolled copy of the compare.
void JitCompiler::EmitEQV3_R()
{
	EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) {
		EmitVectorComparison<3>(check, fail, success);
	});
}

View File

@ -72,6 +72,53 @@ private:
pc++; // This instruction uses two instruction slots - skip the next one
}
// Emits a component-wise equality test between the N-component float vectors
// regF[B .. B+N-1] and regF[C .. C+N-1].
//
// Template parameter:
//   N       - number of components to compare.
// Parameters:
//   check   - selects which outcome branches to `fail`:
//               true  -> jump to `fail` only when ALL components match;
//                        any mismatching component jumps to `success`.
//               false -> jump to `fail` as soon as ANY component mismatches.
//             In every other case the emitted code falls through to whatever
//             the caller emits next (presumably where `success` is bound -
//             confirm against EmitComparisonOpcode).
//   fail    - label taken as described above.
//   success - label taken on an early mismatch when `check` is true.
//
// When the opcode's A operand has CMP_APPROX set, two components match if
// |b - c| < VM_EPSILON; otherwise they must compare exactly equal. A NaN in
// either operand never counts as a match.
template<int N>
void EmitVectorComparison(bool check, asmjit::Label& fail, asmjit::Label& success)
{
	const bool approx = static_cast<bool>(A & CMP_APPROX);
	if (!approx)
	{
		for (int i = 0; i < N; i++)
		{
			const bool lastComponent = (i == N - 1);

			cc.ucomisd(regF[B + i], regF[C + i]);
			if (check)
			{
				// Unordered (NaN) means "not equal".
				cc.jp(success);
				if (lastComponent)
				{
					// Every earlier component matched; if this one does too,
					// the vectors are equal and the branch to `fail` is taken.
					cc.je(fail);
				}
				else
				{
					// A mismatch settles the result early; a match continues
					// with the next component.
					cc.jne(success);
				}
			}
			else
			{
				cc.jp(fail);
				cc.jne(fail);
			}
		}
	}
	else
	{
		auto tmp = newTempXmmSd();

		// Bit mask that clears the sign bit of a double, i.e. computes fabs().
		const int64_t absMaskInt = 0x7FFFFFFFFFFFFFFF;
		auto absMask = cc.newDoubleConst(asmjit::kConstScopeLocal, reinterpret_cast<const double&>(absMaskInt));
		auto absMaskXmm = newTempXmmPd();
		auto epsilon = cc.newDoubleConst(asmjit::kConstScopeLocal, VM_EPSILON);
		auto epsilonXmm = newTempXmmSd();

		for (int i = 0; i < N; i++)
		{
			const bool lastComponent = (i == N - 1);

			// tmp = |regF[B + i] - regF[C + i]|
			cc.movsd(tmp, regF[B + i]);
			cc.subsd(tmp, regF[C + i]);
			cc.movsd(absMaskXmm, absMask);
			cc.andpd(tmp, absMaskXmm);

			// Compare epsilon against the absolute difference: "above" means
			// the difference is strictly smaller than epsilon (a match).
			// An unordered (NaN) compare sets CF, so `ja` is not taken and
			// `jna` is, which treats NaN as a mismatch.
			cc.movsd(epsilonXmm, epsilon);
			cc.ucomisd(epsilonXmm, tmp);
			if (check)
			{
				if (lastComponent)
				{
					// All earlier components matched; a final match means the
					// vectors are approximately equal.
					cc.ja(fail);
				}
				else
				{
					// A mismatch settles the result early; a match continues
					// with the next component.
					cc.jna(success);
				}
			}
			else
			{
				cc.jna(fail);
			}
		}
	}
}
static uint64_t ToMemAddress(const void *d)
{
return (uint64_t)(ptrdiff_t)d;