Merge pull request #540 from Doom2fan/asmjit

Changed binary int ops to use a temporary register, fixed some opcodes.
This commit is contained in:
Magnus Norddahl 2018-08-15 01:33:21 +02:00 committed by GitHub
commit ac462638d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -61,6 +61,13 @@ static asmjit::JitRuntime jit;
// [pbeta] TODO: VM aborts // [pbeta] TODO: VM aborts
#define NULL_POINTER_CHECK(a,o,x) #define NULL_POINTER_CHECK(a,o,x)
// Emits the two-operand integer instruction `op` as a three-operand operation:
// a fresh 32-bit virtual register is loaded with `r`, `op` is applied to that
// register with `l` as the second operand, and the result is moved into `out`.
//
//   op  - name of the compiler method to invoke (e.g. add, sub, shl, and_)
//   out - destination operand, written only by the final mov
//   r   - first operand; copied into the temporary before `op` runs
//   l   - second operand passed to `op`
//
// Because `out` is written last, `l` is read before `out` changes (presumably
// so the result stays correct when `out` and `l` name the same register --
// confirm against the call sites).
//
// The body is wrapped in do { } while (0) so that `BINARY_OP_INT(...);` expands
// to exactly one statement and is safe inside an unbraced if/else.
#define BINARY_OP_INT(op,out,r,l) \
do { \
	auto tmp = cc.newInt32(); \
	cc.mov(tmp, r); \
	cc.op(tmp, l); \
	cc.mov(out, tmp); \
} while (0)
static bool CanJit(VMScriptFunction *sfunc) static bool CanJit(VMScriptFunction *sfunc)
{ {
@ -400,7 +407,7 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
break; break;
case OP_LB_R: case OP_LB_R:
NULL_POINTER_CHECK (PB, RC, X_READ_NIL); NULL_POINTER_CHECK (PB, RC, X_READ_NIL);
cc.movsx (regD[a], x86::byte_ptr (PB, KC)); cc.movsx (regD[a], x86::byte_ptr (PB, RC));
break; break;
case OP_LH: // load halfword case OP_LH: // load halfword
NULL_POINTER_CHECK (PB, KC, X_READ_NIL); NULL_POINTER_CHECK (PB, KC, X_READ_NIL);
@ -408,15 +415,15 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
break; break;
case OP_LH_R: case OP_LH_R:
NULL_POINTER_CHECK (PB, RC, X_READ_NIL); NULL_POINTER_CHECK (PB, RC, X_READ_NIL);
cc.movsx (regD[a], x86::word_ptr (PB, KC)); cc.movsx (regD[a], x86::word_ptr (PB, RC));
break; break;
case OP_LW: // load word case OP_LW: // load word
NULL_POINTER_CHECK (PB, KC, X_READ_NIL); NULL_POINTER_CHECK (PB, KC, X_READ_NIL);
cc.movsx (regD[a], x86::dword_ptr (PB, KC)); cc.mov (regD[a], x86::dword_ptr (PB, KC));
break; break;
case OP_LW_R: case OP_LW_R:
NULL_POINTER_CHECK (PB, RC, X_READ_NIL); NULL_POINTER_CHECK (PB, RC, X_READ_NIL);
cc.movsx (regD[a], x86::dword_ptr (PB, KC)); cc.mov (regD[a], x86::dword_ptr (PB, RC));
break; break;
case OP_LBU: // load byte unsigned case OP_LBU: // load byte unsigned
NULL_POINTER_CHECK (PB, KC, X_READ_NIL); NULL_POINTER_CHECK (PB, KC, X_READ_NIL);
@ -424,7 +431,7 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
break; break;
case OP_LBU_R: case OP_LBU_R:
NULL_POINTER_CHECK (PB, RC, X_READ_NIL); NULL_POINTER_CHECK (PB, RC, X_READ_NIL);
cc.mov (regD[a], x86::byte_ptr (PB, KC)); cc.mov (regD[a], x86::byte_ptr (PB, RC));
break; break;
case OP_LHU: // load halfword unsigned case OP_LHU: // load halfword unsigned
NULL_POINTER_CHECK (PB, KC, X_READ_NIL); NULL_POINTER_CHECK (PB, KC, X_READ_NIL);
@ -432,7 +439,7 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
break; break;
case OP_LHU_R: case OP_LHU_R:
NULL_POINTER_CHECK (PB, RC, X_READ_NIL); NULL_POINTER_CHECK (PB, RC, X_READ_NIL);
cc.mov (regD[a], x86::word_ptr (PB, KC)); cc.mov (regD[a], x86::word_ptr (PB, RC));
break; break;
case OP_LSP: // load single-precision fp case OP_LSP: // load single-precision fp
NULL_POINTER_CHECK (PB, KC, X_READ_NIL); NULL_POINTER_CHECK (PB, KC, X_READ_NIL);
@ -787,102 +794,83 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
// Integer math. // Integer math.
case OP_SLL_RR: // dA = dkB << diC case OP_SLL_RR: // dA = dkB << diC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(shl, regD[a], regD[B], regD[C]);
cc.shl(regD[a], regD[C]);
break; break;
case OP_SLL_RI: case OP_SLL_RI:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(shl, regD[a], regD[B], C);
cc.shl(regD[a], C);
break; break;
case OP_SLL_KR: case OP_SLL_KR:
cc.mov(regD[a], konstd[B]); BINARY_OP_INT(shl, regD[a], konstd[B], regD[C]);
cc.shl(regD[a], C);
break; break;
case OP_SRL_RR: // dA = dkB >> diC -- unsigned case OP_SRL_RR: // dA = dkB >> diC -- unsigned
cc.mov(regD[a], regD[B]); BINARY_OP_INT(shr, regD[a], regD[B], regD[C]);
cc.shr(regD[a], regD[C]);
break; break;
case OP_SRL_RI: case OP_SRL_RI:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(shr, regD[a], regD[B], C);
cc.shr(regD[a], C);
break; break;
case OP_SRL_KR: case OP_SRL_KR:
cc.mov(regD[a], konstd[B]); BINARY_OP_INT(shr, regD[a], regD[B], C);
cc.shr(regD[a], C);
break; break;
case OP_SRA_RR: // dA = dkB >> diC -- signed case OP_SRA_RR: // dA = dkB >> diC -- signed
cc.mov(regD[a], regD[B]); BINARY_OP_INT(sar, regD[a], regD[B], regD[C]);
cc.sar(regD[a], regD[C]);
break; break;
case OP_SRA_RI: case OP_SRA_RI:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(sar, regD[a], regD[B], C);
cc.sar(regD[a], C);
break; break;
case OP_SRA_KR: case OP_SRA_KR:
cc.mov(regD[a], konstd[B]); BINARY_OP_INT(sar, regD[a], konstd[B], regD[C]);
cc.sar(regD[a], regD[C]);
break; break;
case OP_ADD_RR: // dA = dB + dkC case OP_ADD_RR: // dA = dB + dkC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(add, regD[a], regD[B], regD[C]);
cc.add(regD[a], regD[C]);
break; break;
case OP_ADD_RK: case OP_ADD_RK:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(add, regD[a], regD[B], konstd[C]);
cc.add(regD[a], konstd[C]);
break; break;
case OP_ADDI: // dA = dB + C -- C is a signed 8-bit constant case OP_ADDI: // dA = dB + C -- C is a signed 8-bit constant
cc.mov(regD[a], regD[B]); BINARY_OP_INT(add, regD[a], regD[B], Cs);
cc.add(regD[a], Cs);
break; break;
case OP_SUB_RR: // dA = dkB - dkC case OP_SUB_RR: // dA = dkB - dkC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(sub, regD[a], regD[B], regD[C]);
cc.sub(regD[a], regD[C]);
break; break;
case OP_SUB_RK: case OP_SUB_RK:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(sub, regD[a], regD[B], konstd[C]);
cc.sub(regD[a], konstd[C]);
break; break;
case OP_SUB_KR: case OP_SUB_KR:
cc.mov(regD[a], konstd[B]); BINARY_OP_INT(sub, regD[a], konstd[B], regD[C]);
cc.sub(regD[a], regD[C]);
break; break;
case OP_MUL_RR: // dA = dB * dkC case OP_MUL_RR: // dA = dB * dkC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(mul, regD[a], regD[B], regD[C]);
cc.mul(regD[a], regD[C]);
break; break;
case OP_MUL_RK: case OP_MUL_RK:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(mul, regD[a], regD[B], konstd[C]);
cc.mul(regD[a], konstd[C]);
break; break;
case OP_DIV_RR: // dA = dkB / dkC (signed) case OP_DIV_RR: // dA = dkB / dkC (signed)
// To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr); // To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr);
cc.mov(regD[a], regD[B]); BINARY_OP_INT(idiv, regD[a], regD[B], regD[C]);
cc.idiv(regD[a], regD[C]);
break; break;
case OP_DIV_RK: case OP_DIV_RK:
// To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr); // To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr);
cc.mov(regD[a], regD[B]); BINARY_OP_INT(idiv, regD[a], regD[B], konstd[C]);
cc.div(regD[a], konstd[C]);
break; break;
case OP_DIV_KR: case OP_DIV_KR:
// To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr); // To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr);
cc.mov(regD[a], konstd[B]); BINARY_OP_INT(idiv, regD[a], konstd[B], regD[C]);
cc.idiv(regD[a], regD[C]);
break; break;
case OP_DIVU_RR: // dA = dkB / dkC (unsigned) case OP_DIVU_RR: // dA = dkB / dkC (unsigned)
// To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr); // To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr);
cc.mov(regD[a], regD[B]); BINARY_OP_INT(div, regD[a], regD[B], regD[C]);
cc.div(regD[a], regD[C]);
break; break;
case OP_DIVU_RK: case OP_DIVU_RK:
// To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr); // To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr);
cc.mov(regD[a], regD[B]); BINARY_OP_INT(div, regD[a], regD[B], konstd[C]);
cc.div(regD[a], konstd[C]);
break; break;
case OP_DIVU_KR: case OP_DIVU_KR:
// To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr); // To do: ThrowAbortException(X_DIVISION_BY_ZERO, nullptr);
cc.mov(regD[a], konstd[B]); BINARY_OP_INT(div, regD[a], konstd[B], regD[C]);
cc.div(regD[a], regD[C]);
break; break;
case OP_MOD_RR: // dA = dkB % dkC (signed) case OP_MOD_RR: // dA = dkB % dkC (signed)
case OP_MOD_RK: case OP_MOD_RK:
@ -892,28 +880,22 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
case OP_MODU_KR: case OP_MODU_KR:
break; break;
case OP_AND_RR: // dA = dB & dkC case OP_AND_RR: // dA = dB & dkC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(and_, regD[a], regD[B], regD[C]);
cc.and_(regD[a], regD[C]);
break; break;
case OP_AND_RK: case OP_AND_RK:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(and_, regD[a], regD[B], konstd[C]);
cc.and_(regD[a], konstd[C]);
break; break;
case OP_OR_RR: // dA = dB | dkC case OP_OR_RR: // dA = dB | dkC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(or_, regD[a], regD[B], regD[C]);
cc.or_(regD[a], regD[C]);
break; break;
case OP_OR_RK: case OP_OR_RK:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(or_, regD[a], regD[B], konstd[C]);
cc.or_(regD[a], konstd[C]);
break; break;
case OP_XOR_RR: // dA = dB ^ dkC case OP_XOR_RR: // dA = dB ^ dkC
cc.mov(regD[a], regD[B]); BINARY_OP_INT(xor_, regD[a], regD[B], regD[C]);
cc.xor_(regD[a], regD[C]);
break; break;
case OP_XOR_RK: case OP_XOR_RK:
cc.mov(regD[a], regD[B]); BINARY_OP_INT(xor_, regD[a], regD[B], konstd[C]);
cc.xor_(regD[a], konstd[C]);
break; break;
case OP_MIN_RR: // dA = min(dB,dkC) case OP_MIN_RR: // dA = min(dB,dkC)
case OP_MIN_RK: case OP_MIN_RK:
@ -922,9 +904,13 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
case OP_ABS: // dA = abs(dB) case OP_ABS: // dA = abs(dB)
break; break;
case OP_NEG: // dA = -dB case OP_NEG: // dA = -dB
cc.xor_(regD[a], regD[a]); {
cc.sub(regD[a], regD[B]); auto tmp = cc.newInt32 ();
cc.xor_(tmp, tmp);
cc.sub(tmp, regD[B]);
cc.mov(regD[a], tmp);
break; break;
}
case OP_NOT: // dA = ~dB case OP_NOT: // dA = ~dB
cc.mov(regD[a], regD[B]); cc.mov(regD[a], regD[B]);
cc.not_(regD[a]); cc.not_(regD[a]);
@ -1473,19 +1459,31 @@ JitFuncPtr JitCompile(VMScriptFunction *sfunc)
// Pointer math. // Pointer math.
case OP_ADDA_RR: // pA = pB + dkC case OP_ADDA_RR: // pA = pB + dkC
cc.mov(regA[a], regA[B]); {
cc.add(regA[a], regD[C]); auto tmp = cc.newIntPtr();
cc.mov(tmp, regA[B]);
cc.add(tmp, regD[C]);
cc.mov(regA[a], tmp);
break; break;
}
case OP_ADDA_RK: case OP_ADDA_RK:
cc.mov(regA[a], regA[B]); {
cc.add(regA[a], konstd[C]); auto tmp = cc.newIntPtr();
cc.mov(tmp, regA[B]);
cc.add(tmp, konstd[C]);
cc.mov(regA[a], tmp);
break; break;
}
case OP_SUBA: // dA = pB - pC case OP_SUBA: // dA = pB - pC
cc.mov(regA[a], regA[B]); {
cc.sub(regA[a], regD[C]); auto tmp = cc.newIntPtr();
cc.mov(tmp, regA[B]);
cc.sub(tmp, regD[C]);
cc.mov(regA[a], tmp);
break; break;
}
case OP_EQA_R: // if ((pB == pkC) != A) then pc++ case OP_EQA_R: // if ((pB == pkC) != A) then pc++
case OP_EQA_K: case OP_EQA_K: