mirror of https://github.com/ZDoom/raze-gles.git
// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Export]
#define ASMJIT_EXPORTS

// [Guard]
#include "../asmjit_build.h"
#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_DISABLE_COMPILER)

// [Dependencies]
#include "../base/cpuinfo.h"
#include "../base/utils.h"
#include "../x86/x86assembler.h"
#include "../x86/x86compiler.h"
#include "../x86/x86internal_p.h"
#include "../x86/x86regalloc_p.h"

// [Api-Begin]
#include "../asmjit_apibegin.h"

namespace asmjit {

// ============================================================================
// [Forward Declarations]
// ============================================================================

enum { kCompilerDefaultLookAhead = 64 };

static Error X86RAPass_translateOperands(X86RAPass* self, Operand_* opArray, uint32_t opCount);
// ============================================================================
|
|
// [asmjit::X86RAPass - SpecialInst]
|
|
// ============================================================================
|
|
|
|
struct X86SpecialInst {
|
|
uint8_t inReg;
|
|
uint8_t outReg;
|
|
uint16_t flags;
|
|
};
|
|
|
|
static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t instId, const Operand* opArray, uint32_t opCount) noexcept {
|
|
enum { kAny = Globals::kInvalidRegId };
|
|
|
|
#define R(ri) { uint8_t(ri) , uint8_t(kAny), uint16_t(TiedReg::kRReg) }
|
|
#define W(ri) { uint8_t(kAny), uint8_t(ri) , uint16_t(TiedReg::kWReg) }
|
|
#define X(ri) { uint8_t(ri) , uint8_t(ri) , uint16_t(TiedReg::kXReg) }
|
|
#define NONE() { uint8_t(kAny), uint8_t(kAny), 0 }
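// NOTE: In the tables below R() means the operand is only read, W() means it
// is only written, and X() means it is both read and written; the register id
// pins the operand to a fixed physical register (kAny = no fixed register).
// For example the `cpuid` row ties its operands to EAX (X), EBX (W), ECX (X)
// and EDX (W).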
|
|
static const X86SpecialInst instCpuid[] = { X(X86Gp::kIdAx), W(X86Gp::kIdBx), X(X86Gp::kIdCx), W(X86Gp::kIdDx) };
|
|
static const X86SpecialInst instCbwCdqeCwde[] = { X(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instCdqCwdCqo[] = { W(X86Gp::kIdDx), R(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instCmpxchg[] = { X(kAny), R(kAny), X(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instCmpxchg8b16b[] = { NONE(), X(X86Gp::kIdDx), X(X86Gp::kIdAx), R(X86Gp::kIdCx), R(X86Gp::kIdBx) };
|
|
static const X86SpecialInst instDaaDas[] = { X(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instDiv2[] = { X(X86Gp::kIdAx), R(kAny) };
|
|
static const X86SpecialInst instDiv3[] = { X(X86Gp::kIdDx), X(X86Gp::kIdAx), R(kAny) };
|
|
static const X86SpecialInst instJecxz[] = { R(X86Gp::kIdCx) };
|
|
static const X86SpecialInst instMul2[] = { X(X86Gp::kIdAx), R(kAny) };
|
|
static const X86SpecialInst instMul3[] = { W(X86Gp::kIdDx), X(X86Gp::kIdAx), R(kAny) };
|
|
static const X86SpecialInst instMulx[] = { W(kAny), W(kAny), R(kAny), R(X86Gp::kIdDx) };
|
|
static const X86SpecialInst instLahf[] = { W(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instSahf[] = { R(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instMaskmovq[] = { R(kAny), R(kAny), R(X86Gp::kIdDi) };
|
|
static const X86SpecialInst instRdtscRdtscp[] = { W(X86Gp::kIdDx), W(X86Gp::kIdAx), W(X86Gp::kIdCx) };
|
|
static const X86SpecialInst instRot[] = { X(kAny), R(X86Gp::kIdCx) };
|
|
static const X86SpecialInst instShldShrd[] = { X(kAny), R(kAny), R(X86Gp::kIdCx) };
|
|
static const X86SpecialInst instThirdXMM0[] = { W(kAny), R(kAny), R(0) };
|
|
static const X86SpecialInst instPcmpestri[] = { R(kAny), R(kAny), NONE(), W(X86Gp::kIdCx) };
|
|
static const X86SpecialInst instPcmpestrm[] = { R(kAny), R(kAny), NONE(), W(0) };
|
|
static const X86SpecialInst instPcmpistri[] = { R(kAny), R(kAny), NONE(), W(X86Gp::kIdCx), R(X86Gp::kIdAx), R(X86Gp::kIdDx) };
|
|
static const X86SpecialInst instPcmpistrm[] = { R(kAny), R(kAny), NONE(), W(0) , R(X86Gp::kIdAx), R(X86Gp::kIdDx) };
|
|
static const X86SpecialInst instXsaveXrstor[] = { W(kAny), R(X86Gp::kIdDx), R(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instReadMR[] = { W(X86Gp::kIdDx), W(X86Gp::kIdAx), R(X86Gp::kIdCx) };
|
|
static const X86SpecialInst instWriteMR[] = { R(X86Gp::kIdDx), R(X86Gp::kIdAx), R(X86Gp::kIdCx) };
|
|
|
|
static const X86SpecialInst instCmps[] = { X(X86Gp::kIdSi), X(X86Gp::kIdDi) };
|
|
static const X86SpecialInst instLods[] = { W(X86Gp::kIdAx), X(X86Gp::kIdSi) };
|
|
static const X86SpecialInst instMovs[] = { X(X86Gp::kIdDi), X(X86Gp::kIdSi) };
|
|
static const X86SpecialInst instScas[] = { X(X86Gp::kIdDi), R(X86Gp::kIdAx) };
|
|
static const X86SpecialInst instStos[] = { X(X86Gp::kIdDi), R(X86Gp::kIdAx) };
|
|
#undef NONE
|
|
#undef X
|
|
#undef W
|
|
#undef R
|
|
|
|
switch (instId) {
|
|
case X86Inst::kIdCpuid : return instCpuid;
|
|
case X86Inst::kIdCbw :
|
|
case X86Inst::kIdCdqe :
|
|
case X86Inst::kIdCwde : return instCbwCdqeCwde;
|
|
case X86Inst::kIdCdq :
|
|
case X86Inst::kIdCwd :
|
|
case X86Inst::kIdCqo : return instCdqCwdCqo;
|
|
case X86Inst::kIdCmps : return instCmps;
|
|
case X86Inst::kIdCmpxchg : return instCmpxchg;
|
|
case X86Inst::kIdCmpxchg8b :
|
|
case X86Inst::kIdCmpxchg16b : return instCmpxchg8b16b;
|
|
case X86Inst::kIdDaa :
|
|
case X86Inst::kIdDas : return instDaaDas;
|
|
case X86Inst::kIdDiv : return (opCount == 2) ? instDiv2 : instDiv3;
|
|
case X86Inst::kIdIdiv : return (opCount == 2) ? instDiv2 : instDiv3;
|
|
case X86Inst::kIdImul : if (opCount == 2) return nullptr;
|
|
if (opCount == 3 && !(opArray[0].isReg() && opArray[1].isReg() && opArray[2].isRegOrMem())) return nullptr;
|
|
ASMJIT_FALLTHROUGH;
|
|
case X86Inst::kIdMul : return (opCount == 2) ? instMul2 : instMul3;
|
|
case X86Inst::kIdMulx : return instMulx;
|
|
case X86Inst::kIdJecxz : return instJecxz;
|
|
case X86Inst::kIdLods : return instLods;
|
|
case X86Inst::kIdMovs : return instMovs;
|
|
case X86Inst::kIdLahf : return instLahf;
|
|
case X86Inst::kIdSahf : return instSahf;
|
|
case X86Inst::kIdMaskmovq :
|
|
case X86Inst::kIdMaskmovdqu :
|
|
case X86Inst::kIdVmaskmovdqu: return instMaskmovq;
|
|
case X86Inst::kIdEnter : return nullptr; // Not supported.
|
|
case X86Inst::kIdLeave : return nullptr; // Not supported.
|
|
case X86Inst::kIdRet : return nullptr; // Not supported.
|
|
case X86Inst::kIdMonitor : return nullptr; // TODO: [COMPILER] Monitor/MWait.
|
|
case X86Inst::kIdMwait : return nullptr; // TODO: [COMPILER] Monitor/MWait.
|
|
case X86Inst::kIdPop : return nullptr; // TODO: [COMPILER] Pop/Push.
|
|
case X86Inst::kIdPush : return nullptr; // TODO: [COMPILER] Pop/Push.
|
|
case X86Inst::kIdPopa : return nullptr; // Not supported.
|
|
case X86Inst::kIdPopf : return nullptr; // Not supported.
|
|
case X86Inst::kIdPusha : return nullptr; // Not supported.
|
|
case X86Inst::kIdPushf : return nullptr; // Not supported.
|
|
case X86Inst::kIdRcl :
|
|
case X86Inst::kIdRcr :
|
|
case X86Inst::kIdRol :
|
|
case X86Inst::kIdRor :
|
|
case X86Inst::kIdSal :
|
|
case X86Inst::kIdSar :
|
|
case X86Inst::kIdShl : // Rot instruction is special only if the last operand is a variable.
|
|
case X86Inst::kIdShr : if (!opArray[1].isReg()) return nullptr;
|
|
return instRot;
|
|
case X86Inst::kIdShld : // Shld/Shrd instruction is special only if the last operand is a variable.
|
|
case X86Inst::kIdShrd : if (!opArray[2].isReg()) return nullptr;
|
|
return instShldShrd;
|
|
case X86Inst::kIdRdtsc :
|
|
case X86Inst::kIdRdtscp : return instRdtscRdtscp;
|
|
case X86Inst::kIdScas : return instScas;
|
|
case X86Inst::kIdStos : return instStos;
|
|
case X86Inst::kIdBlendvpd :
|
|
case X86Inst::kIdBlendvps :
|
|
case X86Inst::kIdPblendvb :
|
|
case X86Inst::kIdSha256rnds2: return instThirdXMM0;
|
|
case X86Inst::kIdPcmpestri :
|
|
case X86Inst::kIdVpcmpestri : return instPcmpestri;
|
|
case X86Inst::kIdPcmpistri :
|
|
case X86Inst::kIdVpcmpistri : return instPcmpistri;
|
|
case X86Inst::kIdPcmpestrm :
|
|
case X86Inst::kIdVpcmpestrm : return instPcmpestrm;
|
|
case X86Inst::kIdPcmpistrm :
|
|
case X86Inst::kIdVpcmpistrm : return instPcmpistrm;
|
|
case X86Inst::kIdXrstor :
|
|
case X86Inst::kIdXrstor64 :
|
|
case X86Inst::kIdXsave :
|
|
case X86Inst::kIdXsave64 :
|
|
case X86Inst::kIdXsaveopt :
|
|
case X86Inst::kIdXsaveopt64 : return instXsaveXrstor;
|
|
case X86Inst::kIdRdmsr :
|
|
case X86Inst::kIdRdpmc :
|
|
case X86Inst::kIdXgetbv : return instReadMR;
|
|
case X86Inst::kIdWrmsr :
|
|
case X86Inst::kIdXsetbv : return instWriteMR;
|
|
default : return nullptr;
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Construction / Destruction]
|
|
// ============================================================================
|
|
|
|
X86RAPass::X86RAPass() noexcept : RAPass() {
|
|
_state = &_x86State;
|
|
_varMapToVaListOffset = ASMJIT_OFFSET_OF(X86RAData, tiedArray);
|
|
}
|
|
X86RAPass::~X86RAPass() noexcept {}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Interface]
|
|
// ============================================================================
|
|
|
|
Error X86RAPass::process(Zone* zone) noexcept {
|
|
return Base::process(zone);
|
|
}
|
|
|
|
Error X86RAPass::prepare(CCFunc* func) noexcept {
|
|
ASMJIT_PROPAGATE(Base::prepare(func));
|
|
|
|
uint32_t archType = cc()->getArchType();
|
|
_regCount._gp = archType == ArchInfo::kTypeX86 ? 8 : 16;
|
|
_regCount._mm = 8;
|
|
_regCount._k = 8;
|
|
_regCount._vec = archType == ArchInfo::kTypeX86 ? 8 : 16;
|
|
_zsp = cc()->zsp();
|
|
_zbp = cc()->zbp();
|
|
|
|
_gaRegs[X86Reg::kKindGp ] = Utils::bits(_regCount.getGp()) & ~Utils::mask(X86Gp::kIdSp);
|
|
_gaRegs[X86Reg::kKindMm ] = Utils::bits(_regCount.getMm());
|
|
_gaRegs[X86Reg::kKindK ] = Utils::bits(_regCount.getK());
|
|
_gaRegs[X86Reg::kKindVec] = Utils::bits(_regCount.getVec());
|
|
|
|
_x86State.reset(0);
|
|
_clobberedRegs.reset();
|
|
|
|
_avxEnabled = false;
|
|
|
|
_varBaseRegId = Globals::kInvalidRegId; // Used by patcher.
|
|
_varBaseOffset = 0; // Used by patcher.
|
|
|
|
return kErrorOk;
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Emit]
|
|
// ============================================================================
|
|
|
|
Error X86RAPass::emitMove(VirtReg* vReg, uint32_t dstId, uint32_t srcId, const char* reason) {
|
|
const char* comment = nullptr;
|
|
if (_emitComments) {
|
|
_stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
|
|
comment = _stringBuilder.getData();
|
|
}
|
|
|
|
X86Reg dst(X86Reg::fromSignature(vReg->getSignature(), dstId));
|
|
X86Reg src(X86Reg::fromSignature(vReg->getSignature(), srcId));
|
|
return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
|
|
}
|
|
|
|
Error X86RAPass::emitLoad(VirtReg* vReg, uint32_t id, const char* reason) {
|
|
const char* comment = nullptr;
|
|
if (_emitComments) {
|
|
_stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
|
|
comment = _stringBuilder.getData();
|
|
}
|
|
|
|
X86Reg dst(X86Reg::fromSignature(vReg->getSignature(), id));
|
|
X86Mem src(getVarMem(vReg));
|
|
return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
|
|
}
|
|
|
|
Error X86RAPass::emitSave(VirtReg* vReg, uint32_t id, const char* reason) {
|
|
const char* comment = nullptr;
|
|
if (_emitComments) {
|
|
_stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
|
|
comment = _stringBuilder.getData();
|
|
}
|
|
|
|
X86Mem dst(getVarMem(vReg));
|
|
X86Reg src(X86Reg::fromSignature(vReg->getSignature(), id));
|
|
return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
|
|
}
|
|
|
|
Error X86RAPass::emitSwapGp(VirtReg* dstReg, VirtReg* srcReg, uint32_t dstPhysId, uint32_t srcPhysId, const char* reason) noexcept {
|
|
ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
|
|
ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);
|
|
|
|
uint32_t is64 = std::max(dstReg->getTypeId(), srcReg->getTypeId()) >= TypeId::kI64;
|
|
uint32_t sign = is64 ? uint32_t(X86RegTraits<X86Reg::kRegGpq>::kSignature)
|
|
: uint32_t(X86RegTraits<X86Reg::kRegGpd>::kSignature);
|
|
|
|
X86Reg a = X86Reg::fromSignature(sign, dstPhysId);
|
|
X86Reg b = X86Reg::fromSignature(sign, srcPhysId);
|
|
|
|
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXchg, a, b));
|
|
if (_emitComments)
|
|
cc()->getCursor()->setInlineComment(cc()->_cbDataZone.sformat("[%s] %s, %s", reason, dstReg->getName(), srcReg->getName()));
|
|
return kErrorOk;
|
|
}
|
|
|
|
Error X86RAPass::emitSwapVec(VirtReg* dstReg, VirtReg* srcReg, uint32_t dstPhysId, uint32_t srcPhysId, const char* reason) noexcept {
|
|
ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
|
|
ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);
|
|
ASMJIT_ASSERT(dstPhysId != srcPhysId);
|
|
|
|
X86Reg a = X86Reg::fromSignature(dstReg->getSignature(), dstPhysId);
|
|
X86Reg b = X86Reg::fromSignature(srcReg->getSignature(), srcPhysId);
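// The three XORPS instructions below perform an in-place XOR swap of the two
// vector registers, so no scratch register is needed; the assert above
// guarantees the two physical ids differ, which the XOR swap requires.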
|
|
|
|
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXorps, a, b));
|
|
if (_emitComments)
|
|
cc()->getCursor()->setInlineComment(cc()->_cbDataZone.sformat("[%s] %s, %s", reason, dstReg->getName(), srcReg->getName()));
|
|
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXorps, b, a));
|
|
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXorps, a, b));
|
|
return kErrorOk;
|
|
}
|
|
|
|
Error X86RAPass::emitImmToReg(uint32_t dstTypeId, uint32_t dstPhysId, const Imm* src) noexcept {
|
|
ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
|
|
|
|
X86Reg r0;
|
|
Imm imm(*src);
|
|
|
|
switch (dstTypeId) {
|
|
case TypeId::kI8:
|
|
case TypeId::kU8:
|
|
imm.truncateTo8Bits();
|
|
ASMJIT_FALLTHROUGH;
|
|
|
|
case TypeId::kI16:
|
|
case TypeId::kU16:
|
|
imm.truncateTo16Bits();
|
|
ASMJIT_FALLTHROUGH;
|
|
|
|
case TypeId::kI32:
|
|
case TypeId::kU32:
|
|
Mov32Truncate:
|
|
imm.truncateTo32Bits();
|
|
r0.setX86RegT<X86Reg::kRegGpd>(dstPhysId);
|
|
cc()->emit(X86Inst::kIdMov, r0, imm);
|
|
break;
|
|
|
|
case TypeId::kI64:
|
|
case TypeId::kU64:
|
|
// A move to a GPD register also clears the high DWORD of the
// corresponding GPQ register in 64-bit mode.
|
|
if (imm.isUInt32())
|
|
goto Mov32Truncate;
|
|
|
|
r0.setX86RegT<X86Reg::kRegGpq>(dstPhysId);
|
|
cc()->emit(X86Inst::kIdMov, r0, imm);
|
|
break;
|
|
|
|
case TypeId::kF32:
|
|
case TypeId::kF64:
|
|
// Compiler doesn't manage FPU stack.
|
|
ASMJIT_NOT_REACHED();
|
|
break;
|
|
|
|
case TypeId::kMmx32:
|
|
case TypeId::kMmx64:
|
|
// TODO: [COMPILER] EmitMoveImmToReg.
|
|
break;
|
|
|
|
default:
|
|
// TODO: [COMPILER] EmitMoveImmToReg.
|
|
break;
|
|
}
|
|
|
|
return kErrorOk;
|
|
}
|
|
|
|
Error X86RAPass::emitImmToStack(uint32_t dstTypeId, const X86Mem* dst, const Imm* src) noexcept {
|
|
X86Mem mem(*dst);
|
|
Imm imm(*src);
|
|
|
|
// One stack entry has the same size as the native register size. That means
|
|
// that if we want to move a 32-bit integer on the stack in 64-bit mode, we
|
|
// need to extend it to a 64-bit integer first. In 32-bit mode, storing a
// 64-bit value on the stack is done in two steps, by writing its low and high parts
|
|
// separately.
|
|
uint32_t gpSize = cc()->getGpSize();
|
|
|
|
switch (dstTypeId) {
|
|
case TypeId::kI8:
|
|
case TypeId::kU8:
|
|
imm.truncateTo8Bits();
|
|
ASMJIT_FALLTHROUGH;
|
|
|
|
case TypeId::kI16:
|
|
case TypeId::kU16:
|
|
imm.truncateTo16Bits();
|
|
ASMJIT_FALLTHROUGH;
|
|
|
|
case TypeId::kI32:
|
|
case TypeId::kU32:
|
|
case TypeId::kF32:
|
|
mem.setSize(4);
|
|
imm.truncateTo32Bits();
|
|
cc()->emit(X86Inst::kIdMov, mem, imm);
|
|
break;
|
|
|
|
case TypeId::kI64:
|
|
case TypeId::kU64:
|
|
case TypeId::kF64:
|
|
case TypeId::kMmx32:
|
|
case TypeId::kMmx64:
|
|
if (gpSize == 4) {
|
|
uint32_t hi = imm.getUInt32Hi();
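// Keep the high 32 bits aside; the immediate is truncated for the low
// store and then reloaded with the high half for the second store.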
|
|
|
|
// Lo-Part.
|
|
mem.setSize(4);
|
|
imm.truncateTo32Bits();
|
|
|
|
cc()->emit(X86Inst::kIdMov, mem, imm);
|
|
mem.addOffsetLo32(gpSize);
|
|
|
|
// Hi-Part.
|
|
imm.setUInt32(hi);
|
|
cc()->emit(X86Inst::kIdMov, mem, imm);
|
|
}
|
|
else {
|
|
mem.setSize(8);
|
|
cc()->emit(X86Inst::kIdMov, mem, imm);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
return DebugUtils::errored(kErrorInvalidState);
|
|
}
|
|
|
|
return kErrorOk;
|
|
}
|
|
|
|
Error X86RAPass::emitRegToStack(uint32_t dstTypeId, const X86Mem* dst, uint32_t srcTypeId, uint32_t srcPhysId) noexcept {
|
|
ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);
|
|
|
|
X86Mem m0(*dst);
|
|
X86Reg r0, r1;
|
|
|
|
uint32_t gpSize = cc()->getGpSize();
|
|
uint32_t instId = 0;
|
|
|
|
switch (dstTypeId) {
|
|
case TypeId::kI64:
|
|
case TypeId::kU64:
|
|
// Extend BYTE->QWORD (GP).
|
|
if (TypeId::isGpb(srcTypeId)) {
|
|
r1.setX86RegT<X86Reg::kRegGpbLo>(srcPhysId);
|
|
|
|
instId = (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI8) ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
|
|
goto _ExtendMovGpXQ;
|
|
}
|
|
|
|
// Extend WORD->QWORD (GP).
|
|
if (TypeId::isGpw(srcTypeId)) {
|
|
r1.setX86RegT<X86Reg::kRegGpw>(srcPhysId);
|
|
|
|
instId = (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI16) ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
|
|
goto _ExtendMovGpXQ;
|
|
}
|
|
|
|
// Extend DWORD->QWORD (GP).
|
|
if (TypeId::isGpd(srcTypeId)) {
|
|
r1.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
|
|
|
|
instId = X86Inst::kIdMovsxd;
|
|
if (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI32)
|
|
goto _ExtendMovGpXQ;
|
|
else
|
|
goto _ZeroExtendGpDQ;
|
|
}
|
|
|
|
// Move QWORD (GP).
|
|
if (TypeId::isGpq(srcTypeId)) goto MovGpQ;
|
|
if (TypeId::isMmx(srcTypeId)) goto MovMmQ;
|
|
if (TypeId::isVec(srcTypeId)) goto MovXmmQ;
|
|
break;
|
|
|
|
case TypeId::kI32:
|
|
case TypeId::kU32:
|
|
case TypeId::kI16:
|
|
case TypeId::kU16:
|
|
// DWORD <- WORD (Zero|Sign Extend).
|
|
if (TypeId::isGpw(srcTypeId)) {
|
|
bool isDstSigned = dstTypeId == TypeId::kI16 || dstTypeId == TypeId::kI32;
|
|
bool isSrcSigned = srcTypeId == TypeId::kI8 || srcTypeId == TypeId::kI16;
|
|
|
|
r1.setX86RegT<X86Reg::kRegGpw>(srcPhysId);
|
|
instId = isDstSigned && isSrcSigned ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
|
|
goto _ExtendMovGpD;
|
|
}
|
|
|
|
// DWORD <- BYTE (Zero|Sign Extend).
|
|
if (TypeId::isGpb(srcTypeId)) {
|
|
bool isDstSigned = dstTypeId == TypeId::kI16 || dstTypeId == TypeId::kI32;
|
|
bool isSrcSigned = srcTypeId == TypeId::kI8 || srcTypeId == TypeId::kI16;
|
|
|
|
r1.setX86RegT<X86Reg::kRegGpbLo>(srcPhysId);
|
|
instId = isDstSigned && isSrcSigned ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
|
|
goto _ExtendMovGpD;
|
|
}
|
|
ASMJIT_FALLTHROUGH;
|
|
|
|
case TypeId::kI8:
|
|
case TypeId::kU8:
|
|
if (TypeId::isInt(srcTypeId)) goto MovGpD;
|
|
if (TypeId::isMmx(srcTypeId)) goto MovMmD;
|
|
if (TypeId::isVec(srcTypeId)) goto MovXmmD;
|
|
break;
|
|
|
|
case TypeId::kMmx32:
|
|
case TypeId::kMmx64:
|
|
// Extend BYTE->QWORD (GP).
|
|
if (TypeId::isGpb(srcTypeId)) {
|
|
r1.setX86RegT<X86Reg::kRegGpbLo>(srcPhysId);
|
|
|
|
instId = X86Inst::kIdMovzx;
|
|
goto _ExtendMovGpXQ;
|
|
}
|
|
|
|
// Extend WORD->QWORD (GP).
|
|
if (TypeId::isGpw(srcTypeId)) {
|
|
r1.setX86RegT<X86Reg::kRegGpw>(srcPhysId);
|
|
|
|
instId = X86Inst::kIdMovzx;
|
|
goto _ExtendMovGpXQ;
|
|
}
|
|
|
|
if (TypeId::isGpd(srcTypeId)) goto _ExtendMovGpDQ;
|
|
if (TypeId::isGpq(srcTypeId)) goto MovGpQ;
|
|
if (TypeId::isMmx(srcTypeId)) goto MovMmQ;
|
|
if (TypeId::isVec(srcTypeId)) goto MovXmmQ;
|
|
break;
|
|
|
|
case TypeId::kF32:
|
|
case TypeId::kF32x1:
|
|
if (TypeId::isVec(srcTypeId)) goto MovXmmD;
|
|
break;
|
|
|
|
case TypeId::kF64:
|
|
case TypeId::kF64x1:
|
|
if (TypeId::isVec(srcTypeId)) goto MovXmmQ;
|
|
break;
|
|
|
|
default:
|
|
// TODO: Vector types by stack.
|
|
break;
|
|
}
|
|
return DebugUtils::errored(kErrorInvalidState);
|
|
|
|
// Extend+Move Gp.
|
|
_ExtendMovGpD:
|
|
m0.setSize(4);
|
|
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
|
|
|
|
cc()->emit(instId, r0, r1);
|
|
cc()->emit(X86Inst::kIdMov, m0, r0);
|
|
return kErrorOk;
|
|
|
|
_ExtendMovGpXQ:
|
|
if (gpSize == 8) {
|
|
m0.setSize(8);
|
|
r0.setX86RegT<X86Reg::kRegGpq>(srcPhysId);
|
|
|
|
cc()->emit(instId, r0, r1);
|
|
cc()->emit(X86Inst::kIdMov, m0, r0);
|
|
}
|
|
else {
|
|
m0.setSize(4);
|
|
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
|
|
|
|
cc()->emit(instId, r0, r1);
|
|
|
|
_ExtendMovGpDQ:
|
|
cc()->emit(X86Inst::kIdMov, m0, r0);
|
|
m0.addOffsetLo32(4);
|
|
cc()->emit(X86Inst::kIdAnd, m0, 0);
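// Storing the low DWORD and then AND-ing the following DWORD with zero
// zero-extends the 32-bit value into the full 64-bit stack slot.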
|
|
}
|
|
return kErrorOk;
|
|
|
|
_ZeroExtendGpDQ:
|
|
m0.setSize(4);
|
|
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
|
|
goto _ExtendMovGpDQ;
|
|
|
|
// Move Gp.
|
|
MovGpD:
|
|
m0.setSize(4);
|
|
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
|
|
return cc()->emit(X86Inst::kIdMov, m0, r0);
|
|
|
|
MovGpQ:
|
|
m0.setSize(8);
|
|
r0.setX86RegT<X86Reg::kRegGpq>(srcPhysId);
|
|
return cc()->emit(X86Inst::kIdMov, m0, r0);
|
|
|
|
// Move Mm.
|
|
MovMmD:
|
|
m0.setSize(4);
|
|
r0.setX86RegT<X86Reg::kRegMm>(srcPhysId);
|
|
return cc()->emit(X86Inst::kIdMovd, m0, r0);
|
|
|
|
MovMmQ:
|
|
m0.setSize(8);
|
|
r0.setX86RegT<X86Reg::kRegMm>(srcPhysId);
|
|
return cc()->emit(X86Inst::kIdMovq, m0, r0);
|
|
|
|
// Move XMM.
|
|
MovXmmD:
|
|
m0.setSize(4);
|
|
r0.setX86RegT<X86Reg::kRegXmm>(srcPhysId);
|
|
return cc()->emit(X86Inst::kIdMovss, m0, r0);
|
|
|
|
MovXmmQ:
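// MOVLPS stores the low 64 bits of the XMM register into the 8-byte stack slot.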
|
|
m0.setSize(8);
|
|
r0.setX86RegT<X86Reg::kRegXmm>(srcPhysId);
|
|
return cc()->emit(X86Inst::kIdMovlps, m0, r0);
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Register Management]
|
|
// ============================================================================
|
|
|
|
#if defined(ASMJIT_DEBUG)
|
|
template<int C>
|
|
static ASMJIT_INLINE void X86RAPass_checkStateVars(X86RAPass* self) {
|
|
X86RAState* state = self->getState();
|
|
VirtReg** sVars = state->getListByKind(C);
|
|
|
|
uint32_t physId;
|
|
uint32_t regMask;
|
|
uint32_t regCount = self->_regCount.get(C);
|
|
|
|
uint32_t occupied = state->_occupied.get(C);
|
|
uint32_t modified = state->_modified.get(C);
|
|
|
|
for (physId = 0, regMask = 1; physId < regCount; physId++, regMask <<= 1) {
|
|
VirtReg* vreg = sVars[physId];
|
|
|
|
if (!vreg) {
|
|
ASMJIT_ASSERT((occupied & regMask) == 0);
|
|
ASMJIT_ASSERT((modified & regMask) == 0);
|
|
}
|
|
else {
|
|
ASMJIT_ASSERT((occupied & regMask) != 0);
|
|
ASMJIT_ASSERT((modified & regMask) == (static_cast<uint32_t>(vreg->isModified()) << physId));
|
|
|
|
ASMJIT_ASSERT(vreg->getKind() == C);
|
|
ASMJIT_ASSERT(vreg->getState() == VirtReg::kStateReg);
|
|
ASMJIT_ASSERT(vreg->getPhysId() == physId);
|
|
}
|
|
}
|
|
}
|
|
|
|
void X86RAPass::_checkState() {
|
|
X86RAPass_checkStateVars<X86Reg::kKindGp >(this);
|
|
X86RAPass_checkStateVars<X86Reg::kKindMm >(this);
|
|
X86RAPass_checkStateVars<X86Reg::kKindVec>(this);
|
|
}
|
|
#else
|
|
void X86RAPass::_checkState() {}
|
|
#endif // ASMJIT_DEBUG
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - State - Load]
|
|
// ============================================================================
|
|
|
|
template<int C>
|
|
static ASMJIT_INLINE void X86RAPass_loadStateVars(X86RAPass* self, X86RAState* src) {
|
|
X86RAState* cur = self->getState();
|
|
|
|
VirtReg** cVars = cur->getListByKind(C);
|
|
VirtReg** sVars = src->getListByKind(C);
|
|
|
|
uint32_t physId;
|
|
uint32_t modified = src->_modified.get(C);
|
|
uint32_t regCount = self->_regCount.get(C);
|
|
|
|
for (physId = 0; physId < regCount; physId++, modified >>= 1) {
|
|
VirtReg* vreg = sVars[physId];
|
|
cVars[physId] = vreg;
|
|
if (!vreg) continue;
|
|
|
|
vreg->setState(VirtReg::kStateReg);
|
|
vreg->setPhysId(physId);
|
|
vreg->setModified(modified & 0x1);
|
|
}
|
|
}
|
|
|
|
void X86RAPass::loadState(RAState* src_) {
|
|
X86RAState* cur = getState();
|
|
X86RAState* src = static_cast<X86RAState*>(src_);
|
|
|
|
VirtReg** vregs = _contextVd.getData();
|
|
uint32_t count = static_cast<uint32_t>(_contextVd.getLength());
|
|
|
|
// Load allocated variables.
|
|
X86RAPass_loadStateVars<X86Reg::kKindGp >(this, src);
|
|
X86RAPass_loadStateVars<X86Reg::kKindMm >(this, src);
|
|
X86RAPass_loadStateVars<X86Reg::kKindVec>(this, src);
|
|
|
|
// Load masks.
|
|
cur->_occupied = src->_occupied;
|
|
cur->_modified = src->_modified;
|
|
|
|
// Load states of other variables and clear their 'Modified' flags.
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
uint32_t vState = src->_cells[i].getState();
|
|
|
|
if (vState == VirtReg::kStateReg)
|
|
continue;
|
|
|
|
vregs[i]->setState(vState);
|
|
vregs[i]->setPhysId(Globals::kInvalidRegId);
|
|
vregs[i]->setModified(false);
|
|
}
|
|
|
|
ASMJIT_X86_CHECK_STATE
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - State - Save]
|
|
// ============================================================================
|
|
|
|
RAState* X86RAPass::saveState() {
|
|
VirtReg** vregs = _contextVd.getData();
|
|
uint32_t count = static_cast<uint32_t>(_contextVd.getLength());
|
|
|
|
size_t size = Utils::alignTo<size_t>(
|
|
sizeof(X86RAState) + count * sizeof(X86StateCell), sizeof(void*));
|
|
|
|
X86RAState* cur = getState();
|
|
X86RAState* dst = _zone->allocT<X86RAState>(size);
|
|
if (!dst) return nullptr;
|
|
|
|
// Store links.
|
|
::memcpy(dst->_list, cur->_list, X86RAState::kAllCount * sizeof(VirtReg*));
|
|
|
|
// Store masks.
|
|
dst->_occupied = cur->_occupied;
|
|
dst->_modified = cur->_modified;
|
|
|
|
// Store cells.
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
VirtReg* vreg = static_cast<VirtReg*>(vregs[i]);
|
|
X86StateCell& cell = dst->_cells[i];
|
|
|
|
cell.reset();
|
|
cell.setState(vreg->getState());
|
|
}
|
|
|
|
return dst;
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - State - Switch]
|
|
// ============================================================================
|
|
|
|
template<int C>
|
|
static ASMJIT_INLINE void X86RAPass_switchStateVars(X86RAPass* self, X86RAState* src) {
|
|
X86RAState* dst = self->getState();
|
|
|
|
VirtReg** dVars = dst->getListByKind(C);
|
|
VirtReg** sVars = src->getListByKind(C);
|
|
|
|
X86StateCell* cells = src->_cells;
|
|
uint32_t regCount = self->_regCount.get(C);
|
|
bool didWork;
|
|
|
|
do {
|
|
didWork = false;
|
|
|
|
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
|
|
VirtReg* dVReg = dVars[physId];
|
|
VirtReg* sVd = sVars[physId];
|
|
if (dVReg == sVd) continue;
|
|
|
|
if (dVReg) {
|
|
const X86StateCell& cell = cells[dVReg->_raId];
|
|
|
|
if (cell.getState() != VirtReg::kStateReg) {
|
|
if (cell.getState() == VirtReg::kStateMem)
|
|
self->spill<C>(dVReg);
|
|
else
|
|
self->unuse<C>(dVReg);
|
|
|
|
dVReg = nullptr;
|
|
didWork = true;
|
|
if (!sVd) continue;
|
|
}
|
|
}
|
|
|
|
if (!dVReg && sVd) {
|
|
_MoveOrLoad:
|
|
if (sVd->getPhysId() != Globals::kInvalidRegId)
|
|
self->move<C>(sVd, physId);
|
|
else
|
|
self->load<C>(sVd, physId);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
|
|
if (dVReg) {
|
|
const X86StateCell& cell = cells[dVReg->_raId];
|
|
if (!sVd) {
|
|
if (cell.getState() == VirtReg::kStateReg)
|
|
continue;
|
|
|
|
if (cell.getState() == VirtReg::kStateMem)
|
|
self->spill<C>(dVReg);
|
|
else
|
|
self->unuse<C>(dVReg);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
else {
|
|
if (cell.getState() == VirtReg::kStateReg) {
|
|
if (dVReg->getPhysId() != Globals::kInvalidRegId && sVd->getPhysId() != Globals::kInvalidRegId) {
|
|
if (C == X86Reg::kKindGp) {
|
|
self->swapGp(dVReg, sVd);
|
|
}
|
|
else if (C == X86Reg::kKindVec) {
|
|
self->swapVec(dVReg, sVd);
|
|
}
|
|
else {
|
|
self->spill<C>(dVReg);
|
|
self->move<C>(sVd, physId);
|
|
}
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
else {
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (cell.getState() == VirtReg::kStateMem)
|
|
self->spill<C>(dVReg);
|
|
else
|
|
self->unuse<C>(dVReg);
|
|
goto _MoveOrLoad;
|
|
}
|
|
}
|
|
}
|
|
} while (didWork);
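// The loop above runs to a fixed point: resolving one register may displace
// or free another, so we keep iterating until a full pass makes no changes.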
|
|
|
|
uint32_t dModified = dst->_modified.get(C);
|
|
uint32_t sModified = src->_modified.get(C);
|
|
|
|
if (dModified != sModified) {
|
|
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
|
|
VirtReg* vreg = dVars[physId];
|
|
if (!vreg) continue;
|
|
|
|
if ((dModified & regMask) && !(sModified & regMask)) {
|
|
self->save<C>(vreg);
|
|
continue;
|
|
}
|
|
|
|
if (!(dModified & regMask) && (sModified & regMask)) {
|
|
self->modify<C>(vreg);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void X86RAPass::switchState(RAState* src_) {
|
|
ASMJIT_ASSERT(src_ != nullptr);
|
|
|
|
X86RAState* cur = getState();
|
|
X86RAState* src = static_cast<X86RAState*>(src_);
|
|
|
|
// Ignore if both states are equal.
|
|
if (cur == src)
|
|
return;
|
|
|
|
// Switch variables.
|
|
X86RAPass_switchStateVars<X86Reg::kKindGp >(this, src);
|
|
X86RAPass_switchStateVars<X86Reg::kKindMm >(this, src);
|
|
X86RAPass_switchStateVars<X86Reg::kKindVec>(this, src);
|
|
|
|
// Calculate changed state.
|
|
VirtReg** vregs = _contextVd.getData();
|
|
uint32_t count = static_cast<uint32_t>(_contextVd.getLength());
|
|
|
|
X86StateCell* cells = src->_cells;
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
VirtReg* vreg = static_cast<VirtReg*>(vregs[i]);
|
|
const X86StateCell& cell = cells[i];
|
|
uint32_t vState = cell.getState();
|
|
|
|
if (vState != VirtReg::kStateReg) {
|
|
vreg->setState(vState);
|
|
vreg->setModified(false);
|
|
}
|
|
}
|
|
|
|
ASMJIT_X86_CHECK_STATE
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - State - Intersect]
|
|
// ============================================================================
|
|
|
|
// The algorithm is not particularly smart: it tries to find an intersection of
// `a` and `b` and to move/alloc a variable into that location if it's
|
|
// possible. It also finds out which variables will be spilled/unused by `a`
|
|
// and `b` and performs that action here. It may improve the switch state code
|
|
// in certain cases, but doesn't necessarily do the best job possible.
|
|
template<int C>
|
|
static ASMJIT_INLINE void X86RAPass_intersectStateVars(X86RAPass* self, X86RAState* a, X86RAState* b) {
|
|
X86RAState* dst = self->getState();
|
|
|
|
VirtReg** dVars = dst->getListByKind(C);
|
|
VirtReg** aVars = a->getListByKind(C);
|
|
|
|
X86StateCell* aCells = a->_cells;
|
|
X86StateCell* bCells = b->_cells;
|
|
|
|
uint32_t regCount = self->_regCount.get(C);
|
|
bool didWork;
|
|
|
|
// Similar to `switchStateVars()`, we iterate over and over until there is
|
|
// no work to be done.
|
|
do {
|
|
didWork = false;
|
|
|
|
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
|
|
VirtReg* dVReg = dVars[physId]; // Destination reg.
|
|
VirtReg* aVReg = aVars[physId]; // State-a reg.
|
|
|
|
if (dVReg == aVReg) continue;
|
|
|
|
if (dVReg) {
|
|
const X86StateCell& aCell = aCells[dVReg->_raId];
|
|
const X86StateCell& bCell = bCells[dVReg->_raId];
|
|
|
|
if (aCell.getState() != VirtReg::kStateReg && bCell.getState() != VirtReg::kStateReg) {
|
|
if (aCell.getState() == VirtReg::kStateMem || bCell.getState() == VirtReg::kStateMem)
|
|
self->spill<C>(dVReg);
|
|
else
|
|
self->unuse<C>(dVReg);
|
|
|
|
dVReg = nullptr;
|
|
didWork = true;
|
|
if (!aVReg) continue;
|
|
}
|
|
}
|
|
|
|
if (!dVReg && aVReg) {
|
|
if (aVReg->getPhysId() != Globals::kInvalidRegId)
|
|
self->move<C>(aVReg, physId);
|
|
else
|
|
self->load<C>(aVReg, physId);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
|
|
if (dVReg) {
|
|
const X86StateCell& aCell = aCells[dVReg->_raId];
|
|
const X86StateCell& bCell = bCells[dVReg->_raId];
|
|
|
|
if (!aVReg) {
|
|
if (aCell.getState() == VirtReg::kStateReg || bCell.getState() == VirtReg::kStateReg)
|
|
continue;
|
|
|
|
if (aCell.getState() == VirtReg::kStateMem || bCell.getState() == VirtReg::kStateMem)
|
|
self->spill<C>(dVReg);
|
|
else
|
|
self->unuse<C>(dVReg);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
else if (C == X86Reg::kKindGp || C == X86Reg::kKindVec) {
|
|
if (aCell.getState() == VirtReg::kStateReg) {
|
|
if (dVReg->getPhysId() != Globals::kInvalidRegId && aVReg->getPhysId() != Globals::kInvalidRegId) {
|
|
if (C == X86Reg::kKindGp)
|
|
self->swapGp(dVReg, aVReg);
|
|
else
|
|
self->swapVec(dVReg, aVReg);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (didWork);
|
|
|
|
uint32_t dModified = dst->_modified.get(C);
|
|
uint32_t aModified = a->_modified.get(C);
|
|
|
|
if (dModified != aModified) {
|
|
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
|
|
VirtReg* vreg = dVars[physId];
|
|
if (!vreg) continue;
|
|
|
|
const X86StateCell& aCell = aCells[vreg->_raId];
|
|
if ((dModified & regMask) && !(aModified & regMask) && aCell.getState() == VirtReg::kStateReg)
|
|
self->save<C>(vreg);
|
|
}
|
|
}
|
|
}
|
|
|
|
void X86RAPass::intersectStates(RAState* a_, RAState* b_) {
|
|
X86RAState* a = static_cast<X86RAState*>(a_);
|
|
X86RAState* b = static_cast<X86RAState*>(b_);
|
|
|
|
ASMJIT_ASSERT(a != nullptr);
|
|
ASMJIT_ASSERT(b != nullptr);
|
|
|
|
X86RAPass_intersectStateVars<X86Reg::kKindGp >(this, a, b);
|
|
X86RAPass_intersectStateVars<X86Reg::kKindMm >(this, a, b);
|
|
X86RAPass_intersectStateVars<X86Reg::kKindVec>(this, a, b);
|
|
|
|
ASMJIT_X86_CHECK_STATE
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - GetJccFlow / GetOppositeJccFlow]
|
|
// ============================================================================
|
|
|
|
//! \internal
|
|
static ASMJIT_INLINE CBNode* X86RAPass_getJccFlow(CBJump* jNode) {
|
|
if (jNode->isTaken())
|
|
return jNode->getTarget();
|
|
else
|
|
return jNode->getNext();
|
|
}
|
|
|
|
//! \internal
|
|
static ASMJIT_INLINE CBNode* X86RAPass_getOppositeJccFlow(CBJump* jNode) {
|
|
if (jNode->isTaken())
|
|
return jNode->getNext();
|
|
else
|
|
return jNode->getTarget();
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - SingleVarInst]
|
|
// ============================================================================
|
|
|
|
//! \internal
|
|
static void X86RAPass_prepareSingleVarInst(uint32_t instId, TiedReg* tr) {
|
|
switch (instId) {
|
|
// - andn reg, reg ; Set all bits in reg to 0.
|
|
// - xor/pxor reg, reg ; Set all bits in reg to 0.
|
|
// - sub/psub reg, reg ; Set all bits in reg to 0.
|
|
// - pcmpgt reg, reg ; Set all bits in reg to 0.
|
|
// - pcmpeq reg, reg ; Set all bits in reg to 1.
|
|
case X86Inst::kIdPandn :
|
|
case X86Inst::kIdXor : case X86Inst::kIdXorpd : case X86Inst::kIdXorps : case X86Inst::kIdPxor :
|
|
case X86Inst::kIdSub:
|
|
case X86Inst::kIdPsubb : case X86Inst::kIdPsubw : case X86Inst::kIdPsubd : case X86Inst::kIdPsubq :
|
|
case X86Inst::kIdPsubsb : case X86Inst::kIdPsubsw : case X86Inst::kIdPsubusb : case X86Inst::kIdPsubusw :
|
|
case X86Inst::kIdPcmpeqb : case X86Inst::kIdPcmpeqw : case X86Inst::kIdPcmpeqd : case X86Inst::kIdPcmpeqq :
|
|
case X86Inst::kIdPcmpgtb : case X86Inst::kIdPcmpgtw : case X86Inst::kIdPcmpgtd : case X86Inst::kIdPcmpgtq :
|
|
tr->flags &= ~TiedReg::kRReg;
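// Dropping kRReg means the allocator does not have to load or allocate the
// previous value of the register; the instruction fully defines the result.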
|
|
break;
|
|
|
|
// - and reg, reg ; Nop.
|
|
// - or reg, reg ; Nop.
|
|
// - xchg reg, reg ; Nop.
|
|
case X86Inst::kIdAnd : case X86Inst::kIdAndpd : case X86Inst::kIdAndps : case X86Inst::kIdPand :
|
|
case X86Inst::kIdOr : case X86Inst::kIdOrpd : case X86Inst::kIdOrps : case X86Inst::kIdPor :
|
|
case X86Inst::kIdXchg :
|
|
tr->flags &= ~TiedReg::kWReg;
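// Dropping kWReg means the value is left unchanged, so no write-back of the
// register is required afterwards.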
|
|
break;
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Helpers]
|
|
// ============================================================================
|
|
|
|
static void X86RAPass_assignStackArgsRegId(X86RAPass* self, CCFunc* func) {
|
|
const FuncDetail& fd = func->getDetail();
|
|
FuncFrameInfo& ffi = func->getFrameInfo();
|
|
|
|
// Select some register which will contain the base address of function
|
|
// arguments and return address. The algorithm tries to select registers
|
|
// which are saved or not preserved by default; if that is not successful it picks
|
|
// any other register and adds it to `_savedRegs`.
|
|
uint32_t stackArgsRegId;
|
|
if (ffi.hasPreservedFP()) {
|
|
stackArgsRegId = X86Gp::kIdBp;
|
|
}
|
|
else {
|
|
// Passed registers as defined by the calling convention.
|
|
uint32_t passed = fd.getPassedRegs(X86Reg::kKindGp);
|
|
|
|
// Registers actually used to pass function arguments (related to this
|
|
// function signature) with ESP|RSP included as this register can't be
|
|
// used in general to hold anything but the stack pointer.
|
|
uint32_t used = fd.getUsedRegs(X86Reg::kKindGp) | Utils::mask(X86Gp::kIdSp);
|
|
|
|
// First try register that is defined to pass a function argument by the
|
|
// calling convention, but is not used by this function. This will most
|
|
// likely fail in 32-bit mode, but there is a high chance that it will
|
|
// pass in 64-bit mode if the function doesn't use so many arguments.
|
|
uint32_t regs = passed & ~used;
|
|
|
|
// Pick any other register if that didn't work out.
|
|
if (!regs) regs = ~passed & ~used;
|
|
|
|
stackArgsRegId = Utils::findFirstBit(regs);
|
|
ASMJIT_ASSERT(stackArgsRegId < self->cc()->getGpCount());
|
|
}
|
|
|
|
ffi.setStackArgsRegId(stackArgsRegId);
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - SArg Insertion]
|
|
// ============================================================================
|
|
|
|
struct SArgData {
|
|
VirtReg* sVd;
|
|
VirtReg* cVd;
|
|
CCPushArg* sArg;
|
|
uint32_t aType;
|
|
};
|
|
|
|
static ASMJIT_INLINE bool X86RAPass_mustConvertSArg(X86RAPass* self, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
// Keep the sizes as integers (4/8); using `bool` here would collapse both
// sizes to `true` and the float<->double mismatch below could never be detected.
uint32_t dstFloatSize = dstTypeId == TypeId::kF32 ? 4 :
dstTypeId == TypeId::kF64 ? 8 : 0;

uint32_t srcFloatSize = srcTypeId == TypeId::kF32 ? 4 :
srcTypeId == TypeId::kF32x1 ? 4 :
srcTypeId == TypeId::kF64 ? 8 :
srcTypeId == TypeId::kF64x1 ? 8 : 0;

if (dstFloatSize && srcFloatSize)
return dstFloatSize != srcFloatSize;
else
return false;
}
|
|
|
|
static ASMJIT_INLINE uint32_t X86RAPass_typeOfConvertedSArg(X86RAPass* self, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
|
|
ASMJIT_ASSERT(X86RAPass_mustConvertSArg(self, dstTypeId, srcTypeId));
|
|
return dstTypeId == TypeId::kF32 ? TypeId::kF32x1 : TypeId::kF64x1;
|
|
}
|
|
|
|
static ASMJIT_INLINE Error X86RAPass_insertPushArg(
|
|
X86RAPass* self, CCFuncCall* call,
|
|
VirtReg* sReg, const uint32_t* gaRegs,
|
|
const FuncDetail::Value& arg, uint32_t argIndex,
|
|
SArgData* sArgList, uint32_t& sArgCount) {
|
|
|
|
X86Compiler* cc = self->cc();
|
|
uint32_t i;
|
|
uint32_t dstTypeId = arg.getTypeId();
|
|
uint32_t srcTypeId = sReg->getTypeId();
|
|
|
|
// First locate or create sArgBase.
|
|
for (i = 0; i < sArgCount; i++)
|
|
if (sArgList[i].sVd == sReg && !sArgList[i].cVd)
|
|
break;
|
|
|
|
SArgData* sArgData = &sArgList[i];
|
|
if (i == sArgCount) {
|
|
sArgData->sVd = sReg;
|
|
sArgData->cVd = nullptr;
|
|
sArgData->sArg = nullptr;
|
|
sArgData->aType = 0xFF;
|
|
sArgCount++;
|
|
}
|
|
|
|
uint32_t srcRegKind = sReg->getKind();
|
|
|
|
// Only handles float<->double conversion.
|
|
if (X86RAPass_mustConvertSArg(self, dstTypeId, srcTypeId)) {
|
|
uint32_t cvtTypeId = X86RAPass_typeOfConvertedSArg(self, dstTypeId, srcTypeId);
|
|
uint32_t cvtRegKind = X86Reg::kKindVec;
|
|
|
|
while (++i < sArgCount) {
|
|
sArgData = &sArgList[i];
|
|
if (sArgData->sVd != sReg)
|
|
break;
|
|
|
|
if (sArgData->cVd->getTypeId() != cvtTypeId || sArgData->aType != dstTypeId)
|
|
continue;
|
|
|
|
sArgData->sArg->_args |= Utils::mask(argIndex);
|
|
return kErrorOk;
|
|
}
|
|
|
|
VirtReg* cReg = cc->newVirtReg(dstTypeId, x86OpData.archRegs.regInfo[X86Reg::kRegXmm].getSignature(), nullptr);
|
|
if (!cReg) return DebugUtils::errored(kErrorNoHeapMemory);
|
|
|
|
CCPushArg* sArg = cc->newNodeT<CCPushArg>(call, sReg, cReg);
|
|
if (!sArg) return DebugUtils::errored(kErrorNoHeapMemory);
|
|
|
|
X86RAData* raData = self->newRAData(2);
|
|
if (!raData) return DebugUtils::errored(kErrorNoHeapMemory);
|
|
|
|
ASMJIT_PROPAGATE(self->assignRAId(cReg));
|
|
ASMJIT_PROPAGATE(self->assignRAId(sReg));
|
|
|
|
raData->tiedTotal = 2;
|
|
raData->tiedCount.reset();
|
|
raData->tiedCount.add(srcRegKind);
|
|
raData->tiedCount.add(cvtRegKind);
|
|
|
|
raData->tiedIndex.reset();
|
|
raData->inRegs.reset();
|
|
raData->outRegs.reset();
|
|
raData->clobberedRegs.reset();
|
|
|
|
if (srcRegKind <= cvtRegKind) {
|
|
raData->tiedArray[0].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]);
|
|
raData->tiedArray[1].init(cReg, TiedReg::kWReg, 0, gaRegs[cvtRegKind]);
|
|
raData->tiedIndex.set(cvtRegKind, srcRegKind != cvtRegKind);
|
|
}
|
|
else {
|
|
raData->tiedArray[0].init(cReg, TiedReg::kWReg, 0, gaRegs[cvtRegKind]);
|
|
raData->tiedArray[1].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]);
|
|
raData->tiedIndex.set(srcRegKind, 1);
|
|
}
|
|
|
|
sArg->setPassData(raData);
|
|
sArg->_args |= Utils::mask(argIndex);
|
|
|
|
cc->addBefore(sArg, call);
|
|
::memmove(sArgData + 1, sArgData, (sArgCount - i) * sizeof(SArgData));
|
|
|
|
sArgData->sVd = sReg;
|
|
sArgData->cVd = cReg;
|
|
sArgData->sArg = sArg;
|
|
sArgData->aType = dstTypeId;
|
|
|
|
sArgCount++;
|
|
return kErrorOk;
|
|
}
|
|
else {
|
|
CCPushArg* sArg = sArgData->sArg;
|
|
ASMJIT_PROPAGATE(self->assignRAId(sReg));
|
|
|
|
if (!sArg) {
|
|
sArg = cc->newNodeT<CCPushArg>(call, sReg, (VirtReg*)nullptr);
|
|
if (!sArg) return DebugUtils::errored(kErrorNoHeapMemory);
|
|
|
|
X86RAData* raData = self->newRAData(1);
|
|
if (!raData) return DebugUtils::errored(kErrorNoHeapMemory);
|
|
|
|
raData->tiedTotal = 1;
|
|
raData->tiedIndex.reset();
|
|
raData->tiedCount.reset();
|
|
raData->tiedCount.add(srcRegKind);
|
|
raData->inRegs.reset();
|
|
raData->outRegs.reset();
|
|
raData->clobberedRegs.reset();
|
|
raData->tiedArray[0].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]);
|
|
|
|
sArg->setPassData(raData);
|
|
sArgData->sArg = sArg;
|
|
|
|
cc->addBefore(sArg, call);
|
|
}
|
|
|
|
sArg->_args |= Utils::mask(argIndex);
|
|
return kErrorOk;
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Fetch]
|
|
// ============================================================================
|
|
|
|
//! \internal
|
|
//!
|
|
//! Prepare the given function `func`.
|
|
//!
|
|
//! For each node:
|
|
//! - Create and assign groupId and position.
|
|
//! - Collect all variables and merge them into vaList.
|
|
Error X86RAPass::fetch() {
|
|
uint32_t archType = cc()->getArchType();
|
|
CCFunc* func = getFunc();
|
|
|
|
CBNode* node_ = func;
|
|
CBNode* next = nullptr;
|
|
CBNode* stop = getStop();
|
|
|
|
TiedReg agTmp[80];
|
|
SArgData sArgList[80];
|
|
|
|
uint32_t position = 0;
|
|
ZoneList<CBNode*>::Link* jLink = nullptr;
|
|
|
|
// Global allocable registers.
|
|
uint32_t* gaRegs = _gaRegs;
|
|
|
|
if (func->getFrameInfo().hasPreservedFP())
|
|
gaRegs[X86Reg::kKindGp] &= ~Utils::mask(X86Gp::kIdBp);
|
|
|
|
// Allowed index registers (GP/XMM/YMM).
|
|
const uint32_t indexMask = Utils::bits(_regCount.getGp()) & ~(Utils::mask(4));
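// `~Utils::mask(4)` removes id 4 (ESP|RSP), which cannot be encoded as an
// index register in an x86 memory operand.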
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [VI Macros]
|
|
// --------------------------------------------------------------------------
|
|
|
|
#define RA_POPULATE(NODE) \
|
|
do { \
|
|
X86RAData* raData = newRAData(0); \
|
|
if (!raData) goto NoMem; \
|
|
NODE->setPassData(raData); \
|
|
} while (0)
|
|
|
|
#define RA_DECLARE() \
|
|
do { \
|
|
X86RegCount tiedCount; \
|
|
X86RegCount tiedIndex; \
|
|
uint32_t tiedTotal = 0; \
|
|
\
|
|
X86RegMask inRegs; \
|
|
X86RegMask outRegs; \
|
|
X86RegMask clobberedRegs; \
|
|
\
|
|
tiedCount.reset(); \
|
|
inRegs.reset(); \
|
|
outRegs.reset(); \
|
|
clobberedRegs.reset()
|
|
|
|
#define RA_FINALIZE(NODE) \
|
|
{ \
|
|
X86RAData* raData = newRAData(tiedTotal); \
|
|
if (!raData) goto NoMem; \
|
|
\
|
|
tiedIndex.indexFromRegCount(tiedCount); \
|
|
raData->tiedCount = tiedCount; \
|
|
raData->tiedIndex = tiedIndex; \
|
|
\
|
|
raData->inRegs = inRegs; \
|
|
raData->outRegs = outRegs; \
|
|
raData->clobberedRegs = clobberedRegs; \
|
|
\
|
|
TiedReg* tied = agTmp; \
|
|
while (tiedTotal) { \
|
|
VirtReg* vreg = tied->vreg; \
|
|
\
|
|
uint32_t _kind = vreg->getKind(); \
|
|
uint32_t _index = tiedIndex.get(_kind); \
|
|
\
|
|
tiedIndex.add(_kind); \
|
|
if (tied->inRegs) \
|
|
tied->allocableRegs = tied->inRegs; \
|
|
else if (tied->outPhysId != Globals::kInvalidRegId) \
|
|
tied->allocableRegs = Utils::mask(tied->outPhysId); \
|
|
else \
|
|
tied->allocableRegs &= ~inRegs.get(_kind); \
|
|
\
|
|
vreg->_tied = nullptr; \
|
|
raData->setTiedAt(_index, *tied); \
|
|
\
|
|
tied++; \
|
|
tiedTotal--; \
|
|
} \
|
|
NODE->setPassData(raData); \
|
|
} \
|
|
} while (0)
|
|
|
|
#define RA_INSERT(REG, TIED, FLAGS, NEW_ALLOCABLE) \
|
|
do { \
|
|
ASMJIT_ASSERT(REG->_tied == nullptr); \
|
|
TIED = &agTmp[tiedTotal++]; \
|
|
TIED->init(REG, FLAGS, 0, NEW_ALLOCABLE); \
|
|
TIED->refCount++; \
|
|
REG->_tied = TIED; \
|
|
\
|
|
if (assignRAId(REG) != kErrorOk) goto NoMem; \
|
|
tiedCount.add(REG->getKind()); \
|
|
} while (0)
|
|
|
|
#define RA_MERGE(REG, TIED, FLAGS, NEW_ALLOCABLE) \
|
|
do { \
|
|
TIED = REG->_tied; \
|
|
\
|
|
if (!TIED) { \
|
|
TIED = &agTmp[tiedTotal++]; \
|
|
TIED->init(REG, 0, 0, NEW_ALLOCABLE); \
|
|
REG->_tied = TIED; \
|
|
\
|
|
if (assignRAId(REG) != kErrorOk) goto NoMem; \
|
|
tiedCount.add(REG->getKind()); \
|
|
} \
|
|
\
|
|
TIED->flags |= FLAGS; \
|
|
TIED->refCount++; \
|
|
} while (0)
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Loop]
|
|
// --------------------------------------------------------------------------
|
|
|
|
do {
|
|
_Do:
|
|
while (node_->hasPassData()) {
|
|
_NextGroup:
|
|
if (!jLink)
|
|
jLink = _jccList.getFirst();
|
|
else
|
|
jLink = jLink->getNext();
|
|
|
|
if (!jLink) goto _Done;
|
|
node_ = X86RAPass_getOppositeJccFlow(static_cast<CBJump*>(jLink->getValue()));
|
|
}
|
|
|
|
position++;
|
|
|
|
next = node_->getNext();
|
|
node_->setPosition(position);
|
|
|
|
switch (node_->getType()) {
|
|
// ----------------------------------------------------------------------
|
|
// [Align/Embed]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeAlign:
|
|
case CBNode::kNodeData:
|
|
default:
|
|
RA_POPULATE(node_);
|
|
break;
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [Hint]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeHint: {
|
|
CCHint* node = static_cast<CCHint*>(node_);
|
|
RA_DECLARE();
|
|
|
|
if (node->getHint() == CCHint::kHintAlloc) {
|
|
uint32_t remain[Globals::kMaxVRegKinds];
|
|
CCHint* cur = node;
|
|
|
|
remain[X86Reg::kKindGp ] = _regCount.getGp() - 1 - func->getFrameInfo().hasPreservedFP();
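// The `- 1` accounts for ESP|RSP; when a frame pointer is preserved, one
// more GP register (EBP|RBP) is reserved as well.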
|
|
remain[X86Reg::kKindMm ] = _regCount.getMm();
|
|
remain[X86Reg::kKindK ] = _regCount.getK();
|
|
remain[X86Reg::kKindVec] = _regCount.getVec();
|
|
|
|
// Merge as many alloc-hints as possible.
|
|
for (;;) {
|
|
VirtReg* vreg = static_cast<VirtReg*>(cur->getVReg());
|
|
TiedReg* tied = vreg->_tied;
|
|
|
|
uint32_t kind = vreg->getKind();
|
|
uint32_t physId = cur->getValue();
|
|
uint32_t regMask = 0;
|
|
|
|
// We handle both kInvalidReg and kInvalidValue.
|
|
if (physId < Globals::kInvalidRegId)
|
|
regMask = Utils::mask(physId);
|
|
|
|
if (!tied) {
|
|
if (inRegs.has(kind, regMask) || remain[kind] == 0)
|
|
break;
|
|
RA_INSERT(vreg, tied, TiedReg::kRReg, gaRegs[kind]);
|
|
|
|
if (regMask != 0) {
|
|
inRegs.xor_(kind, regMask);
|
|
tied->inRegs = regMask;
|
|
tied->setInPhysId(physId);
|
|
}
|
|
remain[kind]--;
|
|
}
|
|
else if (regMask != 0) {
|
|
if (inRegs.has(kind, regMask) && tied->inRegs != regMask)
|
|
break;
|
|
|
|
inRegs.xor_(kind, tied->inRegs | regMask);
|
|
tied->inRegs = regMask;
|
|
tied->setInPhysId(physId);
|
|
}
|
|
|
|
if (cur != node)
|
|
cc()->removeNode(cur);
|
|
|
|
cur = static_cast<CCHint*>(node->getNext());
|
|
if (!cur || cur->getType() != CBNode::kNodeHint || cur->getHint() != CCHint::kHintAlloc)
|
|
break;
|
|
}
|
|
|
|
next = node->getNext();
|
|
}
|
|
else {
|
|
VirtReg* vreg = static_cast<VirtReg*>(node->getVReg());
|
|
TiedReg* tied;
|
|
|
|
uint32_t flags = 0;
|
|
switch (node->getHint()) {
|
|
case CCHint::kHintSpill : flags = TiedReg::kRMem | TiedReg::kSpill; break;
|
|
case CCHint::kHintSave : flags = TiedReg::kRMem ; break;
|
|
case CCHint::kHintSaveAndUnuse: flags = TiedReg::kRMem | TiedReg::kUnuse; break;
|
|
case CCHint::kHintUnuse : flags = TiedReg::kUnuse ; break;
|
|
}
|
|
RA_INSERT(vreg, tied, flags, 0);
|
|
}
|
|
|
|
RA_FINALIZE(node_);
|
|
break;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [Label]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeLabel: {
|
|
RA_POPULATE(node_);
|
|
if (node_ == func->getExitNode()) {
|
|
ASMJIT_PROPAGATE(addReturningNode(node_));
|
|
goto _NextGroup;
|
|
}
|
|
break;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [Inst]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeInst: {
|
|
CBInst* node = static_cast<CBInst*>(node_);
|
|
|
|
uint32_t instId = node->getInstId();
|
|
uint32_t flags = node->getFlags();
|
|
uint32_t options = node->getOptions();
|
|
uint32_t gpAllowedMask = 0xFFFFFFFF;
|
|
|
|
Operand* opArray = node->getOpArray();
|
|
uint32_t opCount = node->getOpCount();
|
|
|
|
RA_DECLARE();
|
|
if (opCount) {
|
|
const X86Inst& inst = X86Inst::getInst(instId);
|
|
const X86Inst::CommonData& commonData = inst.getCommonData();
|
|
const X86SpecialInst* special = nullptr;
|
|
|
|
// Collect instruction flags and merge all 'TiedReg's.
|
|
if (commonData.isFpu())
|
|
flags |= CBNode::kFlagIsFp;
|
|
|
|
if (commonData.hasFixedRM() && (special = X86SpecialInst_get(instId, opArray, opCount)) != nullptr)
|
|
flags |= CBNode::kFlagIsSpecial;
|
|
|
|
for (uint32_t i = 0; i < opCount; i++) {
|
|
Operand* op = &opArray[i];
|
|
VirtReg* vreg;
|
|
TiedReg* tied;
|
|
|
|
if (op->isVirtReg()) {
|
|
vreg = cc()->getVirtRegById(op->getId());
|
|
if (vreg->isFixed()) continue;
|
|
|
|
RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask);
|
|
if (static_cast<X86Reg*>(op)->isGpb()) {
|
|
tied->flags |= static_cast<X86Gp*>(op)->isGpbLo() ? TiedReg::kX86GpbLo : TiedReg::kX86GpbHi;
|
|
if (archType == ArchInfo::kTypeX86) {
|
|
// If a byte register is accessed in 32-bit mode we have to limit
|
|
// all allocable registers for that variable to eax/ebx/ecx/edx.
|
|
// Other variables are not affected.
|
|
tied->allocableRegs &= 0x0F;
|
|
}
|
|
else {
|
|
// It's fine if lo-byte register is accessed in 64-bit mode;
|
|
// however, hi-byte has to be checked and if it's used all
|
|
// registers (GP/XMM) can only be allocated among the lower
// eight. To do that, we patch 'allocableRegs' of all variables
|
|
// we collected until now and change the allocable restriction
|
|
// for variables that come after.
|
|
if (static_cast<X86Gp*>(op)->isGpbHi()) {
|
|
tied->allocableRegs &= 0x0F;
|
|
if (gpAllowedMask != 0xFF) {
|
|
for (uint32_t j = 0; j < i; j++)
|
|
agTmp[j].allocableRegs &= (agTmp[j].flags & TiedReg::kX86GpbHi) ? 0x0F : 0xFF;
|
|
gpAllowedMask = 0xFF;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (special) {
|
|
uint32_t inReg = special[i].inReg;
|
|
uint32_t outReg = special[i].outReg;
|
|
uint32_t c;
|
|
|
|
if (static_cast<const X86Reg*>(op)->isGp())
|
|
c = X86Reg::kKindGp;
|
|
else
|
|
c = X86Reg::kKindVec;
|
|
|
|
if (inReg != Globals::kInvalidRegId) {
|
|
uint32_t mask = Utils::mask(inReg);
|
|
inRegs.or_(c, mask);
|
|
tied->inRegs |= mask;
|
|
}
|
|
|
|
if (outReg != Globals::kInvalidRegId) {
|
|
uint32_t mask = Utils::mask(outReg);
|
|
outRegs.or_(c, mask);
|
|
tied->setOutPhysId(outReg);
|
|
}
|
|
|
|
tied->flags |= special[i].flags;
|
|
}
|
|
else {
|
|
uint32_t inFlags = TiedReg::kRReg;
|
|
uint32_t outFlags = TiedReg::kWReg;
|
|
uint32_t combinedFlags;
|
|
|
|
if (i == 0) {
|
|
// Read/Write is usually the combination of the first operand.
|
|
combinedFlags = inFlags | outFlags;
|
|
|
|
if (node->getOptions() & CodeEmitter::kOptionOverwrite) {
|
|
// Manually forcing write-only.
|
|
combinedFlags = outFlags;
|
|
}
|
|
else if (commonData.isUseW()) {
|
|
// Write-only instruction.
|
|
uint32_t movSize = commonData.getWriteSize();
|
|
uint32_t regSize = vreg->getSize();
|
|
|
|
// Exception - If the source operand is a memory location
|
|
// promote the move size to 16 bytes.
|
|
if (opArray[1].isMem() && inst.getOperationData().isMovSsSd())
|
|
movSize = 16;
|
|
|
|
if (static_cast<const X86Reg*>(op)->isGp()) {
|
|
uint32_t opSize = static_cast<const X86Reg*>(op)->getSize();
|
|
|
|
// Move size is zero in case that it should be determined
|
|
// from the destination register.
|
|
if (movSize == 0)
|
|
movSize = opSize;
|
|
|
|
// Handle the case that a 32-bit operation in 64-bit mode
|
|
// always clears the rest of the destination register and
|
|
// the case that move size is actually greater than or
|
|
// equal to the size of the variable.
|
|
if (movSize >= 4 || movSize >= regSize)
|
|
combinedFlags = outFlags;
|
|
}
|
|
else if (movSize == 0 || movSize >= regSize) {
|
|
// If move size is greater than or equal to the size of
|
|
// the variable there is nothing to do, because the move
|
|
// will overwrite the variable in all cases.
|
|
combinedFlags = outFlags;
|
|
}
|
|
}
|
|
else if (commonData.isUseR()) {
|
|
// Comparison/Test instructions don't modify any operand.
|
|
combinedFlags = inFlags;
|
|
}
|
|
else if (instId == X86Inst::kIdImul && opCount == 3) {
|
|
// Imul.
|
|
combinedFlags = outFlags;
|
|
}
|
|
}
|
|
else {
|
|
// Read-Only is usually the combination of the second/third/fourth operands.
|
|
combinedFlags = inFlags;
|
|
|
|
// Idiv is a special instruction, never handled here.
|
|
ASMJIT_ASSERT(instId != X86Inst::kIdIdiv);
|
|
|
|
// Xchg/Xadd/Imul.
|
|
if (commonData.isUseXX() || (instId == X86Inst::kIdImul && opCount == 3 && i == 1))
|
|
combinedFlags = inFlags | outFlags;
|
|
}
|
|
tied->flags |= combinedFlags;
|
|
}
|
|
}
|
|
else if (op->isMem()) {
|
|
X86Mem* m = static_cast<X86Mem*>(op);
|
|
node->setMemOpIndex(i);
|
|
|
|
uint32_t specBase = special ? uint32_t(special[i].inReg) : uint32_t(Globals::kInvalidRegId);
|
|
|
|
if (m->hasBaseReg()) {
|
|
uint32_t id = m->getBaseId();
|
|
if (cc()->isVirtRegValid(id)) {
|
|
vreg = cc()->getVirtRegById(id);
|
|
if (!vreg->isStack() && !vreg->isFixed()) {
|
|
RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask);
|
|
if (m->isRegHome()) {
|
|
uint32_t inFlags = TiedReg::kRMem;
|
|
uint32_t outFlags = TiedReg::kWMem;
|
|
uint32_t combinedFlags;
|
|
|
|
if (i == 0) {
|
|
// Default for the first operand.
|
|
combinedFlags = inFlags | outFlags;
|
|
|
|
if (commonData.isUseW()) {
|
|
// Move to memory - setting the right flags is important
|
|
// as it is for a move to a register. It's just a bit
|
|
// simpler as there are no special cases.
|
|
uint32_t movSize = std::max<uint32_t>(commonData.getWriteSize(), m->getSize());
|
|
uint32_t regSize = vreg->getSize();
|
|
|
|
if (movSize >= regSize)
|
|
combinedFlags = outFlags;
|
|
}
|
|
else if (commonData.isUseR()) {
|
|
// Comparison/Test instructions don't modify any operand.
|
|
combinedFlags = inFlags;
|
|
}
|
|
}
|
|
else {
|
|
// Default for the second operand.
|
|
combinedFlags = inFlags;
|
|
|
|
// Handle Xchg instruction (modifies both operands).
|
|
if (commonData.isUseXX())
|
|
combinedFlags = inFlags | outFlags;
|
|
}
|
|
|
|
tied->flags |= combinedFlags;
|
|
}
|
|
else {
|
|
if (specBase != Globals::kInvalidRegId) {
|
|
uint32_t mask = Utils::mask(specBase);
|
|
inRegs.or_(vreg->getKind(), mask);
|
|
outRegs.or_(vreg->getKind(), mask);
|
|
tied->inRegs |= mask;
|
|
tied->setOutPhysId(specBase);
|
|
tied->flags |= special[i].flags;
|
|
}
|
|
else {
|
|
tied->flags |= TiedReg::kRReg;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m->hasIndexReg()) {
|
|
uint32_t id = m->getIndexId();
|
|
if (cc()->isVirtRegValid(id)) {
|
|
// Restrict allocation to all registers except ESP|RSP.
|
|
vreg = cc()->getVirtRegById(m->getIndexId());
|
|
if (!vreg->isFixed()) {
|
|
// TODO: AVX vector operands support.
|
|
RA_MERGE(vreg, tied, 0, gaRegs[X86Reg::kKindGp] & gpAllowedMask);
|
|
tied->allocableRegs &= indexMask;
|
|
tied->flags |= TiedReg::kRReg;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
node->setFlags(flags);
|
|
if (tiedTotal) {
|
|
// Handle instructions which result in zeros/ones or nop if used with the
|
|
// same destination and source operand.
|
|
if (tiedTotal == 1 && opCount >= 2 && opArray[0].isVirtReg() && opArray[1].isVirtReg() && !node->hasMemOp())
|
|
X86RAPass_prepareSingleVarInst(instId, &agTmp[0]);
|
|
}
|
|
|
|
// Turn on AVX if the instruction operates on XMM|YMM|ZMM registers and uses VEX|EVEX prefix.
|
|
if (tiedCount.getVec() && commonData.hasFlag(X86Inst::kFlagVex | X86Inst::kFlagEvex))
|
|
_avxEnabled = true;
|
|
}
|
|
|
|
const RegOnly& extraReg = node->getExtraReg();
|
|
if (extraReg.isValid()) {
|
|
uint32_t id = extraReg.getId();
|
|
if (cc()->isVirtRegValid(id)) {
|
|
VirtReg* vreg = cc()->getVirtRegById(id);
|
|
TiedReg* tied;
|
|
RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask);
|
|
|
|
if (options & (X86Inst::kOptionRep | X86Inst::kOptionRepnz)) {
|
|
tied->allocableRegs = Utils::mask(X86Gp::kIdCx);
|
|
tied->flags |= TiedReg::kXReg;
|
|
}
|
|
else {
|
|
tied->flags |= TiedReg::kRReg;
|
|
}
|
|
}
|
|
}
|
|
|
|
RA_FINALIZE(node_);
|
|
|
|
// Handle conditional/unconditional jump.
|
|
if (node->isJmpOrJcc()) {
|
|
CBJump* jNode = static_cast<CBJump*>(node);
|
|
CBLabel* jTarget = jNode->getTarget();
|
|
|
|
// If this jump is unconditional we put the next node into the
// unreachable node list so we can eliminate possible dead code. We
// have to do this in all cases since we are unable to translate
// without the fetch() step.
//
// We also advance our node pointer to the target node to simulate
// the natural flow of the function.
|
|
if (jNode->isJmp()) {
|
|
if (next && !next->hasPassData())
|
|
ASMJIT_PROPAGATE(addUnreachableNode(next));
|
|
|
|
// Jump not followed.
|
|
if (!jTarget) {
|
|
ASMJIT_PROPAGATE(addReturningNode(jNode));
|
|
goto _NextGroup;
|
|
}
|
|
|
|
node_ = jTarget;
|
|
goto _Do;
|
|
}
|
|
else {
|
|
// Jump not followed.
|
|
if (!jTarget) break;
|
|
|
|
if (jTarget->hasPassData()) {
|
|
uint32_t jTargetPosition = jTarget->getPosition();
|
|
|
|
// Update CBNode::kFlagIsTaken to true if this is a conditional
|
|
// backward jump. This behavior can be overridden by using
|
|
// `X86Inst::kOptionTaken` when the instruction is created.
|
|
if (!jNode->isTaken() && opCount == 1 && jTargetPosition <= position) {
|
|
jNode->_flags |= CBNode::kFlagIsTaken;
|
|
}
|
|
}
|
|
else if (next->hasPassData()) {
|
|
node_ = jTarget;
|
|
goto _Do;
|
|
}
|
|
else {
|
|
ASMJIT_PROPAGATE(addJccNode(jNode));
|
|
node_ = X86RAPass_getJccFlow(jNode);
|
|
goto _Do;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [Func-Entry]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeFunc: {
|
|
ASMJIT_ASSERT(node_ == func);
|
|
X86RAPass_assignStackArgsRegId(this, func);
|
|
|
|
FuncDetail& fd = func->getDetail();
|
|
TiedReg* tied;
|
|
|
|
RA_DECLARE();
|
|
cc()->setCursor(node_);
|
|
|
|
X86Gp saReg;
|
|
uint32_t argCount = fd.getArgCount();
|
|
|
|
for (uint32_t i = 0; i < argCount; i++) {
|
|
const FuncDetail::Value& arg = fd.getArg(i);
|
|
|
|
VirtReg* vReg = func->getArg(i);
|
|
if (!vReg) continue;
|
|
|
|
// Overlapped function arguments.
|
|
if (vReg->_tied)
|
|
return DebugUtils::errored(kErrorOverlappedRegs);
|
|
|
|
uint32_t aKind = X86Reg::kindOf(arg.getRegType());
|
|
uint32_t vKind = vReg->getKind();
|
|
|
|
if (arg.byReg()) {
|
|
if (aKind == vKind) {
|
|
RA_INSERT(vReg, tied, TiedReg::kWReg, 0);
|
|
tied->setOutPhysId(arg.getRegId());
|
|
}
|
|
else {
|
|
X86Reg rTmp = cc()->newReg(arg.getTypeId(), "arg%u", i);
|
|
VirtReg* vTmp = cc()->getVirtReg(rTmp);
|
|
|
|
RA_INSERT(vTmp, tied, TiedReg::kWReg, 0);
|
|
tied->setOutPhysId(arg.getRegId());
|
|
|
|
X86Reg dstReg(X86Reg::fromSignature(vReg->getSignature(), vReg->getId()));
|
|
X86Reg srcReg(X86Reg::fromSignature(vTmp->getSignature(), vTmp->getId()));
|
|
|
|
// Emit conversion after the prolog.
|
|
return X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(cc()),
|
|
dstReg, vReg->getTypeId(),
|
|
srcReg, vTmp->getTypeId(), _avxEnabled);
|
|
}
|
|
}
|
|
else {
|
|
// Instead of complicating the prolog allocation we create a virtual
// register that holds the base address of all arguments passed on the
// stack and then insert nodes that copy these arguments to registers.
|
|
if (!saReg.isValid()) {
|
|
saReg = cc()->newGpz("__args");
|
|
if (!saReg.isValid()) goto NoMem;
|
|
|
|
VirtReg* saBase = cc()->getVirtReg(saReg);
|
|
RA_INSERT(saBase, tied, TiedReg::kWReg, 0);
|
|
|
|
if (func->getFrameInfo().hasPreservedFP())
|
|
saBase->_isFixed = true;
|
|
tied->setOutPhysId(func->getFrameInfo().getStackArgsRegId());
|
|
}
|
|
|
|
// Arguments passed on the stack are handled after the prolog.
|
|
X86Gp aReg = X86Gp::fromSignature(vReg->getSignature(), vReg->getId());
|
|
X86Mem aMem = x86::ptr(saReg, arg.getStackOffset());
|
|
aMem.setArgHome();
|
|
|
|
ASMJIT_PROPAGATE(
|
|
X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(cc()),
|
|
aReg, vReg->getTypeId(), aMem, arg.getTypeId(), _avxEnabled));
|
|
}
|
|
}
|
|
|
|
// If saReg is not needed, also clear it from FuncFrameInfo.
|
|
if (!saReg.isValid())
|
|
func->getFrameInfo().setStackArgsRegId(Globals::kInvalidRegId);
|
|
|
|
RA_FINALIZE(node_);
|
|
next = node_->getNext();
|
|
break;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [End]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeSentinel: {
|
|
RA_POPULATE(node_);
|
|
ASMJIT_PROPAGATE(addReturningNode(node_));
|
|
goto _NextGroup;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [Func-Exit]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeFuncExit: {
|
|
CCFuncRet* node = static_cast<CCFuncRet*>(node_);
|
|
ASMJIT_PROPAGATE(addReturningNode(node));
|
|
|
|
FuncDetail& fd = func->getDetail();
|
|
RA_DECLARE();
|
|
|
|
if (fd.hasRet()) {
|
|
const FuncDetail::Value& ret = fd.getRet(0);
|
|
uint32_t retKind = X86Reg::kindOf(ret.getRegType());
|
|
|
|
for (uint32_t i = 0; i < 2; i++) {
|
|
Operand_* op = &node->_ret[i];
|
|
if (op->isVirtReg()) {
|
|
VirtReg* vreg = cc()->getVirtRegById(op->getId());
|
|
TiedReg* tied;
|
|
RA_MERGE(vreg, tied, 0, 0);
|
|
|
|
if (retKind == vreg->getKind()) {
|
|
tied->flags |= TiedReg::kRReg;
|
|
tied->inRegs = Utils::mask(ret.getRegId());
|
|
inRegs.or_(retKind, tied->inRegs);
|
|
}
|
|
else if (retKind == X86Reg::kKindFp) {
|
|
uint32_t fldFlag = ret.getTypeId() == TypeId::kF32 ? TiedReg::kX86Fld4 : TiedReg::kX86Fld8;
|
|
tied->flags |= TiedReg::kRMem | fldFlag;
|
|
}
|
|
else {
|
|
// TODO: Fix possible other return type conversions.
|
|
ASMJIT_NOT_REACHED();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
RA_FINALIZE(node_);
|
|
|
|
if (!next->hasPassData())
|
|
ASMJIT_PROPAGATE(addUnreachableNode(next));
|
|
goto _NextGroup;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// [Func-Call]
|
|
// ----------------------------------------------------------------------
|
|
|
|
case CBNode::kNodeFuncCall: {
|
|
CCFuncCall* node = static_cast<CCFuncCall*>(node_);
|
|
FuncDetail& fd = node->getDetail();
|
|
|
|
Operand_* target = node->_opArray;
|
|
Operand_* args = node->_args;
|
|
Operand_* rets = node->_ret;
|
|
|
|
func->getFrameInfo().enableCalls();
|
|
func->getFrameInfo().mergeCallFrameSize(fd.getArgStackSize());
|
|
// TODO: Each function frame should also define its stack arguments' alignment.
|
|
// func->getFrameInfo().mergeCallFrameAlignment();
|
|
|
|
uint32_t i;
|
|
uint32_t argCount = fd.getArgCount();
|
|
uint32_t sArgCount = 0;
|
|
uint32_t gpAllocableMask = gaRegs[X86Reg::kKindGp] & ~node->getDetail().getUsedRegs(X86Reg::kKindGp);
|
|
|
|
VirtReg* vreg;
|
|
TiedReg* tied;
|
|
|
|
RA_DECLARE();
|
|
|
|
// Function-call operand.
|
|
if (target->isVirtReg()) {
|
|
vreg = cc()->getVirtRegById(target->getId());
|
|
RA_MERGE(vreg, tied, 0, 0);
|
|
|
|
tied->flags |= TiedReg::kRReg | TiedReg::kRCall;
|
|
if (tied->inRegs == 0)
|
|
tied->allocableRegs |= gpAllocableMask;
|
|
}
|
|
else if (target->isMem()) {
|
|
X86Mem* m = static_cast<X86Mem*>(target);
|
|
|
|
if (m->hasBaseReg() && Operand::isPackedId(m->getBaseId())) {
|
|
vreg = cc()->getVirtRegById(m->getBaseId());
|
|
if (!vreg->isStack()) {
|
|
RA_MERGE(vreg, tied, 0, 0);
|
|
if (m->isRegHome()) {
|
|
tied->flags |= TiedReg::kRMem | TiedReg::kRCall;
|
|
}
|
|
else {
|
|
tied->flags |= TiedReg::kRReg | TiedReg::kRCall;
|
|
if (tied->inRegs == 0)
|
|
tied->allocableRegs |= gpAllocableMask;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m->hasIndexReg() && Operand::isPackedId(m->getIndexId())) {
|
|
// Restrict allocation to all registers except ESP/RSP.
|
|
vreg = cc()->getVirtRegById(m->getIndexId());
|
|
RA_MERGE(vreg, tied, 0, 0);
|
|
|
|
tied->flags |= TiedReg::kRReg | TiedReg::kRCall;
|
|
if ((tied->inRegs & ~indexMask) == 0)
|
|
tied->allocableRegs &= gpAllocableMask & indexMask;
|
|
}
|
|
}
|
|
|
|
// Function-call arguments.
|
|
for (i = 0; i < argCount; i++) {
|
|
Operand_* op = &args[i];
|
|
if (!op->isVirtReg()) continue;
|
|
|
|
vreg = cc()->getVirtRegById(op->getId());
|
|
const FuncDetail::Value& arg = fd.getArg(i);
|
|
|
|
if (arg.byReg()) {
|
|
RA_MERGE(vreg, tied, 0, 0);
|
|
|
|
uint32_t argClass = X86Reg::kindOf(arg.getRegType());
|
|
|
|
if (vreg->getKind() == argClass) {
|
|
tied->inRegs |= Utils::mask(arg.getRegId());
|
|
tied->flags |= TiedReg::kRReg | TiedReg::kRFunc;
|
|
}
|
|
else {
|
|
// TODO: Function-call argument conversion.
|
|
}
|
|
}
|
|
// If this is a stack-based argument we insert CCPushArg instead of
// using TiedReg. It improves the code, because the argument can be
// moved onto the stack as soon as it is ready and the register used
// by the variable can be reused for something else. It is also much
// easier to handle argument conversions, because there will be at
// most one node per conversion.
|
|
else {
|
|
if (X86RAPass_insertPushArg(this, node, vreg, gaRegs, arg, i, sArgList, sArgCount) != kErrorOk)
|
|
goto NoMem;
|
|
}
|
|
}
|
|
|
|
// Function-call returns.
|
|
for (i = 0; i < 2; i++) {
|
|
Operand_* op = &rets[i];
|
|
if (!op->isVirtReg()) continue;
|
|
|
|
const FuncDetail::Value& ret = fd.getRet(i);
|
|
if (ret.byReg()) {
|
|
uint32_t retKind = X86Reg::kindOf(ret.getRegType());
|
|
|
|
vreg = cc()->getVirtRegById(op->getId());
|
|
RA_MERGE(vreg, tied, 0, 0);
|
|
|
|
if (vreg->getKind() == retKind) {
|
|
tied->setOutPhysId(ret.getRegId());
|
|
tied->flags |= TiedReg::kWReg | TiedReg::kWFunc;
|
|
}
|
|
else {
|
|
// TODO: Function-call return value conversion.
|
|
}
|
|
}
|
|
}
|
|
|
|
// Init clobbered.
|
|
clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (fd.getPassedRegs(X86Reg::kKindGp ) | ~fd.getPreservedRegs(X86Reg::kKindGp )));
|
|
clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (fd.getPassedRegs(X86Reg::kKindMm ) | ~fd.getPreservedRegs(X86Reg::kKindMm )));
|
|
clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (fd.getPassedRegs(X86Reg::kKindK ) | ~fd.getPreservedRegs(X86Reg::kKindK )));
|
|
clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (fd.getPassedRegs(X86Reg::kKindVec) | ~fd.getPreservedRegs(X86Reg::kKindVec)));
|
|
|
|
RA_FINALIZE(node_);
|
|
break;
|
|
}
|
|
}
|
|
|
|
node_ = next;
|
|
} while (node_ != stop);
|
|
|
|
_Done:
|
|
// Mark the exit label and the end node as fetched, otherwise they could be
// removed by `removeUnreachableCode()`, which would lead to a crash in some
// later step.
|
|
node_ = func->getEnd();
|
|
if (!node_->hasPassData()) {
|
|
CBLabel* fExit = func->getExitNode();
|
|
RA_POPULATE(fExit);
|
|
fExit->setPosition(++position);
|
|
|
|
RA_POPULATE(node_);
|
|
node_->setPosition(++position);
|
|
}
|
|
return kErrorOk;
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Failure]
|
|
// --------------------------------------------------------------------------
|
|
|
|
NoMem:
|
|
return DebugUtils::errored(kErrorNoHeapMemory);
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86RAPass - Annotate]
|
|
// ============================================================================
|
|
|
|
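// Annotate pre-formats each instruction (still referring to virtual registers)
// and stores the text as the node's inline comment, so the logger can later
// print the original form of each instruction.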
Error X86RAPass::annotate() {
|
|
#if !defined(ASMJIT_DISABLE_LOGGING)
|
|
CCFunc* func = getFunc();
|
|
|
|
CBNode* node_ = func;
|
|
CBNode* end = func->getEnd();
|
|
|
|
Zone& dataZone = cc()->_cbDataZone;
|
|
StringBuilderTmp<256> sb;
|
|
|
|
uint32_t maxLen = 0;
|
|
while (node_ && node_ != end) {
|
|
if (!node_->hasInlineComment()) {
|
|
if (node_->getType() == CBNode::kNodeInst) {
|
|
CBInst* node = static_cast<CBInst*>(node_);
|
|
Logging::formatInstruction(
|
|
sb,
|
|
0,
|
|
cc(),
|
|
cc()->getArchType(),
|
|
node->getInstDetail(), node->getOpArray(), node->getOpCount());
|
|
|
|
node_->setInlineComment(
|
|
static_cast<char*>(dataZone.dup(sb.getData(), sb.getLength(), true)));
|
|
maxLen = std::max<uint32_t>(maxLen, static_cast<uint32_t>(sb.getLength()));
|
|
|
|
sb.clear();
|
|
}
|
|
}
|
|
|
|
node_ = node_->getNext();
|
|
}
|
|
_annotationLength = maxLen + 1;
|
|
#endif // !ASMJIT_DISABLE_LOGGING
|
|
|
|
return kErrorOk;
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86BaseAlloc]
|
|
// ============================================================================
|
|
|
|
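//! \internal
//!
//! Base class for X86VarAlloc and X86CallAlloc. It stores the RA context,
//! the current node, its TiedReg arrays (per register kind), and counters
//! of processed TiedReg's.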
struct X86BaseAlloc {
|
|
// --------------------------------------------------------------------------
|
|
// [Construction / Destruction]
|
|
// --------------------------------------------------------------------------
|
|
|
|
ASMJIT_INLINE X86BaseAlloc(X86RAPass* context) {
|
|
_context = context;
|
|
_cc = context->cc();
|
|
}
|
|
ASMJIT_INLINE ~X86BaseAlloc() {}
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Accessors]
|
|
// --------------------------------------------------------------------------
|
|
|
|
//! Get the context.
|
|
ASMJIT_INLINE X86RAPass* getContext() const { return _context; }
|
|
//! Get the current state (always the same instance as X86RAPass::_x86State).
|
|
ASMJIT_INLINE X86RAState* getState() const { return _context->getState(); }
|
|
|
|
//! Get the node.
|
|
ASMJIT_INLINE CBNode* getNode() const { return _node; }
|
|
|
|
//! Get TiedReg list (all).
|
|
ASMJIT_INLINE TiedReg* getTiedArray() const { return _tiedArray[0]; }
|
|
//! Get TiedReg list (per class).
|
|
ASMJIT_INLINE TiedReg* getTiedArrayByKind(uint32_t kind) const { return _tiedArray[kind]; }
|
|
|
|
//! Get TiedReg count (all).
|
|
ASMJIT_INLINE uint32_t getTiedCount() const { return _tiedTotal; }
|
|
//! Get TiedReg count (per class).
|
|
ASMJIT_INLINE uint32_t getTiedCountByKind(uint32_t kind) const { return _tiedCount.get(kind); }
|
|
|
|
//! Get if all variables of the given register `kind` are done.
|
|
ASMJIT_INLINE bool isTiedDone(uint32_t kind) const { return _tiedDone.get(kind) == _tiedCount.get(kind); }
|
|
|
|
//! Get how many variables have been allocated.
|
|
ASMJIT_INLINE uint32_t getTiedDone(uint32_t kind) const { return _tiedDone.get(kind); }
|
|
//! Add to the count of variables allocated.
|
|
ASMJIT_INLINE void addTiedDone(uint32_t kind, uint32_t n = 1) { _tiedDone.add(kind, n); }
|
|
|
|
//! Get number of allocable registers per class.
|
|
ASMJIT_INLINE uint32_t getGaRegs(uint32_t kind) const {
|
|
return _context->_gaRegs[kind];
|
|
}
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Init / Cleanup]
|
|
// --------------------------------------------------------------------------
|
|
|
|
protected:
|
|
// Just to prevent calling these methods from X86RAPass::translate().
|
|
ASMJIT_INLINE void init(CBNode* node, X86RAData* map);
|
|
ASMJIT_INLINE void cleanup();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Unuse]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void unuseBefore();
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void unuseAfter();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Members]
|
|
// --------------------------------------------------------------------------
|
|
|
|
//! RA context.
|
|
X86RAPass* _context;
|
|
//! Compiler.
|
|
X86Compiler* _cc;
|
|
|
|
//! Node.
|
|
CBNode* _node;
|
|
|
|
//! Register allocator (RA) data.
|
|
X86RAData* _raData;
|
|
//! TiedReg list (per register kind).
|
|
TiedReg* _tiedArray[Globals::kMaxVRegKinds];
|
|
|
|
//! Count of all TiedReg's.
|
|
uint32_t _tiedTotal;
|
|
|
|
//! TiedReg's total counter.
|
|
X86RegCount _tiedCount;
|
|
//! TiedReg's done counter.
|
|
X86RegCount _tiedDone;
|
|
};
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86BaseAlloc - Init / Cleanup]
|
|
// ============================================================================
|
|
|
|
ASMJIT_INLINE void X86BaseAlloc::init(CBNode* node, X86RAData* raData) {
|
|
_node = node;
|
|
_raData = raData;
|
|
|
|
// We have to set the correct cursor in case any instruction is emitted
|
|
// during the allocation phase; it has to be emitted before the current
|
|
// instruction.
|
|
_cc->_setCursor(node->getPrev());
|
|
|
|
// Setup the lists of variables.
|
|
{
|
|
TiedReg* tied = raData->getTiedArray();
|
|
_tiedArray[X86Reg::kKindGp ] = tied;
|
|
_tiedArray[X86Reg::kKindMm ] = tied + raData->getTiedStart(X86Reg::kKindMm );
|
|
_tiedArray[X86Reg::kKindK ] = tied + raData->getTiedStart(X86Reg::kKindK );
|
|
_tiedArray[X86Reg::kKindVec] = tied + raData->getTiedStart(X86Reg::kKindVec);
|
|
}
|
|
|
|
// Setup counters.
|
|
_tiedTotal = raData->tiedTotal;
|
|
_tiedCount = raData->tiedCount;
|
|
_tiedDone.reset();
|
|
|
|
// Connect VREG->TIED.
|
|
for (uint32_t i = 0; i < _tiedTotal; i++) {
|
|
TiedReg* tied = &_tiedArray[0][i];
|
|
VirtReg* vreg = tied->vreg;
|
|
vreg->_tied = tied;
|
|
}
|
|
}
|
|
|
|
ASMJIT_INLINE void X86BaseAlloc::cleanup() {
|
|
// Disconnect VREG->TIED.
|
|
for (uint32_t i = 0; i < _tiedTotal; i++) {
|
|
TiedReg* tied = &_tiedArray[0][i];
|
|
VirtReg* vreg = tied->vreg;
|
|
vreg->_tied = nullptr;
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86BaseAlloc - Unuse]
|
|
// ============================================================================
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86BaseAlloc::unuseBefore() {
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
const uint32_t checkFlags = TiedReg::kXReg |
|
|
TiedReg::kRMem |
|
|
TiedReg::kRFunc |
|
|
TiedReg::kRCall ;
|
|
|
|
for (uint32_t i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
if ((tied->flags & checkFlags) == TiedReg::kWReg)
|
|
_context->unuse<C>(tied->vreg);
|
|
}
|
|
}
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86BaseAlloc::unuseAfter() {
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
for (uint32_t i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
if (tied->flags & TiedReg::kUnuse)
|
|
_context->unuse<C>(tied->vreg);
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86VarAlloc]
|
|
// ============================================================================
|
|
|
|
//! \internal
|
|
//!
|
|
//! Register allocator context (asm instructions).
|
|
struct X86VarAlloc : public X86BaseAlloc {
|
|
// --------------------------------------------------------------------------
|
|
// [Construction / Destruction]
|
|
// --------------------------------------------------------------------------
|
|
|
|
ASMJIT_INLINE X86VarAlloc(X86RAPass* context) : X86BaseAlloc(context) {}
|
|
ASMJIT_INLINE ~X86VarAlloc() {}
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Run]
|
|
// --------------------------------------------------------------------------
|
|
|
|
Error run(CBNode* node);
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Init / Cleanup]
|
|
// --------------------------------------------------------------------------
|
|
|
|
protected:
|
|
// Just to prevent calling these methods from X86RAPass::translate().
|
|
ASMJIT_INLINE void init(CBNode* node, X86RAData* map);
|
|
ASMJIT_INLINE void cleanup();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Plan / Spill / Alloc]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void plan();
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void spill();
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void alloc();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [GuessAlloc / GuessSpill]
|
|
// --------------------------------------------------------------------------
|
|
|
|
//! Guess which register is the best candidate for `vreg` from `allocableRegs`.
|
|
//!
|
|
//! The guess is based on looking ahead and inspecting register allocator
//! instructions. The main reason is to prevent allocation to a register
//! which is needed by the next instruction(s). The lookahead goes as far
//! as possible; once no allocable registers remain, the mask of the
//! previously valid registers (called 'safeRegs') is returned.
template<int C>
|
|
ASMJIT_INLINE uint32_t guessAlloc(VirtReg* vreg, uint32_t allocableRegs);
|
|
|
|
//! Guess whether to move the given `vreg` instead of spilling it.
|
|
template<int C>
|
|
ASMJIT_INLINE uint32_t guessSpill(VirtReg* vreg, uint32_t allocableRegs);
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Modified]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void modified();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Members]
|
|
// --------------------------------------------------------------------------
|
|
|
|
//! Will alloc to these registers.
|
|
X86RegMask _willAlloc;
|
|
//! Will spill these registers.
|
|
X86RegMask _willSpill;
|
|
};
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86VarAlloc - Run]
|
|
// ============================================================================
|
|
|
|
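// Allocates registers for a single node: overwritten variables are unused,
// each register kind is planned, spilled, and allocated, the node's virtual
// operands are rewritten to physical registers, and the modified and
// clobbered masks are updated.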
Error X86VarAlloc::run(CBNode* node_) {
|
|
// Initialize.
|
|
X86RAData* raData = node_->getPassData<X86RAData>();
|
|
// Initialize the allocator; connect VirtReg->TiedReg.
|
|
init(node_, raData);
|
|
|
|
if (raData->tiedTotal != 0) {
|
|
// Unuse overwritten variables.
|
|
unuseBefore<X86Reg::kKindGp>();
|
|
unuseBefore<X86Reg::kKindMm>();
|
|
unuseBefore<X86Reg::kKindVec>();
|
|
|
|
// Plan the allocation. The planner assigns input/output registers for each
// variable and decides whether to allocate it in a register or on the stack.
|
|
plan<X86Reg::kKindGp>();
|
|
plan<X86Reg::kKindMm>();
|
|
plan<X86Reg::kKindVec>();
|
|
|
|
// Spill all variables marked by plan().
|
|
spill<X86Reg::kKindGp>();
|
|
spill<X86Reg::kKindMm>();
|
|
spill<X86Reg::kKindVec>();
|
|
|
|
// Alloc all variables marked by plan().
|
|
alloc<X86Reg::kKindGp>();
|
|
alloc<X86Reg::kKindMm>();
|
|
alloc<X86Reg::kKindVec>();
|
|
|
|
// Translate node operands.
|
|
if (node_->getType() == CBNode::kNodeInst) {
|
|
CBInst* node = static_cast<CBInst*>(node_);
|
|
if (node->hasExtraReg()) {
|
|
Reg reg = node->getExtraReg().toReg<Reg>();
|
|
ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, &reg, 1));
|
|
node->setExtraReg(reg);
|
|
}
|
|
ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, node->getOpArray(), node->getOpCount()));
|
|
}
|
|
else if (node_->getType() == CBNode::kNodePushArg) {
|
|
CCPushArg* node = static_cast<CCPushArg*>(node_);
|
|
|
|
CCFuncCall* call = static_cast<CCFuncCall*>(node->getCall());
|
|
FuncDetail& fd = call->getDetail();
|
|
|
|
uint32_t argIndex = 0;
|
|
uint32_t argMask = node->_args;
|
|
|
|
VirtReg* cvtReg = node->getCvtReg();
|
|
VirtReg* srcReg = node->getSrcReg();
|
|
|
|
// Convert first.
|
|
ASMJIT_ASSERT(srcReg->getPhysId() != Globals::kInvalidRegId);
|
|
|
|
if (cvtReg) {
|
|
ASMJIT_ASSERT(cvtReg->getPhysId() != Globals::kInvalidRegId);
|
|
|
|
X86Reg dstOp(X86Reg::fromSignature(cvtReg->getSignature(), cvtReg->getId()));
|
|
X86Reg srcOp(X86Reg::fromSignature(srcReg->getSignature(), srcReg->getId()));
|
|
|
|
// Emit conversion after the prolog.
|
|
X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(_context->cc()),
|
|
dstOp, cvtReg->getTypeId(),
|
|
srcOp, srcReg->getTypeId(), _context->_avxEnabled);
|
|
srcReg = cvtReg;
|
|
}
|
|
|
|
while (argMask != 0) {
|
|
if (argMask & 0x1) {
|
|
FuncDetail::Value& arg = fd.getArg(argIndex);
|
|
ASMJIT_ASSERT(arg.byStack());
|
|
|
|
X86Mem dst = x86::ptr(_context->_zsp, -static_cast<int>(_context->getGpSize()) + arg.getStackOffset());
|
|
_context->emitRegToStack(arg.getTypeId(), &dst, srcReg->getTypeId(), srcReg->getPhysId());
|
|
}
|
|
|
|
argIndex++;
|
|
argMask >>= 1;
|
|
}
|
|
}
|
|
|
|
// Mark variables as modified.
|
|
modified<X86Reg::kKindGp>();
|
|
modified<X86Reg::kKindMm>();
|
|
modified<X86Reg::kKindVec>();
|
|
|
|
// Cleanup; disconnect VirtReg->TiedReg.
|
|
cleanup();
|
|
|
|
// Update clobbered mask.
|
|
_context->_clobberedRegs.or_(_willAlloc);
|
|
}
|
|
|
|
// Update clobbered mask.
|
|
_context->_clobberedRegs.or_(raData->clobberedRegs);
|
|
|
|
// Unuse.
|
|
if (raData->tiedTotal != 0) {
|
|
unuseAfter<X86Reg::kKindGp>();
|
|
unuseAfter<X86Reg::kKindMm>();
|
|
unuseAfter<X86Reg::kKindVec>();
|
|
}
|
|
|
|
return kErrorOk;
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86VarAlloc - Init / Cleanup]
|
|
// ============================================================================
|
|
|
|
ASMJIT_INLINE void X86VarAlloc::init(CBNode* node, X86RAData* raData) {
|
|
X86BaseAlloc::init(node, raData);
|
|
|
|
// These will block the planner from assigning them during planning. The
// planner will add more registers when assigning registers to variables
// that don't need any specific register.
|
|
_willAlloc = raData->inRegs;
|
|
_willAlloc.or_(raData->outRegs);
|
|
_willSpill.reset();
|
|
}
|
|
|
|
ASMJIT_INLINE void X86VarAlloc::cleanup() {
|
|
X86BaseAlloc::cleanup();
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86VarAlloc - Plan / Spill / Alloc]
|
|
// ============================================================================
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86VarAlloc::plan() {
|
|
if (isTiedDone(C)) return;
|
|
|
|
uint32_t i;
|
|
uint32_t willAlloc = _willAlloc.get(C);
|
|
uint32_t willFree = 0;
|
|
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
X86RAState* state = getState();
|
|
|
|
// Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
VirtReg* vreg = tied->vreg;
|
|
|
|
uint32_t vaFlags = tied->flags;
|
|
uint32_t physId = vreg->getPhysId();
|
|
uint32_t regMask = (physId != Globals::kInvalidRegId) ? Utils::mask(physId) : 0;
|
|
|
|
if ((vaFlags & TiedReg::kXReg) != 0) {
|
|
// Planning register allocation. First check whether the variable is
|
|
// already allocated in register and if it can stay allocated there.
|
|
//
|
|
// The following conditions may happen:
|
|
//
|
|
// a) Allocated register is one of the mandatoryRegs.
|
|
// b) Allocated register is one of the allocableRegs.
|
|
uint32_t mandatoryRegs = tied->inRegs;
|
|
uint32_t allocableRegs = tied->allocableRegs;
|
|
|
|
if (regMask != 0) {
|
|
// Special path for planning output-only registers.
|
|
if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) {
|
|
uint32_t outPhysId = tied->outPhysId;
|
|
mandatoryRegs = (outPhysId != Globals::kInvalidRegId) ? Utils::mask(outPhysId) : 0;
|
|
|
|
if ((mandatoryRegs | allocableRegs) & regMask) {
|
|
tied->setOutPhysId(physId);
|
|
tied->flags |= TiedReg::kWDone;
|
|
|
|
if (mandatoryRegs & regMask) {
|
|
// Case 'a' - 'willAlloc' contains initially all inRegs from all TiedReg's.
|
|
ASMJIT_ASSERT((willAlloc & regMask) != 0);
|
|
}
|
|
else {
|
|
// Case 'b'.
|
|
tied->setOutPhysId(physId);
|
|
willAlloc |= regMask;
|
|
}
|
|
|
|
addTiedDone(C);
|
|
continue;
|
|
}
|
|
}
|
|
else {
|
|
if ((mandatoryRegs | allocableRegs) & regMask) {
|
|
tied->setInPhysId(physId);
|
|
tied->flags |= TiedReg::kRDone;
|
|
|
|
if (mandatoryRegs & regMask) {
|
|
// Case 'a' - 'willAlloc' contains initially all inRegs from all TiedReg's.
|
|
ASMJIT_ASSERT((willAlloc & regMask) != 0);
|
|
}
|
|
else {
|
|
// Case 'b'.
|
|
tied->inRegs |= regMask;
|
|
willAlloc |= regMask;
|
|
}
|
|
|
|
addTiedDone(C);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// The variable is not allocated, or it is allocated in a register that
// doesn't match inRegs or allocableRegs. The next step is to pick the
// best register for this variable. If `inRegs` contains any register
// the decision is simple - we have to follow it. Otherwise we use
// `guessAlloc()` to find a register (or registers) by looking ahead.
// However, this is not the best place to pick a good register, because
// we don't know yet which registers will be freed. So instead of picking
// a register here, we just mark the current register (if the variable is
// allocated) as `willFree` so the planner can use this information in
// the second step to plan the allocation as a whole.
willFree |= regMask;
|
|
continue;
|
|
}
|
|
else {
|
|
if (regMask != 0) {
|
|
willFree |= regMask;
|
|
continue;
|
|
}
|
|
else {
|
|
tied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Occupied registers without 'willFree' registers; contains basically
|
|
// all the registers we can use to allocate variables without inRegs
|
|
// specified.
|
|
uint32_t occupied = state->_occupied.get(C) & ~willFree;
|
|
uint32_t willSpill = 0;
|
|
|
|
// Find the best registers for variables that are not allocated yet.
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
VirtReg* vreg = tied->vreg;
|
|
uint32_t vaFlags = tied->flags;
|
|
|
|
if ((vaFlags & TiedReg::kXReg) != 0) {
|
|
if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) {
|
|
if (vaFlags & TiedReg::kWDone)
|
|
continue;
|
|
|
|
// Skip all registers that have assigned outPhysId. Spill if occupied.
|
|
if (tied->hasOutPhysId()) {
|
|
uint32_t outRegs = Utils::mask(tied->outPhysId);
|
|
willSpill |= occupied & outRegs;
|
|
continue;
|
|
}
|
|
}
|
|
else {
|
|
if (vaFlags & TiedReg::kRDone)
|
|
continue;
|
|
|
|
// We skip all registers that have an assigned inPhysId, which indicates
// that the register to allocate in is known.
|
|
if (tied->hasInPhysId()) {
|
|
uint32_t inRegs = tied->inRegs;
|
|
willSpill |= occupied & inRegs;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
uint32_t m = tied->inRegs;
|
|
if (tied->hasOutPhysId())
|
|
m |= Utils::mask(tied->outPhysId);
|
|
|
|
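// Exclude registers already reserved by other variables. This variable's own
// candidates (`m`, already part of `willAlloc`) survive the XOR and remain
// allocable.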
m = tied->allocableRegs & ~(willAlloc ^ m);
|
|
m = guessAlloc<C>(vreg, m);
|
|
ASMJIT_ASSERT(m != 0);
|
|
|
|
uint32_t candidateRegs = m & ~occupied;
|
|
uint32_t homeMask = vreg->getHomeMask();
|
|
|
|
uint32_t physId;
|
|
uint32_t regMask;
|
|
|
|
if (candidateRegs == 0) {
|
|
candidateRegs = m & occupied & ~state->_modified.get(C);
|
|
if (candidateRegs == 0)
|
|
candidateRegs = m;
|
|
}
|
|
if (candidateRegs & homeMask) candidateRegs &= homeMask;
|
|
|
|
physId = Utils::findFirstBit(candidateRegs);
|
|
regMask = Utils::mask(physId);
|
|
|
|
if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) {
|
|
tied->setOutPhysId(physId);
|
|
}
|
|
else {
|
|
tied->setInPhysId(physId);
|
|
tied->inRegs = regMask;
|
|
}
|
|
|
|
willAlloc |= regMask;
|
|
willSpill |= regMask & occupied;
|
|
willFree &= ~regMask;
|
|
occupied |= regMask;
|
|
|
|
continue;
|
|
}
|
|
else if ((vaFlags & TiedReg::kXMem) != 0) {
|
|
uint32_t physId = vreg->getPhysId();
|
|
if (physId != Globals::kInvalidRegId && (vaFlags & TiedReg::kXMem) != TiedReg::kWMem) {
|
|
willSpill |= Utils::mask(physId);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set calculated masks back to the allocator; needed by spill() and alloc().
|
|
_willSpill.set(C, willSpill);
|
|
_willAlloc.set(C, willAlloc);
|
|
}
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86VarAlloc::spill() {
|
|
uint32_t m = _willSpill.get(C);
|
|
uint32_t i = static_cast<uint32_t>(0) - 1;
|
|
if (m == 0) return;
|
|
|
|
X86RAState* state = getState();
|
|
VirtReg** vregs = state->getListByKind(C);
|
|
|
|
// Available registers used to decide whether a move has any benefit over a spill.
|
|
uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C));
|
|
|
|
do {
|
|
// We always advance one more to destroy the bit that we have found.
|
|
uint32_t bitIndex = Utils::findFirstBit(m) + 1;
|
|
|
|
i += bitIndex;
|
|
m >>= bitIndex;
|
|
|
|
VirtReg* vreg = vregs[i];
|
|
ASMJIT_ASSERT(vreg);
|
|
|
|
TiedReg* tied = vreg->_tied;
|
|
ASMJIT_ASSERT(!tied || (tied->flags & TiedReg::kXReg) == 0);
|
|
|
|
if (vreg->isModified() && availableRegs) {
|
|
// Don't check for alternatives if the variable has to be spilled.
|
|
if (!tied || (tied->flags & TiedReg::kSpill) == 0) {
|
|
uint32_t altRegs = guessSpill<C>(vreg, availableRegs);
|
|
|
|
if (altRegs != 0) {
|
|
uint32_t physId = Utils::findFirstBit(altRegs);
|
|
uint32_t regMask = Utils::mask(physId);
|
|
|
|
_context->move<C>(vreg, physId);
|
|
availableRegs ^= regMask;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
_context->spill<C>(vreg);
|
|
} while (m != 0);
|
|
}
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86VarAlloc::alloc() {
|
|
if (isTiedDone(C)) return;
|
|
|
|
uint32_t i;
|
|
bool didWork;
|
|
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
// Alloc `in` regs.
|
|
do {
|
|
didWork = false;
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* aTied = &tiedArray[i];
|
|
VirtReg* aVReg = aTied->vreg;
|
|
|
|
if ((aTied->flags & (TiedReg::kRReg | TiedReg::kRDone)) != TiedReg::kRReg)
|
|
continue;
|
|
|
|
uint32_t aPhysId = aVReg->getPhysId();
|
|
uint32_t bPhysId = aTied->inPhysId;
|
|
|
|
// Shouldn't be the same.
|
|
ASMJIT_ASSERT(aPhysId != bPhysId);
|
|
|
|
VirtReg* bVReg = getState()->getListByKind(C)[bPhysId];
|
|
if (bVReg) {
|
|
// Gp and Vec registers - swap two registers if we can solve two
// allocation tasks by a single swap ('xchg' for Gp registers, an
// xor swap for Vec registers), exchanging two registers required
// by the instruction/node or one required register with another
// non-required one.
|
|
if ((C == X86Reg::kKindGp || C == X86Reg::kKindVec) && aPhysId != Globals::kInvalidRegId) {
|
|
TiedReg* bTied = bVReg->_tied;
|
|
if (C == X86Reg::kKindGp)
|
|
_context->swapGp(aVReg, bVReg);
|
|
else
|
|
_context->swapVec(aVReg, bVReg);
|
|
|
|
aTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
|
|
// Double-hit, two registers allocated by a single xchg.
|
|
if (bTied && bTied->inPhysId == aPhysId) {
|
|
bTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
}
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
}
|
|
else if (aPhysId != Globals::kInvalidRegId) {
|
|
_context->move<C>(aVReg, bPhysId);
|
|
|
|
aTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
else {
|
|
_context->alloc<C>(aVReg, bPhysId);
|
|
|
|
aTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
}
|
|
} while (didWork);
|
|
|
|
// Alloc 'out' regs.
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
VirtReg* vreg = tied->vreg;
|
|
|
|
if ((tied->flags & (TiedReg::kXReg | TiedReg::kWDone)) != TiedReg::kWReg)
|
|
continue;
|
|
|
|
uint32_t physId = tied->outPhysId;
|
|
ASMJIT_ASSERT(physId != Globals::kInvalidRegId);
|
|
|
|
if (vreg->getPhysId() != physId) {
|
|
ASMJIT_ASSERT(getState()->getListByKind(C)[physId] == nullptr);
|
|
_context->attach<C>(vreg, physId, false);
|
|
}
|
|
|
|
tied->flags |= TiedReg::kWDone;
|
|
addTiedDone(C);
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86VarAlloc - GuessAlloc / GuessSpill]
|
|
// ============================================================================
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE uint32_t X86VarAlloc::guessAlloc(VirtReg* vreg, uint32_t allocableRegs) {
|
|
ASMJIT_ASSERT(allocableRegs != 0);
|
|
|
|
// Stop now if there is only one bit (register) set in `allocableRegs` mask.
|
|
if (Utils::isPowerOf2(allocableRegs)) return allocableRegs;
|
|
|
|
uint32_t raId = vreg->_raId;
|
|
uint32_t safeRegs = allocableRegs;
|
|
|
|
uint32_t i;
|
|
uint32_t maxLookAhead = kCompilerDefaultLookAhead;
|
|
|
|
// Look ahead and calculate mask of special registers on both - input/output.
|
|
CBNode* node = _node;
|
|
for (i = 0; i < maxLookAhead; i++) {
|
|
X86RAData* raData = node->getPassData<X86RAData>();
|
|
RABits* liveness = raData ? raData->liveness : static_cast<RABits*>(nullptr);
|
|
|
|
// If the variable becomes dead it doesn't make sense to continue.
|
|
if (liveness && !liveness->getBit(raId)) break;
|
|
|
|
// Stop on `CBSentinel` and `CCFuncRet`.
|
|
if (node->hasFlag(CBNode::kFlagIsRet)) break;
|
|
|
|
// Stop on a conditional jump; we don't follow them.
|
|
if (node->hasFlag(CBNode::kFlagIsJcc)) break;
|
|
|
|
// Advance on non-conditional jump.
|
|
if (node->hasFlag(CBNode::kFlagIsJmp)) {
|
|
node = static_cast<CBJump*>(node)->getTarget();
|
|
// Stop on jump that is not followed.
|
|
if (!node) break;
|
|
}
|
|
|
|
node = node->getNext();
|
|
ASMJIT_ASSERT(node != nullptr);
|
|
|
|
raData = node->getPassData<X86RAData>();
|
|
if (raData) {
|
|
TiedReg* tied = raData->findTiedByKind(C, vreg);
|
|
uint32_t mask;
|
|
|
|
if (tied) {
|
|
// If the variable is overwritten it doesn't make sense to continue.
|
|
if ((tied->flags & TiedReg::kRAll) == 0)
|
|
break;
|
|
|
|
mask = tied->allocableRegs;
|
|
if (mask != 0) {
|
|
allocableRegs &= mask;
|
|
if (allocableRegs == 0) break;
|
|
safeRegs = allocableRegs;
|
|
}
|
|
|
|
mask = tied->inRegs;
|
|
if (mask != 0) {
|
|
allocableRegs &= mask;
|
|
if (allocableRegs == 0) break;
|
|
safeRegs = allocableRegs;
|
|
break;
|
|
}
|
|
|
|
allocableRegs &= ~(raData->outRegs.get(C) | raData->clobberedRegs.get(C));
|
|
if (allocableRegs == 0) break;
|
|
}
|
|
else {
|
|
allocableRegs &= ~(raData->inRegs.get(C) | raData->outRegs.get(C) | raData->clobberedRegs.get(C));
|
|
if (allocableRegs == 0) break;
|
|
}
|
|
|
|
safeRegs = allocableRegs;
|
|
}
|
|
}
|
|
|
|
return safeRegs;
|
|
}
|
|
|
|
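// Guessing a spill alternative is not implemented here - returning 0 means
// spill() always spills the register instead of moving it elsewhere.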
template<int C>
|
|
ASMJIT_INLINE uint32_t X86VarAlloc::guessSpill(VirtReg* vreg, uint32_t allocableRegs) {
|
|
ASMJIT_ASSERT(allocableRegs != 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86VarAlloc - Modified]
|
|
// ============================================================================
|
|
|
|
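// Mark every virtual register written by this node as modified - both on the
// VirtReg itself and in the per-kind 'modified' mask of the current state.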
template<int C>
|
|
ASMJIT_INLINE void X86VarAlloc::modified() {
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
for (uint32_t i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
|
|
if (tied->flags & TiedReg::kWReg) {
|
|
VirtReg* vreg = tied->vreg;
|
|
|
|
uint32_t physId = vreg->getPhysId();
|
|
uint32_t regMask = Utils::mask(physId);
|
|
|
|
vreg->setModified(true);
|
|
_context->_x86State._modified.or_(C, regMask);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc]
|
|
// ============================================================================
|
|
|
|
//! \internal
|
|
//!
|
|
//! Register allocator context (function call).
|
|
struct X86CallAlloc : public X86BaseAlloc {
|
|
// --------------------------------------------------------------------------
|
|
// [Construction / Destruction]
|
|
// --------------------------------------------------------------------------
|
|
|
|
ASMJIT_INLINE X86CallAlloc(X86RAPass* context) : X86BaseAlloc(context) {}
|
|
ASMJIT_INLINE ~X86CallAlloc() {}
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Accessors]
|
|
// --------------------------------------------------------------------------
|
|
|
|
//! Get the node.
|
|
ASMJIT_INLINE CCFuncCall* getNode() const { return static_cast<CCFuncCall*>(_node); }
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Run]
|
|
// --------------------------------------------------------------------------
|
|
|
|
Error run(CCFuncCall* node);
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Init / Cleanup]
|
|
// --------------------------------------------------------------------------
|
|
|
|
protected:
|
|
// Just to prevent calling these methods from X86RAPass::translate().
|
|
ASMJIT_INLINE void init(CCFuncCall* node, X86RAData* raData);
|
|
ASMJIT_INLINE void cleanup();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Plan / Alloc / Spill / Move]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void plan();
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void spill();
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void alloc();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [AllocImmsOnStack]
|
|
// --------------------------------------------------------------------------
|
|
|
|
ASMJIT_INLINE void allocImmsOnStack();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Duplicate]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void duplicate();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [GuessAlloc / GuessSpill]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE uint32_t guessAlloc(VirtReg* vreg, uint32_t allocableRegs);
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE uint32_t guessSpill(VirtReg* vreg, uint32_t allocableRegs);
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Save]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void save();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Clobber]
|
|
// --------------------------------------------------------------------------
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void clobber();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Ret]
|
|
// --------------------------------------------------------------------------
|
|
|
|
ASMJIT_INLINE void ret();
|
|
|
|
// --------------------------------------------------------------------------
|
|
// [Members]
|
|
// --------------------------------------------------------------------------
|
|
|
|
//! Will alloc to these registers.
|
|
X86RegMask _willAlloc;
|
|
//! Will spill these registers.
|
|
X86RegMask _willSpill;
|
|
};
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc - Run]
|
|
// ============================================================================
|
|
|
|
Error X86CallAlloc::run(CCFuncCall* node) {
|
|
// Initialize the allocator; prepare basics and connect VirtReg->TiedReg.
|
|
X86RAData* raData = node->getPassData<X86RAData>();
|
|
init(node, raData);
|
|
|
|
// Plan register allocation. Planner is only able to assign one register per
|
|
// variable. If any variable is used multiple times it will be handled later.
|
|
plan<X86Reg::kKindGp >();
|
|
plan<X86Reg::kKindMm >();
|
|
plan<X86Reg::kKindVec>();
|
|
|
|
// Spill.
|
|
spill<X86Reg::kKindGp >();
|
|
spill<X86Reg::kKindMm >();
|
|
spill<X86Reg::kKindVec>();
|
|
|
|
// Alloc.
|
|
alloc<X86Reg::kKindGp >();
|
|
alloc<X86Reg::kKindMm >();
|
|
alloc<X86Reg::kKindVec>();
|
|
|
|
// Unuse clobbered registers that are not used to pass function arguments and
|
|
// save variables used to pass function arguments that will be reused later on.
|
|
save<X86Reg::kKindGp >();
|
|
save<X86Reg::kKindMm >();
|
|
save<X86Reg::kKindVec>();
|
|
|
|
// Allocate immediates in registers and on the stack.
|
|
allocImmsOnStack();
|
|
|
|
// Duplicate.
|
|
duplicate<X86Reg::kKindGp >();
|
|
duplicate<X86Reg::kKindMm >();
|
|
duplicate<X86Reg::kKindVec>();
|
|
|
|
// Translate call operand.
|
|
ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, node->getOpArray(), node->getOpCount()));
|
|
|
|
// To emit instructions after call.
|
|
_cc->_setCursor(node);
|
|
|
|
// If the callee pops the stack it has to be manually adjusted back.
|
|
FuncDetail& fd = node->getDetail();
|
|
if (fd.hasFlag(CallConv::kFlagCalleePopsStack) && fd.getArgStackSize() != 0)
|
|
_cc->emit(X86Inst::kIdSub, _context->_zsp, static_cast<int>(fd.getArgStackSize()));
|
|
|
|
// Clobber.
|
|
clobber<X86Reg::kKindGp >();
|
|
clobber<X86Reg::kKindMm >();
|
|
clobber<X86Reg::kKindVec>();
|
|
|
|
// Return.
|
|
ret();
|
|
|
|
// Unuse.
|
|
unuseAfter<X86Reg::kKindGp >();
|
|
unuseAfter<X86Reg::kKindMm >();
|
|
unuseAfter<X86Reg::kKindVec>();
|
|
|
|
// Cleanup; disconnect VirtReg->TiedReg.
|
|
cleanup();
|
|
|
|
return kErrorOk;
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc - Init / Cleanup]
|
|
// ============================================================================
|
|
|
|
ASMJIT_INLINE void X86CallAlloc::init(CCFuncCall* node, X86RAData* raData) {
|
|
X86BaseAlloc::init(node, raData);
|
|
|
|
// Create mask of all registers that will be used to pass function arguments.
|
|
_willAlloc.reset();
|
|
_willAlloc.set(X86Reg::kKindGp , node->getDetail().getUsedRegs(X86Reg::kKindGp ));
|
|
_willAlloc.set(X86Reg::kKindMm , node->getDetail().getUsedRegs(X86Reg::kKindMm ));
|
|
_willAlloc.set(X86Reg::kKindK , node->getDetail().getUsedRegs(X86Reg::kKindK ));
|
|
_willAlloc.set(X86Reg::kKindVec, node->getDetail().getUsedRegs(X86Reg::kKindVec));
|
|
_willSpill.reset();
|
|
}
|
|
|
|
ASMJIT_INLINE void X86CallAlloc::cleanup() {
|
|
X86BaseAlloc::cleanup();
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc - Plan / Spill / Alloc]
|
|
// ============================================================================
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86CallAlloc::plan() {
|
|
uint32_t i;
|
|
uint32_t clobbered = _raData->clobberedRegs.get(C);
|
|
|
|
uint32_t willAlloc = _willAlloc.get(C);
|
|
uint32_t willFree = clobbered & ~willAlloc;
|
|
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
X86RAState* state = getState();
|
|
|
|
// Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
VirtReg* vreg = tied->vreg;
|
|
|
|
uint32_t vaFlags = tied->flags;
|
|
uint32_t physId = vreg->getPhysId();
|
|
uint32_t regMask = (physId != Globals::kInvalidRegId) ? Utils::mask(physId) : 0;
|
|
|
|
if ((vaFlags & TiedReg::kRReg) != 0) {
|
|
// Planning register allocation. First check whether the variable is
// already allocated in a register and whether it can stay there. Function
// arguments are passed either in a specific register or on the stack, so
// we mostly care about mandatory registers.
|
|
uint32_t inRegs = tied->inRegs;
|
|
|
|
if (inRegs == 0) {
|
|
inRegs = tied->allocableRegs;
|
|
}
|
|
|
|
// Optimize the situation where the variable has to be allocated in a
// mandatory register, but it's already allocated in a register that
// is not clobbered (i.e. it will survive the function call).
|
|
if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & TiedReg::kUnuse) == 0)) {
|
|
tied->setInPhysId(physId);
|
|
tied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
}
|
|
else {
|
|
willFree |= regMask;
|
|
}
|
|
}
|
|
else {
|
|
// Memory access - if variable is allocated it has to be freed.
|
|
if (regMask != 0) {
|
|
willFree |= regMask;
|
|
}
|
|
else {
|
|
tied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Occupied registers without 'willFree' registers; contains basically
// all the registers we can use to allocate variables without inRegs
// specified.
uint32_t occupied = state->_occupied.get(C) & ~willFree;
|
|
uint32_t willSpill = 0;
|
|
|
|
// Find the best registers for variables that are not allocated yet. Only
|
|
// useful for Gp registers used as call operand.
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
VirtReg* vreg = tied->vreg;
|
|
|
|
uint32_t vaFlags = tied->flags;
|
|
if ((vaFlags & TiedReg::kRDone) != 0 || (vaFlags & TiedReg::kRReg) == 0)
|
|
continue;
|
|
|
|
// All registers except Gp used by call itself must have inPhysId.
|
|
uint32_t m = tied->inRegs;
|
|
if (C != X86Reg::kKindGp || m) {
|
|
ASMJIT_ASSERT(m != 0);
|
|
tied->setInPhysId(Utils::findFirstBit(m));
|
|
willSpill |= occupied & m;
|
|
continue;
|
|
}
|
|
|
|
m = tied->allocableRegs & ~(willAlloc ^ m);
|
|
m = guessAlloc<C>(vreg, m);
|
|
ASMJIT_ASSERT(m != 0);
|
|
|
|
uint32_t candidateRegs = m & ~occupied;
|
|
if (candidateRegs == 0) {
|
|
candidateRegs = m & occupied & ~state->_modified.get(C);
|
|
if (candidateRegs == 0)
|
|
candidateRegs = m;
|
|
}
|
|
|
|
if (!(vaFlags & (TiedReg::kWReg | TiedReg::kUnuse)) && (candidateRegs & ~clobbered))
|
|
candidateRegs &= ~clobbered;
|
|
|
|
uint32_t physId = Utils::findFirstBit(candidateRegs);
|
|
uint32_t regMask = Utils::mask(physId);
|
|
|
|
tied->setInPhysId(physId);
|
|
tied->inRegs = regMask;
|
|
|
|
willAlloc |= regMask;
|
|
willSpill |= regMask & occupied;
|
|
willFree &= ~regMask;
|
|
|
|
occupied |= regMask;
|
|
continue;
|
|
}
|
|
|
|
// Set calculated masks back to the allocator; needed by spill() and alloc().
|
|
_willSpill.set(C, willSpill);
|
|
_willAlloc.set(C, willAlloc);
|
|
}
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86CallAlloc::spill() {
|
|
uint32_t m = _willSpill.get(C);
|
|
uint32_t i = static_cast<uint32_t>(0) - 1;
|
|
|
|
if (m == 0)
|
|
return;
|
|
|
|
X86RAState* state = getState();
|
|
VirtReg** sVars = state->getListByKind(C);
|
|
|
|
// Available registers used to decide whether a move has any benefit over a spill.
|
|
uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C));
|
|
|
|
do {
|
|
// We always advance one more to destroy the bit that we have found.
|
|
uint32_t bitIndex = Utils::findFirstBit(m) + 1;
|
|
|
|
i += bitIndex;
|
|
m >>= bitIndex;
|
|
|
|
VirtReg* vreg = sVars[i];
|
|
ASMJIT_ASSERT(vreg && !vreg->_tied);
|
|
|
|
if (vreg->isModified() && availableRegs) {
|
|
uint32_t available = guessSpill<C>(vreg, availableRegs);
|
|
if (available != 0) {
|
|
uint32_t physId = Utils::findFirstBit(available);
|
|
uint32_t regMask = Utils::mask(physId);
|
|
|
|
_context->move<C>(vreg, physId);
|
|
availableRegs ^= regMask;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
_context->spill<C>(vreg);
|
|
} while (m != 0);
|
|
}
|
|
|
|
template<int C>
|
|
ASMJIT_INLINE void X86CallAlloc::alloc() {
|
|
if (isTiedDone(C)) return;
|
|
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
uint32_t i;
|
|
bool didWork;
|
|
|
|
do {
|
|
didWork = false;
|
|
for (i = 0; i < tiedCount; i++) {
|
|
TiedReg* aTied = &tiedArray[i];
|
|
VirtReg* aVReg = aTied->vreg;
|
|
if ((aTied->flags & (TiedReg::kRReg | TiedReg::kRDone)) != TiedReg::kRReg) continue;
|
|
|
|
uint32_t sPhysId = aVReg->getPhysId();
|
|
uint32_t bPhysId = aTied->inPhysId;
|
|
|
|
// Shouldn't be the same.
|
|
ASMJIT_ASSERT(sPhysId != bPhysId);
|
|
|
|
VirtReg* bVReg = getState()->getListByKind(C)[bPhysId];
|
|
if (bVReg) {
|
|
TiedReg* bTied = bVReg->_tied;
|
|
|
|
// Gp and Vec registers - swap two registers if we can solve two
// allocation tasks by a single swap, exchanging two registers required
// by the instruction/node or one required register with another
// non-required one.
|
|
if ((C == X86Reg::kKindGp || C == X86Reg::kKindVec) && sPhysId != Globals::kInvalidRegId) {
|
|
if (C == X86Reg::kKindGp)
|
|
_context->swapGp(aVReg, bVReg);
|
|
else
|
|
_context->swapVec(aVReg, bVReg);
|
|
|
|
aTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
|
|
// Double-hit, two registers allocated by a single swap.
|
|
if (bTied && bTied->inPhysId == sPhysId) {
|
|
bTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
}
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
}
|
|
else if (sPhysId != Globals::kInvalidRegId) {
|
|
_context->move<C>(aVReg, bPhysId);
|
|
_context->_clobberedRegs.or_(C, Utils::mask(bPhysId));
|
|
|
|
aTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
else {
|
|
_context->alloc<C>(aVReg, bPhysId);
|
|
_context->_clobberedRegs.or_(C, Utils::mask(bPhysId));
|
|
|
|
aTied->flags |= TiedReg::kRDone;
|
|
addTiedDone(C);
|
|
|
|
didWork = true;
|
|
continue;
|
|
}
|
|
}
|
|
} while (didWork);
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc - AllocImmsOnStack]
|
|
// ============================================================================
|
|
|
|
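// Materialize immediate arguments - an immediate passed in a register is
// loaded directly into the register assigned by the calling convention,
// otherwise it is stored to its stack slot.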
ASMJIT_INLINE void X86CallAlloc::allocImmsOnStack() {
|
|
CCFuncCall* node = getNode();
|
|
FuncDetail& fd = node->getDetail();
|
|
|
|
uint32_t argCount = fd.getArgCount();
|
|
Operand_* args = node->_args;
|
|
|
|
for (uint32_t i = 0; i < argCount; i++) {
|
|
Operand_& op = args[i];
|
|
if (!op.isImm()) continue;
|
|
|
|
const Imm& imm = static_cast<const Imm&>(op);
|
|
const FuncDetail::Value& arg = fd.getArg(i);
|
|
uint32_t varType = arg.getTypeId();
|
|
|
|
if (arg.byReg()) {
|
|
_context->emitImmToReg(varType, arg.getRegId(), &imm);
|
|
}
|
|
else {
|
|
X86Mem dst = x86::ptr(_context->_zsp, -static_cast<int>(_context->getGpSize()) + arg.getStackOffset());
|
|
_context->emitImmToStack(varType, &dst, &imm);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc - Duplicate]
|
|
// ============================================================================
|
|
|
|
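// If a virtual register has to be present in more than one physical register
// (its `inRegs` mask contains registers other than the one it currently
// occupies), emit a move into each additional register and mark that
// register as clobbered.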
template<int C>
|
|
ASMJIT_INLINE void X86CallAlloc::duplicate() {
|
|
TiedReg* tiedArray = getTiedArrayByKind(C);
|
|
uint32_t tiedCount = getTiedCountByKind(C);
|
|
|
|
for (uint32_t i = 0; i < tiedCount; i++) {
|
|
TiedReg* tied = &tiedArray[i];
|
|
if ((tied->flags & TiedReg::kRReg) == 0) continue;
|
|
|
|
uint32_t inRegs = tied->inRegs;
|
|
if (!inRegs) continue;
|
|
|
|
VirtReg* vreg = tied->vreg;
|
|
uint32_t physId = vreg->getPhysId();
|
|
|
|
ASMJIT_ASSERT(physId != Globals::kInvalidRegId);
|
|
|
|
inRegs &= ~Utils::mask(physId);
|
|
if (!inRegs) continue;
|
|
|
|
for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) {
|
|
if (inRegs & 0x1) {
|
|
_context->emitMove(vreg, dupIndex, physId, "Duplicate");
|
|
_context->_clobberedRegs.or_(C, Utils::mask(dupIndex));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// [asmjit::X86CallAlloc - GuessAlloc / GuessSpill]
|
|
// ============================================================================
|
|
|
|
template<int C>
ASMJIT_INLINE uint32_t X86CallAlloc::guessAlloc(VirtReg* vreg, uint32_t allocableRegs) {
  ASMJIT_ASSERT(allocableRegs != 0);

  // Stop now if there is only one bit (register) set in the 'allocableRegs' mask.
  if (Utils::isPowerOf2(allocableRegs))
    return allocableRegs;

  uint32_t i;
  uint32_t safeRegs = allocableRegs;
  uint32_t maxLookAhead = kCompilerDefaultLookAhead;

  // Look ahead and calculate the mask of special registers on both input and output.
  CBNode* node = _node;
  for (i = 0; i < maxLookAhead; i++) {
    // Stop on `CCFuncRet` and `CBSentinel`.
    if (node->hasFlag(CBNode::kFlagIsRet))
      break;

    // Stop on a conditional jump, we don't follow them.
    if (node->hasFlag(CBNode::kFlagIsJcc))
      break;

    // Advance on a non-conditional jump.
    if (node->hasFlag(CBNode::kFlagIsJmp)) {
      node = static_cast<CBJump*>(node)->getTarget();
      // Stop on a jump that is not followed.
      if (!node) break;
    }

    node = node->getNext();
    ASMJIT_ASSERT(node != nullptr);

    X86RAData* raData = node->getPassData<X86RAData>();
    if (raData) {
      TiedReg* tied = raData->findTiedByKind(C, vreg);
      if (tied) {
        uint32_t inRegs = tied->inRegs;
        if (inRegs != 0) {
          safeRegs = allocableRegs;
          allocableRegs &= inRegs;

          if (allocableRegs == 0)
            goto _UseSafeRegs;
          else
            return allocableRegs;
        }
      }

      safeRegs = allocableRegs;
      allocableRegs &= ~(raData->inRegs.get(C) | raData->outRegs.get(C) | raData->clobberedRegs.get(C));

      if (allocableRegs == 0)
        break;
    }
  }

_UseSafeRegs:
  return safeRegs;
}

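// No spill heuristic is implemented for function calls - guessSpill() always
// returns 0 (no preferred register to spill).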
template<int C>
ASMJIT_INLINE uint32_t X86CallAlloc::guessSpill(VirtReg* vreg, uint32_t allocableRegs) {
  ASMJIT_ASSERT(allocableRegs != 0);
  return 0;
}

// ============================================================================
// [asmjit::X86CallAlloc - Save]
// ============================================================================

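// Saves every modified variable that lives in a register clobbered by the
// call so its content survives the callee. Variables that are overwritten by
// the call or unused afterwards don't have to be saved.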
template<int C>
ASMJIT_INLINE void X86CallAlloc::save() {
  X86RAState* state = getState();
  VirtReg** sVars = state->getListByKind(C);

  uint32_t i;
  uint32_t affected = _raData->clobberedRegs.get(C) & state->_occupied.get(C) & state->_modified.get(C);

  for (i = 0; affected != 0; i++, affected >>= 1) {
    if (affected & 0x1) {
      VirtReg* vreg = sVars[i];
      ASMJIT_ASSERT(vreg != nullptr);
      ASMJIT_ASSERT(vreg->isModified());

      TiedReg* tied = vreg->_tied;
      if (!tied || (tied->flags & (TiedReg::kWReg | TiedReg::kUnuse)) == 0)
        _context->save<C>(vreg);
    }
  }
}

// ============================================================================
// [asmjit::X86CallAlloc - Clobber]
// ============================================================================

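// Unuses every variable that occupies a register clobbered by the call. The
// variable's state becomes kStateMem when its memory home is still valid or
// its value won't be needed again, otherwise kStateNone.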
template<int C>
ASMJIT_INLINE void X86CallAlloc::clobber() {
  X86RAState* state = getState();
  VirtReg** sVars = state->getListByKind(C);

  uint32_t i;
  uint32_t affected = _raData->clobberedRegs.get(C) & state->_occupied.get(C);

  for (i = 0; affected != 0; i++, affected >>= 1) {
    if (affected & 0x1) {
      VirtReg* vreg = sVars[i];
      ASMJIT_ASSERT(vreg != nullptr);

      TiedReg* tied = vreg->_tied;
      uint32_t vdState = VirtReg::kStateNone;

      if (!vreg->isModified() || (tied && (tied->flags & (TiedReg::kWAll | TiedReg::kUnuse)) != 0))
        vdState = VirtReg::kStateMem;
      _context->unuse<C>(vreg, vdState);
    }
  }
}

// ============================================================================
// [asmjit::X86CallAlloc - Ret]
// ============================================================================

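// Attaches the call's return value(s) to the physical register(s) dictated by
// the calling convention. A floating point value returned in st(0) (32-bit
// calling conventions) is spilled to the variable's home slot via `fstp`.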
ASMJIT_INLINE void X86CallAlloc::ret() {
  CCFuncCall* node = getNode();
  FuncDetail& fd = node->getDetail();
  Operand_* rets = node->_ret;

  for (uint32_t i = 0; i < 2; i++) {
    const FuncDetail::Value& ret = fd.getRet(i);
    Operand_* op = &rets[i];

    if (!ret.byReg() || !op->isVirtReg())
      continue;

    VirtReg* vreg = _cc->getVirtRegById(op->getId());
    uint32_t regId = ret.getRegId();

    switch (vreg->getKind()) {
      case X86Reg::kKindGp:
        _context->unuse<X86Reg::kKindGp>(vreg);
        _context->attach<X86Reg::kKindGp>(vreg, regId, true);
        break;

      case X86Reg::kKindMm:
        _context->unuse<X86Reg::kKindMm>(vreg);
        _context->attach<X86Reg::kKindMm>(vreg, regId, true);
        break;

      case X86Reg::kKindVec:
        if (X86Reg::kindOf(ret.getRegType()) == X86Reg::kKindVec) {
          _context->unuse<X86Reg::kKindVec>(vreg);
          _context->attach<X86Reg::kKindVec>(vreg, regId, true);
        }
        else {
          uint32_t elementId = TypeId::elementOf(vreg->getTypeId());
          uint32_t size = (elementId == TypeId::kF32) ? 4 : 8;

          X86Mem m = _context->getVarMem(vreg);
          m.setSize(size);

          _context->unuse<X86Reg::kKindVec>(vreg, VirtReg::kStateMem);
          _cc->fstp(m);
        }
        break;
    }
  }
}

// ============================================================================
// [asmjit::X86RAPass - TranslateOperands]
// ============================================================================

//! \internal
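//!
//! Replaces virtual register ids in `opArray` by the physical register ids
//! assigned by the allocator. Memory operands are handled as well - a base or
//! index that refers to a virtual register is rewritten to its physical id,
//! and a reg-home base only reserves the variable's stack cell here (it's
//! rewritten to its final [base + offset] form by `X86RAPass_patchFuncMem`).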
static Error X86RAPass_translateOperands(X86RAPass* self, Operand_* opArray, uint32_t opCount) {
  X86Compiler* cc = self->cc();

  // Translate variables into registers.
  for (uint32_t i = 0; i < opCount; i++) {
    Operand_* op = &opArray[i];
    if (op->isVirtReg()) {
      VirtReg* vreg = cc->getVirtRegById(op->getId());
      ASMJIT_ASSERT(vreg != nullptr);
      ASMJIT_ASSERT(vreg->getPhysId() != Globals::kInvalidRegId);
      op->_reg.id = vreg->getPhysId();
    }
    else if (op->isMem()) {
      X86Mem* m = static_cast<X86Mem*>(op);

      if (m->hasBaseReg() && cc->isVirtRegValid(m->getBaseId())) {
        VirtReg* vreg = cc->getVirtRegById(m->getBaseId());

        if (m->isRegHome()) {
          self->getVarCell(vreg);
        }
        else {
          ASMJIT_ASSERT(vreg->getPhysId() != Globals::kInvalidRegId);
          op->_mem.base = vreg->getPhysId();
        }
      }

      if (m->hasIndexReg() && cc->isVirtRegValid(m->getIndexId())) {
        VirtReg* vreg = cc->getVirtRegById(m->getIndexId());
        op->_mem.index = vreg->getPhysId();
      }
    }
  }

  return kErrorOk;
}

// ============================================================================
// [asmjit::X86RAPass - TranslatePrologEpilog]
// ============================================================================

//! \internal
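//!
//! Transfers everything the prolog/epilog generator needs to know into
//! `FuncFrameInfo` - which registers of each kind the function dirties and
//! how large and how aligned its stack frame has to be.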
static Error X86RAPass_prepareFuncFrame(X86RAPass* self, CCFunc* func) {
  FuncFrameInfo& ffi = func->getFrameInfo();

  X86RegMask& clobberedRegs = self->_clobberedRegs;

  // Initialize dirty registers.
  ffi.setDirtyRegs(X86Reg::kKindGp , clobberedRegs.get(X86Reg::kKindGp ));
  ffi.setDirtyRegs(X86Reg::kKindMm , clobberedRegs.get(X86Reg::kKindMm ));
  ffi.setDirtyRegs(X86Reg::kKindK  , clobberedRegs.get(X86Reg::kKindK  ));
  ffi.setDirtyRegs(X86Reg::kKindVec, clobberedRegs.get(X86Reg::kKindVec));

  // Initialize stack size & alignment.
  ffi.setStackFrameSize(self->_memAllTotal);
  ffi.setStackFrameAlignment(self->_memMaxAlign);

  return kErrorOk;
}

//! \internal
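//!
//! Patches memory operands that could not be finalized during translation:
//! arg-home accesses get the final stack-arguments offset and reg-home
//! accesses are rebased onto the stack base register plus the stack cell
//! offset assigned to the variable.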
static Error X86RAPass_patchFuncMem(X86RAPass* self, CCFunc* func, CBNode* stop, FuncFrameLayout& layout) {
  X86Compiler* cc = self->cc();
  CBNode* node = func;

  do {
    if (node->getType() == CBNode::kNodeInst) {
      CBInst* iNode = static_cast<CBInst*>(node);

      if (iNode->hasMemOp()) {
        X86Mem* m = iNode->getMemOp<X86Mem>();

        if (m->isArgHome()) {
          m->addOffsetLo32(layout.getStackArgsOffset());
          m->clearArgHome();
        }

        if (m->isRegHome() && Operand::isPackedId(m->getBaseId())) {
          VirtReg* vreg = cc->getVirtRegById(m->getBaseId());
          ASMJIT_ASSERT(vreg != nullptr);

          RACell* cell = vreg->getMemCell();
          ASMJIT_ASSERT(cell != nullptr);

          m->_setBase(cc->_nativeGpReg.getType(), self->_varBaseRegId);
          m->addOffsetLo32(self->_varBaseOffset + cell->offset);
          m->clearRegHome();
        }
      }
    }

    node = node->getNext();
  } while (node != stop);

  return kErrorOk;
}

// ============================================================================
// [asmjit::X86RAPass - Translate - Jump]
// ============================================================================

//! \internal
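//!
//! Emits the register-state transition required to jump to an already
//! translated label. If `switchState()` has to emit any code, that code is
//! appended at the end of the function under a fresh label and the original
//! jump is redirected to it, so the transition only executes on that jump's
//! path.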
static void X86RAPass_translateJump(X86RAPass* self, CBJump* jNode, CBLabel* jTarget) {
  X86Compiler* cc = self->cc();

  CBNode* injectRef = self->getFunc()->getEnd()->getPrev();
  CBNode* prevCursor = cc->setCursor(injectRef);

  self->switchState(jTarget->getPassData<RAData>()->state);

  // Any code needed to switch the state is emitted at the end of the function.
  if (cc->getCursor() != injectRef) {
    // TODO: Can fail.
    CBLabel* injectLabel = cc->newLabelNode();

    // Add the jump to the target.
    cc->jmp(jTarget->getLabel());

    // Inject the label.
    cc->_setCursor(injectRef);
    cc->addNode(injectLabel);

    // Finally, patch the `jNode` target.
    ASMJIT_ASSERT(jNode->getOpCount() > 0);
    jNode->_opArray[jNode->getOpCount() - 1] = injectLabel->getLabel();
    jNode->_target = injectLabel;
    // If we injected any code the jump may not satisfy the short form anymore.
    jNode->delOptions(X86Inst::kOptionShortForm);
  }

  cc->_setCursor(prevCursor);
  self->loadState(jNode->getPassData<RAData>()->state);
}

// ============================================================================
// [asmjit::X86RAPass - Translate - Ret]
// ============================================================================

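// Translates a function return - in 32-bit mode floating point return values
// are loaded onto the FPU stack first, then a jump to the function's exit
// label is emitted unless the exit label immediately follows the return.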
static Error X86RAPass_translateRet(X86RAPass* self, CCFuncRet* rNode, CBLabel* exitTarget) {
  X86Compiler* cc = self->cc();
  CBNode* node = rNode->getNext();

  // 32-bit mode requires floating point return value(s) to be pushed onto the
  // FPU stack, handle it here as it's a special case.
  X86RAData* raData = rNode->getPassData<X86RAData>();
  if (raData) {
    TiedReg* tiedArray = raData->tiedArray;
    uint32_t tiedTotal = raData->tiedTotal;

    for (uint32_t i = 0; i < tiedTotal; i++) {
      TiedReg* tied = &tiedArray[i];
      if (tied->flags & (TiedReg::kX86Fld4 | TiedReg::kX86Fld8)) {
        VirtReg* vreg = tied->vreg;
        X86Mem m(self->getVarMem(vreg));

        uint32_t elementId = TypeId::elementOf(vreg->getTypeId());
        m.setSize(elementId == TypeId::kF32 ? 4 :
                  elementId == TypeId::kF64 ? 8 :
                  (tied->flags & TiedReg::kX86Fld4) ? 4 : 8);

        cc->fld(m);
      }
    }
  }

  // Decide whether to emit a `jmp` - it's not needed if we are right next to
  // the exit label.
  while (node) {
    switch (node->getType()) {
      // If we have found the exit label we just return, there is no need to
      // emit a jump to it.
      case CBNode::kNodeLabel:
        if (static_cast<CBLabel*>(node) == exitTarget)
          return kErrorOk;
        goto _EmitRet;

      case CBNode::kNodeData:
      case CBNode::kNodeInst:
      case CBNode::kNodeFuncCall:
      case CBNode::kNodeFuncExit:
        goto _EmitRet;

      // Continue iterating.
      case CBNode::kNodeComment:
      case CBNode::kNodeAlign:
      case CBNode::kNodeHint:
        break;

      // Invalid node to be here.
      case CBNode::kNodeFunc:
        return DebugUtils::errored(kErrorInvalidState);

      // We can't go forward from here.
      case CBNode::kNodeSentinel:
        return kErrorOk;
    }

    node = node->getNext();
  }

_EmitRet:
  {
    cc->_setCursor(rNode);
    cc->jmp(exitTarget->getLabel());
  }
  return kErrorOk;
}

// ============================================================================
// [asmjit::X86RAPass - Translate - Func]
// ============================================================================

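// Translates the whole function: walks the code flow starting at the function
// node, runs X86VarAlloc / X86CallAlloc on every node to assign physical
// registers, saves and merges register states at labels and jumps, and
// continues from pending conditional branches (`_jccList`) until every
// reachable path is translated. Frame finalization and prolog/epilog emission
// happen at `_Done`.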
Error X86RAPass::translate() {
  X86Compiler* cc = this->cc();
  CCFunc* func = getFunc();

  // Register allocator contexts.
  X86VarAlloc vAlloc(this);
  X86CallAlloc cAlloc(this);

  // Flow.
  CBNode* node_ = func;
  CBNode* next = nullptr;
  CBNode* stop = getStop();

  ZoneList<CBNode*>::Link* jLink = _jccList.getFirst();

  for (;;) {
    while (node_->isTranslated()) {
      // Switch state if we went to a node that is already translated.
      if (node_->getType() == CBNode::kNodeLabel) {
        CBLabel* node = static_cast<CBLabel*>(node_);
        cc->_setCursor(node->getPrev());
        switchState(node->getPassData<RAData>()->state);
      }

_NextGroup:
      if (!jLink) {
        goto _Done;
      }
      else {
        node_ = jLink->getValue();
        jLink = jLink->getNext();

        CBNode* jFlow = X86RAPass_getOppositeJccFlow(static_cast<CBJump*>(node_));
        loadState(node_->getPassData<RAData>()->state);

        if (jFlow->hasPassData() && jFlow->getPassData<RAData>()->state) {
          X86RAPass_translateJump(this, static_cast<CBJump*>(node_), static_cast<CBLabel*>(jFlow));

          node_ = jFlow;
          if (node_->isTranslated())
            goto _NextGroup;
        }
        else {
          node_ = jFlow;
        }

        break;
      }
    }

    next = node_->getNext();
    node_->_flags |= CBNode::kFlagIsTranslated;

    if (node_->hasPassData()) {
      switch (node_->getType()) {
        // --------------------------------------------------------------------
        // [Align / Embed]
        // --------------------------------------------------------------------

        case CBNode::kNodeAlign:
        case CBNode::kNodeData:
          break;

        // --------------------------------------------------------------------
        // [Label]
        // --------------------------------------------------------------------

        case CBNode::kNodeLabel: {
          CBLabel* node = static_cast<CBLabel*>(node_);
          ASMJIT_ASSERT(node->getPassData<RAData>()->state == nullptr);
          node->getPassData<RAData>()->state = saveState();

          if (node == func->getExitNode())
            goto _NextGroup;
          break;
        }

        // --------------------------------------------------------------------
        // [Inst/Call/SArg/Ret]
        // --------------------------------------------------------------------

        case CBNode::kNodeInst:
        case CBNode::kNodeFunc:
        case CBNode::kNodeFuncCall:
        case CBNode::kNodePushArg:
          // Update TiedReg's unuse flags based on liveness of the next node.
          if (!node_->isJcc()) {
            X86RAData* raData = node_->getPassData<X86RAData>();
            RABits* liveness;

            if (raData && next && next->hasPassData() && (liveness = next->getPassData<RAData>()->liveness)) {
              TiedReg* tiedArray = raData->tiedArray;
              uint32_t tiedTotal = raData->tiedTotal;

              for (uint32_t i = 0; i < tiedTotal; i++) {
                TiedReg* tied = &tiedArray[i];
                VirtReg* vreg = tied->vreg;

                if (!liveness->getBit(vreg->_raId) && !vreg->isFixed())
                  tied->flags |= TiedReg::kUnuse;
              }
            }
          }

          if (node_->getType() == CBNode::kNodeFuncCall) {
            ASMJIT_PROPAGATE(cAlloc.run(static_cast<CCFuncCall*>(node_)));
            break;
          }
          ASMJIT_FALLTHROUGH;

        case CBNode::kNodeHint:
        case CBNode::kNodeFuncExit: {
          ASMJIT_PROPAGATE(vAlloc.run(node_));

          // Handle conditional/unconditional jump.
          if (node_->isJmpOrJcc()) {
            CBJump* node = static_cast<CBJump*>(node_);
            CBLabel* jTarget = node->getTarget();

            // Target not followed.
            if (!jTarget) {
              if (node->isJmp())
                goto _NextGroup;
              else
                break;
            }

            if (node->isJmp()) {
              if (jTarget->hasPassData() && jTarget->getPassData<RAData>()->state) {
                cc->_setCursor(node->getPrev());
                switchState(jTarget->getPassData<RAData>()->state);

                goto _NextGroup;
              }
              else {
                next = jTarget;
              }
            }
            else {
              CBNode* jNext = node->getNext();

              if (jTarget->isTranslated()) {
                if (jNext->isTranslated()) {
                  ASMJIT_ASSERT(jNext->getType() == CBNode::kNodeLabel);
                  cc->_setCursor(node->getPrev());
                  intersectStates(
                    jTarget->getPassData<RAData>()->state,
                    jNext->getPassData<RAData>()->state);
                }

                RAState* savedState = saveState();
                node->getPassData<RAData>()->state = savedState;

                X86RAPass_translateJump(this, node, jTarget);
                next = jNext;
              }
              else if (jNext->isTranslated()) {
                ASMJIT_ASSERT(jNext->getType() == CBNode::kNodeLabel);

                RAState* savedState = saveState();
                node->getPassData<RAData>()->state = savedState;

                cc->_setCursor(node);
                switchState(jNext->getPassData<RAData>()->state);
                next = jTarget;
              }
              else {
                node->getPassData<RAData>()->state = saveState();
                next = X86RAPass_getJccFlow(node);
              }
            }
          }
          else if (node_->isRet()) {
            ASMJIT_PROPAGATE(
              X86RAPass_translateRet(this, static_cast<CCFuncRet*>(node_), func->getExitNode()));
            goto _NextGroup;
          }
          break;
        }

        // --------------------------------------------------------------------
        // [End]
        // --------------------------------------------------------------------

        case CBNode::kNodeSentinel: {
          goto _NextGroup;
        }

        default:
          break;
      }
    }

    if (next == stop)
      goto _NextGroup;
    node_ = next;
  }

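  // All reachable code is translated - finalize stack cells and the function
  // frame, patch remaining memory operands, and emit the prolog and epilog.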
_Done:
  {
    ASMJIT_PROPAGATE(resolveCellOffsets());
    ASMJIT_PROPAGATE(X86RAPass_prepareFuncFrame(this, func));

    FuncFrameLayout layout;
    ASMJIT_PROPAGATE(layout.init(func->getDetail(), func->getFrameInfo()));

    _varBaseRegId = layout._stackBaseRegId;
    _varBaseOffset = layout._stackBaseOffset;

    ASMJIT_PROPAGATE(X86RAPass_patchFuncMem(this, func, stop, layout));

    cc->_setCursor(func);
    ASMJIT_PROPAGATE(FuncUtils::emitProlog(this->cc(), layout));

    cc->_setCursor(func->getExitNode());
    ASMJIT_PROPAGATE(FuncUtils::emitEpilog(this->cc(), layout));
  }

  return kErrorOk;
}

} // asmjit namespace

// [Api-End]
#include "../asmjit_apiend.h"

// [Guard]
#endif // ASMJIT_BUILD_X86 && !ASMJIT_DISABLE_COMPILER