// raze-gles/libraries/asmjit/asmjit/x86/x86regalloc.cpp

// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.
// [Export]
#define ASMJIT_EXPORTS
// [Guard]
#include "../asmjit_build.h"
#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_DISABLE_COMPILER)
// [Dependencies]
#include "../base/cpuinfo.h"
#include "../base/utils.h"
#include "../x86/x86assembler.h"
#include "../x86/x86compiler.h"
#include "../x86/x86internal_p.h"
#include "../x86/x86regalloc_p.h"
// [Api-Begin]
#include "../asmjit_apibegin.h"
namespace asmjit {
// ============================================================================
// [Forward Declarations]
// ============================================================================
enum { kCompilerDefaultLookAhead = 64 };
static Error X86RAPass_translateOperands(X86RAPass* self, Operand_* opArray, uint32_t opCount);
// ============================================================================
// [asmjit::X86RAPass - SpecialInst]
// ============================================================================
struct X86SpecialInst {
uint8_t inReg;
uint8_t outReg;
uint16_t flags;
};
static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t instId, const Operand* opArray, uint32_t opCount) noexcept {
enum { kAny = Globals::kInvalidRegId };
#define R(ri) { uint8_t(ri) , uint8_t(kAny), uint16_t(TiedReg::kRReg) }
#define W(ri) { uint8_t(kAny), uint8_t(ri) , uint16_t(TiedReg::kWReg) }
#define X(ri) { uint8_t(ri) , uint8_t(ri) , uint16_t(TiedReg::kXReg) }
#define NONE() { uint8_t(kAny), uint8_t(kAny), 0 }
static const X86SpecialInst instCpuid[] = { X(X86Gp::kIdAx), W(X86Gp::kIdBx), X(X86Gp::kIdCx), W(X86Gp::kIdDx) };
static const X86SpecialInst instCbwCdqeCwde[] = { X(X86Gp::kIdAx) };
static const X86SpecialInst instCdqCwdCqo[] = { W(X86Gp::kIdDx), R(X86Gp::kIdAx) };
static const X86SpecialInst instCmpxchg[] = { X(kAny), R(kAny), X(X86Gp::kIdAx) };
static const X86SpecialInst instCmpxchg8b16b[] = { NONE(), X(X86Gp::kIdDx), X(X86Gp::kIdAx), R(X86Gp::kIdCx), R(X86Gp::kIdBx) };
static const X86SpecialInst instDaaDas[] = { X(X86Gp::kIdAx) };
static const X86SpecialInst instDiv2[] = { X(X86Gp::kIdAx), R(kAny) };
static const X86SpecialInst instDiv3[] = { X(X86Gp::kIdDx), X(X86Gp::kIdAx), R(kAny) };
static const X86SpecialInst instJecxz[] = { R(X86Gp::kIdCx) };
static const X86SpecialInst instMul2[] = { X(X86Gp::kIdAx), R(kAny) };
static const X86SpecialInst instMul3[] = { W(X86Gp::kIdDx), X(X86Gp::kIdAx), R(kAny) };
static const X86SpecialInst instMulx[] = { W(kAny), W(kAny), R(kAny), R(X86Gp::kIdDx) };
static const X86SpecialInst instLahf[] = { W(X86Gp::kIdAx) };
static const X86SpecialInst instSahf[] = { R(X86Gp::kIdAx) };
static const X86SpecialInst instMaskmovq[] = { R(kAny), R(kAny), R(X86Gp::kIdDi) };
static const X86SpecialInst instRdtscRdtscp[] = { W(X86Gp::kIdDx), W(X86Gp::kIdAx), W(X86Gp::kIdCx) };
static const X86SpecialInst instRot[] = { X(kAny), R(X86Gp::kIdCx) };
static const X86SpecialInst instShldShrd[] = { X(kAny), R(kAny), R(X86Gp::kIdCx) };
static const X86SpecialInst instThirdXMM0[] = { W(kAny), R(kAny), R(0) };
static const X86SpecialInst instPcmpestri[] = { R(kAny), R(kAny), NONE(), W(X86Gp::kIdCx) };
static const X86SpecialInst instPcmpestrm[] = { R(kAny), R(kAny), NONE(), W(0) };
static const X86SpecialInst instPcmpistri[] = { R(kAny), R(kAny), NONE(), W(X86Gp::kIdCx), R(X86Gp::kIdAx), R(X86Gp::kIdDx) };
static const X86SpecialInst instPcmpistrm[] = { R(kAny), R(kAny), NONE(), W(0) , R(X86Gp::kIdAx), R(X86Gp::kIdDx) };
static const X86SpecialInst instXsaveXrstor[] = { W(kAny), R(X86Gp::kIdDx), R(X86Gp::kIdAx) };
static const X86SpecialInst instReadMR[] = { W(X86Gp::kIdDx), W(X86Gp::kIdAx), R(X86Gp::kIdCx) };
static const X86SpecialInst instWriteMR[] = { R(X86Gp::kIdDx), R(X86Gp::kIdAx), R(X86Gp::kIdCx) };
static const X86SpecialInst instCmps[] = { X(X86Gp::kIdSi), X(X86Gp::kIdDi) };
static const X86SpecialInst instLods[] = { W(X86Gp::kIdAx), X(X86Gp::kIdSi) };
static const X86SpecialInst instMovs[] = { X(X86Gp::kIdDi), X(X86Gp::kIdSi) };
static const X86SpecialInst instScas[] = { X(X86Gp::kIdDi), R(X86Gp::kIdAx) };
static const X86SpecialInst instStos[] = { X(X86Gp::kIdDi), R(X86Gp::kIdAx) };
#undef NONE
#undef X
#undef W
#undef R
switch (instId) {
case X86Inst::kIdCpuid : return instCpuid;
case X86Inst::kIdCbw :
case X86Inst::kIdCdqe :
case X86Inst::kIdCwde : return instCbwCdqeCwde;
case X86Inst::kIdCdq :
case X86Inst::kIdCwd :
case X86Inst::kIdCqo : return instCdqCwdCqo;
case X86Inst::kIdCmps : return instCmps;
case X86Inst::kIdCmpxchg : return instCmpxchg;
case X86Inst::kIdCmpxchg8b :
case X86Inst::kIdCmpxchg16b : return instCmpxchg8b16b;
case X86Inst::kIdDaa :
case X86Inst::kIdDas : return instDaaDas;
case X86Inst::kIdDiv : return (opCount == 2) ? instDiv2 : instDiv3;
case X86Inst::kIdIdiv : return (opCount == 2) ? instDiv2 : instDiv3;
case X86Inst::kIdImul : if (opCount == 2) return nullptr;
if (opCount == 3 && !(opArray[0].isReg() && opArray[1].isReg() && opArray[2].isRegOrMem())) return nullptr;
ASMJIT_FALLTHROUGH;
case X86Inst::kIdMul : return (opCount == 2) ? instMul2 : instMul3;
case X86Inst::kIdMulx : return instMulx;
case X86Inst::kIdJecxz : return instJecxz;
case X86Inst::kIdLods : return instLods;
case X86Inst::kIdMovs : return instMovs;
case X86Inst::kIdLahf : return instLahf;
case X86Inst::kIdSahf : return instSahf;
case X86Inst::kIdMaskmovq :
case X86Inst::kIdMaskmovdqu :
case X86Inst::kIdVmaskmovdqu: return instMaskmovq;
case X86Inst::kIdEnter : return nullptr; // Not supported.
case X86Inst::kIdLeave : return nullptr; // Not supported.
case X86Inst::kIdRet : return nullptr; // Not supported.
case X86Inst::kIdMonitor : return nullptr; // TODO: [COMPILER] Monitor/MWait.
case X86Inst::kIdMwait : return nullptr; // TODO: [COMPILER] Monitor/MWait.
case X86Inst::kIdPop : return nullptr; // TODO: [COMPILER] Pop/Push.
case X86Inst::kIdPush : return nullptr; // TODO: [COMPILER] Pop/Push.
case X86Inst::kIdPopa : return nullptr; // Not supported.
case X86Inst::kIdPopf : return nullptr; // Not supported.
case X86Inst::kIdPusha : return nullptr; // Not supported.
case X86Inst::kIdPushf : return nullptr; // Not supported.
case X86Inst::kIdRcl :
case X86Inst::kIdRcr :
case X86Inst::kIdRol :
case X86Inst::kIdRor :
case X86Inst::kIdSal :
case X86Inst::kIdSar :
case X86Inst::kIdShl : // Rot instruction is special only if the last operand is a variable.
case X86Inst::kIdShr : if (!opArray[1].isReg()) return nullptr;
return instRot;
case X86Inst::kIdShld : // Shld/Shrd instruction is special only if the last operand is a variable.
case X86Inst::kIdShrd : if (!opArray[2].isReg()) return nullptr;
return instShldShrd;
case X86Inst::kIdRdtsc :
case X86Inst::kIdRdtscp : return instRdtscRdtscp;
case X86Inst::kIdScas : return instScas;
case X86Inst::kIdStos : return instStos;
case X86Inst::kIdBlendvpd :
case X86Inst::kIdBlendvps :
case X86Inst::kIdPblendvb :
case X86Inst::kIdSha256rnds2: return instThirdXMM0;
case X86Inst::kIdPcmpestri :
case X86Inst::kIdVpcmpestri : return instPcmpestri;
case X86Inst::kIdPcmpistri :
case X86Inst::kIdVpcmpistri : return instPcmpistri;
case X86Inst::kIdPcmpestrm :
case X86Inst::kIdVpcmpestrm : return instPcmpestrm;
case X86Inst::kIdPcmpistrm :
case X86Inst::kIdVpcmpistrm : return instPcmpistrm;
case X86Inst::kIdXrstor :
case X86Inst::kIdXrstor64 :
case X86Inst::kIdXsave :
case X86Inst::kIdXsave64 :
case X86Inst::kIdXsaveopt :
case X86Inst::kIdXsaveopt64 : return instXsaveXrstor;
case X86Inst::kIdRdmsr :
case X86Inst::kIdRdpmc :
case X86Inst::kIdXgetbv : return instReadMR;
case X86Inst::kIdWrmsr :
case X86Inst::kIdXsetbv : return instWriteMR;
default : return nullptr;
}
}
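// As an example of how the tables above are consumed: for `cpuid` the entry
// is { X(EAX), W(EBX), X(ECX), W(EDX) }, which tells the allocator to place
// operand 0 in EAX (read+write), operand 1 in EBX (write-only), operand 2 in
// ECX (read+write), and operand 3 in EDX (write-only) before the instruction
// is emitted.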
// ============================================================================
// [asmjit::X86RAPass - Construction / Destruction]
// ============================================================================
X86RAPass::X86RAPass() noexcept : RAPass() {
_state = &_x86State;
_varMapToVaListOffset = ASMJIT_OFFSET_OF(X86RAData, tiedArray);
}
X86RAPass::~X86RAPass() noexcept {}
// ============================================================================
// [asmjit::X86RAPass - Interface]
// ============================================================================
Error X86RAPass::process(Zone* zone) noexcept {
return Base::process(zone);
}
Error X86RAPass::prepare(CCFunc* func) noexcept {
ASMJIT_PROPAGATE(Base::prepare(func));
uint32_t archType = cc()->getArchType();
_regCount._gp = archType == ArchInfo::kTypeX86 ? 8 : 16;
_regCount._mm = 8;
_regCount._k = 8;
_regCount._vec = archType == ArchInfo::kTypeX86 ? 8 : 16;
_zsp = cc()->zsp();
_zbp = cc()->zbp();
_gaRegs[X86Reg::kKindGp ] = Utils::bits(_regCount.getGp()) & ~Utils::mask(X86Gp::kIdSp);
_gaRegs[X86Reg::kKindMm ] = Utils::bits(_regCount.getMm());
_gaRegs[X86Reg::kKindK ] = Utils::bits(_regCount.getK());
_gaRegs[X86Reg::kKindVec] = Utils::bits(_regCount.getVec());
_x86State.reset(0);
_clobberedRegs.reset();
_avxEnabled = false;
_varBaseRegId = Globals::kInvalidRegId; // Used by patcher.
_varBaseOffset = 0; // Used by patcher.
return kErrorOk;
}
// ============================================================================
// [asmjit::X86RAPass - Emit]
// ============================================================================
Error X86RAPass::emitMove(VirtReg* vReg, uint32_t dstId, uint32_t srcId, const char* reason) {
const char* comment = nullptr;
if (_emitComments) {
_stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
comment = _stringBuilder.getData();
}
X86Reg dst(X86Reg::fromSignature(vReg->getSignature(), dstId));
X86Reg src(X86Reg::fromSignature(vReg->getSignature(), srcId));
return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
}
Error X86RAPass::emitLoad(VirtReg* vReg, uint32_t id, const char* reason) {
const char* comment = nullptr;
if (_emitComments) {
_stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
comment = _stringBuilder.getData();
}
X86Reg dst(X86Reg::fromSignature(vReg->getSignature(), id));
X86Mem src(getVarMem(vReg));
return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
}
Error X86RAPass::emitSave(VirtReg* vReg, uint32_t id, const char* reason) {
const char* comment = nullptr;
if (_emitComments) {
_stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
comment = _stringBuilder.getData();
}
X86Mem dst(getVarMem(vReg));
X86Reg src(X86Reg::fromSignature(vReg->getSignature(), id));
return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
}
Error X86RAPass::emitSwapGp(VirtReg* dstReg, VirtReg* srcReg, uint32_t dstPhysId, uint32_t srcPhysId, const char* reason) noexcept {
ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);
uint32_t is64 = std::max(dstReg->getTypeId(), srcReg->getTypeId()) >= TypeId::kI64;
uint32_t sign = is64 ? uint32_t(X86RegTraits<X86Reg::kRegGpq>::kSignature)
: uint32_t(X86RegTraits<X86Reg::kRegGpd>::kSignature);
X86Reg a = X86Reg::fromSignature(sign, dstPhysId);
X86Reg b = X86Reg::fromSignature(sign, srcPhysId);
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXchg, a, b));
if (_emitComments)
cc()->getCursor()->setInlineComment(cc()->_cbDataZone.sformat("[%s] %s, %s", reason, dstReg->getName(), srcReg->getName()));
return kErrorOk;
}
Error X86RAPass::emitSwapVec(VirtReg* dstReg, VirtReg* srcReg, uint32_t dstPhysId, uint32_t srcPhysId, const char* reason) noexcept {
ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);
ASMJIT_ASSERT(dstPhysId != srcPhysId);
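// Swap the two vector registers without a temporary by using the XOR-swap
// identity (a ^= b; b ^= a; a ^= b), emitted as three XORPS instructions.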
X86Reg a = X86Reg::fromSignature(dstReg->getSignature(), dstPhysId);
X86Reg b = X86Reg::fromSignature(srcReg->getSignature(), srcPhysId);
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXorps, a, b));
if (_emitComments)
cc()->getCursor()->setInlineComment(cc()->_cbDataZone.sformat("[%s] %s, %s", reason, dstReg->getName(), srcReg->getName()));
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXorps, b, a));
ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXorps, a, b));
return kErrorOk;
}
Error X86RAPass::emitImmToReg(uint32_t dstTypeId, uint32_t dstPhysId, const Imm* src) noexcept {
ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
X86Reg r0;
Imm imm(*src);
switch (dstTypeId) {
case TypeId::kI8:
case TypeId::kU8:
imm.truncateTo8Bits();
ASMJIT_FALLTHROUGH;
case TypeId::kI16:
case TypeId::kU16:
imm.truncateTo16Bits();
ASMJIT_FALLTHROUGH;
case TypeId::kI32:
case TypeId::kU32:
Mov32Truncate:
imm.truncateTo32Bits();
r0.setX86RegT<X86Reg::kRegGpd>(dstPhysId);
cc()->emit(X86Inst::kIdMov, r0, imm);
break;
case TypeId::kI64:
case TypeId::kU64:
// Move to GPD register will also clear the high DWORD of GPQ
// register in 64-bit mode.
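// E.g. an immediate that fits into an unsigned 32-bit value can be loaded
// with the shorter `mov eax, imm32` encoding instead of `mov rax, imm64`.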
if (imm.isUInt32())
goto Mov32Truncate;
r0.setX86RegT<X86Reg::kRegGpq>(dstPhysId);
cc()->emit(X86Inst::kIdMov, r0, imm);
break;
case TypeId::kF32:
case TypeId::kF64:
// Compiler doesn't manage FPU stack.
ASMJIT_NOT_REACHED();
break;
case TypeId::kMmx32:
case TypeId::kMmx64:
// TODO: [COMPILER] EmitMoveImmToReg.
break;
default:
// TODO: [COMPILER] EmitMoveImmToReg.
break;
}
return kErrorOk;
}
Error X86RAPass::emitImmToStack(uint32_t dstTypeId, const X86Mem* dst, const Imm* src) noexcept {
X86Mem mem(*dst);
Imm imm(*src);
// One stack entry has the same size as the native register size. That means
// that if we want to move a 32-bit integer onto the stack in 64-bit mode, we
// need to extend it to a 64-bit integer first. In 32-bit mode, storing a
// 64-bit integer to the stack is done in two steps, by writing the low and
// high parts separately.
uint32_t gpSize = cc()->getGpSize();
switch (dstTypeId) {
case TypeId::kI8:
case TypeId::kU8:
imm.truncateTo8Bits();
ASMJIT_FALLTHROUGH;
case TypeId::kI16:
case TypeId::kU16:
imm.truncateTo16Bits();
ASMJIT_FALLTHROUGH;
case TypeId::kI32:
case TypeId::kU32:
case TypeId::kF32:
mem.setSize(4);
imm.truncateTo32Bits();
cc()->emit(X86Inst::kIdMov, mem, imm);
break;
case TypeId::kI64:
case TypeId::kU64:
case TypeId::kF64:
case TypeId::kMmx32:
case TypeId::kMmx64:
if (gpSize == 4) {
uint32_t hi = imm.getUInt32Hi();
// Lo-Part.
mem.setSize(4);
imm.truncateTo32Bits();
cc()->emit(X86Inst::kIdMov, mem, imm);
mem.addOffsetLo32(gpSize);
// Hi-Part.
imm.setUInt32(hi);
cc()->emit(X86Inst::kIdMov, mem, imm);
}
else {
mem.setSize(8);
cc()->emit(X86Inst::kIdMov, mem, imm);
}
break;
default:
return DebugUtils::errored(kErrorInvalidState);
}
return kErrorOk;
}
Error X86RAPass::emitRegToStack(uint32_t dstTypeId, const X86Mem* dst, uint32_t srcTypeId, uint32_t srcPhysId) noexcept {
ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);
X86Mem m0(*dst);
X86Reg r0, r1;
uint32_t gpSize = cc()->getGpSize();
uint32_t instId = 0;
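// For example, storing a signed 32-bit register into a 64-bit stack slot
// in 64-bit mode takes the `_ExtendMovGpXQ` path below and emits:
//   movsxd rax, eax  ; sign-extend DWORD -> QWORD
//   mov [slot], rax  ; store the extended value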
switch (dstTypeId) {
case TypeId::kI64:
case TypeId::kU64:
// Extend BYTE->QWORD (GP).
if (TypeId::isGpb(srcTypeId)) {
r1.setX86RegT<X86Reg::kRegGpbLo>(srcPhysId);
instId = (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI8) ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
goto _ExtendMovGpXQ;
}
// Extend WORD->QWORD (GP).
if (TypeId::isGpw(srcTypeId)) {
r1.setX86RegT<X86Reg::kRegGpw>(srcPhysId);
instId = (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI16) ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
goto _ExtendMovGpXQ;
}
// Extend DWORD->QWORD (GP).
if (TypeId::isGpd(srcTypeId)) {
r1.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
instId = X86Inst::kIdMovsxd;
if (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI32)
goto _ExtendMovGpXQ;
else
goto _ZeroExtendGpDQ;
}
// Move QWORD (GP).
if (TypeId::isGpq(srcTypeId)) goto MovGpQ;
if (TypeId::isMmx(srcTypeId)) goto MovMmQ;
if (TypeId::isVec(srcTypeId)) goto MovXmmQ;
break;
case TypeId::kI32:
case TypeId::kU32:
case TypeId::kI16:
case TypeId::kU16:
// DWORD <- WORD (Zero|Sign Extend).
if (TypeId::isGpw(srcTypeId)) {
bool isDstSigned = dstTypeId == TypeId::kI16 || dstTypeId == TypeId::kI32;
bool isSrcSigned = srcTypeId == TypeId::kI8 || srcTypeId == TypeId::kI16;
r1.setX86RegT<X86Reg::kRegGpw>(srcPhysId);
instId = isDstSigned && isSrcSigned ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
goto _ExtendMovGpD;
}
// DWORD <- BYTE (Zero|Sign Extend).
if (TypeId::isGpb(srcTypeId)) {
bool isDstSigned = dstTypeId == TypeId::kI16 || dstTypeId == TypeId::kI32;
bool isSrcSigned = srcTypeId == TypeId::kI8 || srcTypeId == TypeId::kI16;
r1.setX86RegT<X86Reg::kRegGpbLo>(srcPhysId);
instId = isDstSigned && isSrcSigned ? X86Inst::kIdMovsx : X86Inst::kIdMovzx;
goto _ExtendMovGpD;
}
ASMJIT_FALLTHROUGH;
case TypeId::kI8:
case TypeId::kU8:
if (TypeId::isInt(srcTypeId)) goto MovGpD;
if (TypeId::isMmx(srcTypeId)) goto MovMmD;
if (TypeId::isVec(srcTypeId)) goto MovXmmD;
break;
case TypeId::kMmx32:
case TypeId::kMmx64:
// Extend BYTE->QWORD (GP).
if (TypeId::isGpb(srcTypeId)) {
r1.setX86RegT<X86Reg::kRegGpbLo>(srcPhysId);
instId = X86Inst::kIdMovzx;
goto _ExtendMovGpXQ;
}
// Extend WORD->QWORD (GP).
if (TypeId::isGpw(srcTypeId)) {
r1.setX86RegT<X86Reg::kRegGpw>(srcPhysId);
instId = X86Inst::kIdMovzx;
goto _ExtendMovGpXQ;
}
if (TypeId::isGpd(srcTypeId)) goto _ExtendMovGpDQ;
if (TypeId::isGpq(srcTypeId)) goto MovGpQ;
if (TypeId::isMmx(srcTypeId)) goto MovMmQ;
if (TypeId::isVec(srcTypeId)) goto MovXmmQ;
break;
case TypeId::kF32:
case TypeId::kF32x1:
if (TypeId::isVec(srcTypeId)) goto MovXmmD;
break;
case TypeId::kF64:
case TypeId::kF64x1:
if (TypeId::isVec(srcTypeId)) goto MovXmmQ;
break;
default:
// TODO: Vector types by stack.
break;
}
return DebugUtils::errored(kErrorInvalidState);
// Extend+Move Gp.
_ExtendMovGpD:
m0.setSize(4);
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
cc()->emit(instId, r0, r1);
cc()->emit(X86Inst::kIdMov, m0, r0);
return kErrorOk;
_ExtendMovGpXQ:
if (gpSize == 8) {
m0.setSize(8);
r0.setX86RegT<X86Reg::kRegGpq>(srcPhysId);
cc()->emit(instId, r0, r1);
cc()->emit(X86Inst::kIdMov, m0, r0);
}
else {
m0.setSize(4);
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
cc()->emit(instId, r0, r1);
_ExtendMovGpDQ:
cc()->emit(X86Inst::kIdMov, m0, r0);
m0.addOffsetLo32(4);
cc()->emit(X86Inst::kIdAnd, m0, 0);
}
return kErrorOk;
_ZeroExtendGpDQ:
m0.setSize(4);
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
goto _ExtendMovGpDQ;
// Move Gp.
MovGpD:
m0.setSize(4);
r0.setX86RegT<X86Reg::kRegGpd>(srcPhysId);
return cc()->emit(X86Inst::kIdMov, m0, r0);
MovGpQ:
m0.setSize(8);
r0.setX86RegT<X86Reg::kRegGpq>(srcPhysId);
return cc()->emit(X86Inst::kIdMov, m0, r0);
// Move Mm.
MovMmD:
m0.setSize(4);
r0.setX86RegT<X86Reg::kRegMm>(srcPhysId);
return cc()->emit(X86Inst::kIdMovd, m0, r0);
MovMmQ:
m0.setSize(8);
r0.setX86RegT<X86Reg::kRegMm>(srcPhysId);
return cc()->emit(X86Inst::kIdMovq, m0, r0);
// Move XMM.
MovXmmD:
m0.setSize(4);
r0.setX86RegT<X86Reg::kRegXmm>(srcPhysId);
return cc()->emit(X86Inst::kIdMovss, m0, r0);
MovXmmQ:
m0.setSize(8);
r0.setX86RegT<X86Reg::kRegXmm>(srcPhysId);
return cc()->emit(X86Inst::kIdMovlps, m0, r0);
}
// ============================================================================
// [asmjit::X86RAPass - Register Management]
// ============================================================================
#if defined(ASMJIT_DEBUG)
template<int C>
static ASMJIT_INLINE void X86RAPass_checkStateVars(X86RAPass* self) {
X86RAState* state = self->getState();
VirtReg** sVars = state->getListByKind(C);
uint32_t physId;
uint32_t regMask;
uint32_t regCount = self->_regCount.get(C);
uint32_t occupied = state->_occupied.get(C);
uint32_t modified = state->_modified.get(C);
for (physId = 0, regMask = 1; physId < regCount; physId++, regMask <<= 1) {
VirtReg* vreg = sVars[physId];
if (!vreg) {
ASMJIT_ASSERT((occupied & regMask) == 0);
ASMJIT_ASSERT((modified & regMask) == 0);
}
else {
ASMJIT_ASSERT((occupied & regMask) != 0);
ASMJIT_ASSERT((modified & regMask) == (static_cast<uint32_t>(vreg->isModified()) << physId));
ASMJIT_ASSERT(vreg->getKind() == C);
ASMJIT_ASSERT(vreg->getState() == VirtReg::kStateReg);
ASMJIT_ASSERT(vreg->getPhysId() == physId);
}
}
}
void X86RAPass::_checkState() {
X86RAPass_checkStateVars<X86Reg::kKindGp >(this);
X86RAPass_checkStateVars<X86Reg::kKindMm >(this);
X86RAPass_checkStateVars<X86Reg::kKindVec>(this);
}
#else
void X86RAPass::_checkState() {}
#endif // ASMJIT_DEBUG
// ============================================================================
// [asmjit::X86RAPass - State - Load]
// ============================================================================
template<int C>
static ASMJIT_INLINE void X86RAPass_loadStateVars(X86RAPass* self, X86RAState* src) {
X86RAState* cur = self->getState();
VirtReg** cVars = cur->getListByKind(C);
VirtReg** sVars = src->getListByKind(C);
uint32_t physId;
uint32_t modified = src->_modified.get(C);
uint32_t regCount = self->_regCount.get(C);
for (physId = 0; physId < regCount; physId++, modified >>= 1) {
VirtReg* vreg = sVars[physId];
cVars[physId] = vreg;
if (!vreg) continue;
vreg->setState(VirtReg::kStateReg);
vreg->setPhysId(physId);
vreg->setModified(modified & 0x1);
}
}
void X86RAPass::loadState(RAState* src_) {
X86RAState* cur = getState();
X86RAState* src = static_cast<X86RAState*>(src_);
VirtReg** vregs = _contextVd.getData();
uint32_t count = static_cast<uint32_t>(_contextVd.getLength());
// Load allocated variables.
X86RAPass_loadStateVars<X86Reg::kKindGp >(this, src);
X86RAPass_loadStateVars<X86Reg::kKindMm >(this, src);
X86RAPass_loadStateVars<X86Reg::kKindVec>(this, src);
// Load masks.
cur->_occupied = src->_occupied;
cur->_modified = src->_modified;
// Load states of other variables and clear their 'Modified' flags.
for (uint32_t i = 0; i < count; i++) {
uint32_t vState = src->_cells[i].getState();
if (vState == VirtReg::kStateReg)
continue;
vregs[i]->setState(vState);
vregs[i]->setPhysId(Globals::kInvalidRegId);
vregs[i]->setModified(false);
}
ASMJIT_X86_CHECK_STATE
}
// ============================================================================
// [asmjit::X86RAPass - State - Save]
// ============================================================================
RAState* X86RAPass::saveState() {
VirtReg** vregs = _contextVd.getData();
uint32_t count = static_cast<uint32_t>(_contextVd.getLength());
size_t size = Utils::alignTo<size_t>(
sizeof(X86RAState) + count * sizeof(X86StateCell), sizeof(void*));
X86RAState* cur = getState();
X86RAState* dst = _zone->allocT<X86RAState>(size);
if (!dst) return nullptr;
// Store links.
::memcpy(dst->_list, cur->_list, X86RAState::kAllCount * sizeof(VirtReg*));
// Store masks.
dst->_occupied = cur->_occupied;
dst->_modified = cur->_modified;
// Store cells.
for (uint32_t i = 0; i < count; i++) {
VirtReg* vreg = static_cast<VirtReg*>(vregs[i]);
X86StateCell& cell = dst->_cells[i];
cell.reset();
cell.setState(vreg->getState());
}
return dst;
}
// ============================================================================
// [asmjit::X86RAPass - State - Switch]
// ============================================================================
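// Switching the current state to `src` works register-by-register: a variable
// that `src` expects in a different physical register is moved (or swapped
// with the variable currently occupying that register), a variable that `src`
// expects in memory is spilled, and a variable `src` doesn't track at all is
// unused. The loop repeats until a full pass makes no progress, then the
// 'Modified' masks of both states are reconciled.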
template<int C>
static ASMJIT_INLINE void X86RAPass_switchStateVars(X86RAPass* self, X86RAState* src) {
X86RAState* dst = self->getState();
VirtReg** dVars = dst->getListByKind(C);
VirtReg** sVars = src->getListByKind(C);
X86StateCell* cells = src->_cells;
uint32_t regCount = self->_regCount.get(C);
bool didWork;
do {
didWork = false;
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
VirtReg* dVReg = dVars[physId];
VirtReg* sVd = sVars[physId];
if (dVReg == sVd) continue;
if (dVReg) {
const X86StateCell& cell = cells[dVReg->_raId];
if (cell.getState() != VirtReg::kStateReg) {
if (cell.getState() == VirtReg::kStateMem)
self->spill<C>(dVReg);
else
self->unuse<C>(dVReg);
dVReg = nullptr;
didWork = true;
if (!sVd) continue;
}
}
if (!dVReg && sVd) {
_MoveOrLoad:
if (sVd->getPhysId() != Globals::kInvalidRegId)
self->move<C>(sVd, physId);
else
self->load<C>(sVd, physId);
didWork = true;
continue;
}
if (dVReg) {
const X86StateCell& cell = cells[dVReg->_raId];
if (!sVd) {
if (cell.getState() == VirtReg::kStateReg)
continue;
if (cell.getState() == VirtReg::kStateMem)
self->spill<C>(dVReg);
else
self->unuse<C>(dVReg);
didWork = true;
continue;
}
else {
if (cell.getState() == VirtReg::kStateReg) {
if (dVReg->getPhysId() != Globals::kInvalidRegId && sVd->getPhysId() != Globals::kInvalidRegId) {
if (C == X86Reg::kKindGp) {
self->swapGp(dVReg, sVd);
}
else if (C == X86Reg::kKindVec) {
self->swapVec(dVReg, sVd);
}
else {
self->spill<C>(dVReg);
self->move<C>(sVd, physId);
}
didWork = true;
continue;
}
else {
didWork = true;
continue;
}
}
if (cell.getState() == VirtReg::kStateMem)
self->spill<C>(dVReg);
else
self->unuse<C>(dVReg);
goto _MoveOrLoad;
}
}
}
} while (didWork);
uint32_t dModified = dst->_modified.get(C);
uint32_t sModified = src->_modified.get(C);
if (dModified != sModified) {
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
VirtReg* vreg = dVars[physId];
if (!vreg) continue;
if ((dModified & regMask) && !(sModified & regMask)) {
self->save<C>(vreg);
continue;
}
if (!(dModified & regMask) && (sModified & regMask)) {
self->modify<C>(vreg);
continue;
}
}
}
}
void X86RAPass::switchState(RAState* src_) {
ASMJIT_ASSERT(src_ != nullptr);
X86RAState* cur = getState();
X86RAState* src = static_cast<X86RAState*>(src_);
// Ignore if both states are equal.
if (cur == src)
return;
// Switch variables.
X86RAPass_switchStateVars<X86Reg::kKindGp >(this, src);
X86RAPass_switchStateVars<X86Reg::kKindMm >(this, src);
X86RAPass_switchStateVars<X86Reg::kKindVec>(this, src);
// Calculate changed state.
VirtReg** vregs = _contextVd.getData();
uint32_t count = static_cast<uint32_t>(_contextVd.getLength());
X86StateCell* cells = src->_cells;
for (uint32_t i = 0; i < count; i++) {
VirtReg* vreg = static_cast<VirtReg*>(vregs[i]);
const X86StateCell& cell = cells[i];
uint32_t vState = cell.getState();
if (vState != VirtReg::kStateReg) {
vreg->setState(vState);
vreg->setModified(false);
}
}
ASMJIT_X86_CHECK_STATE
}
// ============================================================================
// [asmjit::X86RAPass - State - Intersect]
// ============================================================================
// The algorithm is actually not so smart, but tries to find an intersection of
// `a` and `b` and tries to move/alloc a variable into that location if it's
// possible. It also finds out which variables will be spilled/unused by `a`
// and `b` and performs that action here. It may improve the switch state code
// in certain cases, but doesn't necessarily do the best job possible.
template<int C>
static ASMJIT_INLINE void X86RAPass_intersectStateVars(X86RAPass* self, X86RAState* a, X86RAState* b) {
X86RAState* dst = self->getState();
VirtReg** dVars = dst->getListByKind(C);
VirtReg** aVars = a->getListByKind(C);
X86StateCell* aCells = a->_cells;
X86StateCell* bCells = b->_cells;
uint32_t regCount = self->_regCount.get(C);
bool didWork;
// Similar to `switchStateVars()`, we iterate over and over until there is
// no work to be done.
do {
didWork = false;
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
VirtReg* dVReg = dVars[physId]; // Destination reg.
VirtReg* aVReg = aVars[physId]; // State-a reg.
if (dVReg == aVReg) continue;
if (dVReg) {
const X86StateCell& aCell = aCells[dVReg->_raId];
const X86StateCell& bCell = bCells[dVReg->_raId];
if (aCell.getState() != VirtReg::kStateReg && bCell.getState() != VirtReg::kStateReg) {
if (aCell.getState() == VirtReg::kStateMem || bCell.getState() == VirtReg::kStateMem)
self->spill<C>(dVReg);
else
self->unuse<C>(dVReg);
dVReg = nullptr;
didWork = true;
if (!aVReg) continue;
}
}
if (!dVReg && aVReg) {
if (aVReg->getPhysId() != Globals::kInvalidRegId)
self->move<C>(aVReg, physId);
else
self->load<C>(aVReg, physId);
didWork = true;
continue;
}
if (dVReg) {
const X86StateCell& aCell = aCells[dVReg->_raId];
const X86StateCell& bCell = bCells[dVReg->_raId];
if (!aVReg) {
if (aCell.getState() == VirtReg::kStateReg || bCell.getState() == VirtReg::kStateReg)
continue;
if (aCell.getState() == VirtReg::kStateMem || bCell.getState() == VirtReg::kStateMem)
self->spill<C>(dVReg);
else
self->unuse<C>(dVReg);
didWork = true;
continue;
}
else if (C == X86Reg::kKindGp || C == X86Reg::kKindVec) {
if (aCell.getState() == VirtReg::kStateReg) {
if (dVReg->getPhysId() != Globals::kInvalidRegId && aVReg->getPhysId() != Globals::kInvalidRegId) {
if (C == X86Reg::kKindGp)
self->swapGp(dVReg, aVReg);
else
self->swapVec(dVReg, aVReg);
didWork = true;
continue;
}
}
}
}
}
} while (didWork);
uint32_t dModified = dst->_modified.get(C);
uint32_t aModified = a->_modified.get(C);
if (dModified != aModified) {
for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) {
VirtReg* vreg = dVars[physId];
if (!vreg) continue;
const X86StateCell& aCell = aCells[vreg->_raId];
if ((dModified & regMask) && !(aModified & regMask) && aCell.getState() == VirtReg::kStateReg)
self->save<C>(vreg);
}
}
}
void X86RAPass::intersectStates(RAState* a_, RAState* b_) {
X86RAState* a = static_cast<X86RAState*>(a_);
X86RAState* b = static_cast<X86RAState*>(b_);
ASMJIT_ASSERT(a != nullptr);
ASMJIT_ASSERT(b != nullptr);
X86RAPass_intersectStateVars<X86Reg::kKindGp >(this, a, b);
X86RAPass_intersectStateVars<X86Reg::kKindMm >(this, a, b);
X86RAPass_intersectStateVars<X86Reg::kKindVec>(this, a, b);
ASMJIT_X86_CHECK_STATE
}
// ============================================================================
// [asmjit::X86RAPass - GetJccFlow / GetOppositeJccFlow]
// ============================================================================
//! \internal
static ASMJIT_INLINE CBNode* X86RAPass_getJccFlow(CBJump* jNode) {
if (jNode->isTaken())
return jNode->getTarget();
else
return jNode->getNext();
}
//! \internal
static ASMJIT_INLINE CBNode* X86RAPass_getOppositeJccFlow(CBJump* jNode) {
if (jNode->isTaken())
return jNode->getNext();
else
return jNode->getTarget();
}
// ============================================================================
// [asmjit::X86RAPass - SingleVarInst]
// ============================================================================
//! \internal
static void X86RAPass_prepareSingleVarInst(uint32_t instId, TiedReg* tr) {
switch (instId) {
// - pandn reg, reg ; Set all bits in reg to 0.
// - xor/pxor reg, reg ; Set all bits in reg to 0.
// - sub/psub reg, reg ; Set all bits in reg to 0.
// - pcmpgt reg, reg ; Set all bits in reg to 0.
// - pcmpeq reg, reg ; Set all bits in reg to 1.
case X86Inst::kIdPandn :
case X86Inst::kIdXor : case X86Inst::kIdXorpd : case X86Inst::kIdXorps : case X86Inst::kIdPxor :
case X86Inst::kIdSub:
case X86Inst::kIdPsubb : case X86Inst::kIdPsubw : case X86Inst::kIdPsubd : case X86Inst::kIdPsubq :
case X86Inst::kIdPsubsb : case X86Inst::kIdPsubsw : case X86Inst::kIdPsubusb : case X86Inst::kIdPsubusw :
case X86Inst::kIdPcmpeqb : case X86Inst::kIdPcmpeqw : case X86Inst::kIdPcmpeqd : case X86Inst::kIdPcmpeqq :
case X86Inst::kIdPcmpgtb : case X86Inst::kIdPcmpgtw : case X86Inst::kIdPcmpgtd : case X86Inst::kIdPcmpgtq :
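// The result doesn't depend on the register's previous value, so the
// read dependency (kRReg) can be dropped.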
tr->flags &= ~TiedReg::kRReg;
break;
// - and reg, reg ; Nop.
// - or reg, reg ; Nop.
// - xchg reg, reg ; Nop.
case X86Inst::kIdAnd : case X86Inst::kIdAndpd : case X86Inst::kIdAndps : case X86Inst::kIdPand :
case X86Inst::kIdOr : case X86Inst::kIdOrpd : case X86Inst::kIdOrps : case X86Inst::kIdPor :
case X86Inst::kIdXchg :
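// The register keeps its previous value, so the write dependency
// (kWReg) can be dropped.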
tr->flags &= ~TiedReg::kWReg;
break;
}
}
// ============================================================================
// [asmjit::X86RAPass - Helpers]
// ============================================================================
static void X86RAPass_assignStackArgsRegId(X86RAPass* self, CCFunc* func) {
const FuncDetail& fd = func->getDetail();
FuncFrameInfo& ffi = func->getFrameInfo();
// Select some register which will contain the base address of function
// arguments and return address. The algorithm tries to select registers
// which are saved or not preserved by default; if that is not successful
// it picks any other register and adds it to `_savedRegs`.
uint32_t stackArgsRegId;
if (ffi.hasPreservedFP()) {
stackArgsRegId = X86Gp::kIdBp;
}
else {
// Passed registers as defined by the calling convention.
uint32_t passed = fd.getPassedRegs(X86Reg::kKindGp);
// Registers actually used to pass function arguments (related to this
// function signature) with ESP|RSP included, as this register can't be
// used in general to hold anything but the stack pointer.
uint32_t used = fd.getUsedRegs(X86Reg::kKindGp) | Utils::mask(X86Gp::kIdSp);
// First try a register that is defined to pass a function argument by the
// calling convention but is not used by this function. This will most
// likely fail in 32-bit mode, but there is a high chance that it will
// pass in 64-bit mode if the function doesn't take too many arguments.
uint32_t regs = passed & ~used;
// Pick any other register if that didn't work out.
if (!regs) regs = ~passed & ~used;
stackArgsRegId = Utils::findFirstBit(regs);
ASMJIT_ASSERT(stackArgsRegId < self->cc()->getGpCount());
}
ffi.setStackArgsRegId(stackArgsRegId);
}
// ============================================================================
// [asmjit::X86RAPass - SArg Insertion]
// ============================================================================
struct SArgData {
VirtReg* sVd;
VirtReg* cVd;
CCPushArg* sArg;
uint32_t aType;
};
static ASMJIT_INLINE bool X86RAPass_mustConvertSArg(X86RAPass* self, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
uint32_t dstFloatSize = dstTypeId == TypeId::kF32 ? 4 :
dstTypeId == TypeId::kF64 ? 8 : 0;
uint32_t srcFloatSize = srcTypeId == TypeId::kF32 ? 4 :
srcTypeId == TypeId::kF32x1 ? 4 :
srcTypeId == TypeId::kF64 ? 8 :
srcTypeId == TypeId::kF64x1 ? 8 : 0;
if (dstFloatSize && srcFloatSize)
return dstFloatSize != srcFloatSize;
else
return false;
}
static ASMJIT_INLINE uint32_t X86RAPass_typeOfConvertedSArg(X86RAPass* self, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
ASMJIT_ASSERT(X86RAPass_mustConvertSArg(self, dstTypeId, srcTypeId));
return dstTypeId == TypeId::kF32 ? TypeId::kF32x1 : TypeId::kF64x1;
}
static ASMJIT_INLINE Error X86RAPass_insertPushArg(
X86RAPass* self, CCFuncCall* call,
VirtReg* sReg, const uint32_t* gaRegs,
const FuncDetail::Value& arg, uint32_t argIndex,
SArgData* sArgList, uint32_t& sArgCount) {
X86Compiler* cc = self->cc();
uint32_t i;
uint32_t dstTypeId = arg.getTypeId();
uint32_t srcTypeId = sReg->getTypeId();
// First locate or create sArgBase.
for (i = 0; i < sArgCount; i++)
if (sArgList[i].sVd == sReg && !sArgList[i].cVd)
break;
SArgData* sArgData = &sArgList[i];
if (i == sArgCount) {
sArgData->sVd = sReg;
sArgData->cVd = nullptr;
sArgData->sArg = nullptr;
sArgData->aType = 0xFF;
sArgCount++;
}
uint32_t srcRegKind = sReg->getKind();
// Only handles float<->double conversion.
if (X86RAPass_mustConvertSArg(self, dstTypeId, srcTypeId)) {
uint32_t cvtTypeId = X86RAPass_typeOfConvertedSArg(self, dstTypeId, srcTypeId);
uint32_t cvtRegKind = X86Reg::kKindVec;
while (++i < sArgCount) {
sArgData = &sArgList[i];
if (sArgData->sVd != sReg)
break;
if (sArgData->cVd->getTypeId() != cvtTypeId || sArgData->aType != dstTypeId)
continue;
sArgData->sArg->_args |= Utils::mask(argIndex);
return kErrorOk;
}
VirtReg* cReg = cc->newVirtReg(dstTypeId, x86OpData.archRegs.regInfo[X86Reg::kRegXmm].getSignature(), nullptr);
if (!cReg) return DebugUtils::errored(kErrorNoHeapMemory);
CCPushArg* sArg = cc->newNodeT<CCPushArg>(call, sReg, cReg);
if (!sArg) return DebugUtils::errored(kErrorNoHeapMemory);
X86RAData* raData = self->newRAData(2);
if (!raData) return DebugUtils::errored(kErrorNoHeapMemory);
ASMJIT_PROPAGATE(self->assignRAId(cReg));
ASMJIT_PROPAGATE(self->assignRAId(sReg));
raData->tiedTotal = 2;
raData->tiedCount.reset();
raData->tiedCount.add(srcRegKind);
raData->tiedCount.add(cvtRegKind);
raData->tiedIndex.reset();
raData->inRegs.reset();
raData->outRegs.reset();
raData->clobberedRegs.reset();
if (srcRegKind <= cvtRegKind) {
raData->tiedArray[0].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]);
raData->tiedArray[1].init(cReg, TiedReg::kWReg, 0, gaRegs[cvtRegKind]);
raData->tiedIndex.set(cvtRegKind, srcRegKind != cvtRegKind);
}
else {
raData->tiedArray[0].init(cReg, TiedReg::kWReg, 0, gaRegs[cvtRegKind]);
raData->tiedArray[1].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]);
raData->tiedIndex.set(srcRegKind, 1);
}
sArg->setPassData(raData);
sArg->_args |= Utils::mask(argIndex);
cc->addBefore(sArg, call);
::memmove(sArgData + 1, sArgData, (sArgCount - i) * sizeof(SArgData));
sArgData->sVd = sReg;
sArgData->cVd = cReg;
sArgData->sArg = sArg;
sArgData->aType = dstTypeId;
sArgCount++;
return kErrorOk;
}
else {
CCPushArg* sArg = sArgData->sArg;
ASMJIT_PROPAGATE(self->assignRAId(sReg));
if (!sArg) {
sArg = cc->newNodeT<CCPushArg>(call, sReg, (VirtReg*)nullptr);
if (!sArg) return DebugUtils::errored(kErrorNoHeapMemory);
X86RAData* raData = self->newRAData(1);
if (!raData) return DebugUtils::errored(kErrorNoHeapMemory);
raData->tiedTotal = 1;
raData->tiedIndex.reset();
raData->tiedCount.reset();
raData->tiedCount.add(srcRegKind);
raData->inRegs.reset();
raData->outRegs.reset();
raData->clobberedRegs.reset();
raData->tiedArray[0].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]);
sArg->setPassData(raData);
sArgData->sArg = sArg;
cc->addBefore(sArg, call);
}
sArg->_args |= Utils::mask(argIndex);
return kErrorOk;
}
}
// ============================================================================
// [asmjit::X86RAPass - Fetch]
// ============================================================================
//! \internal
//!
//! Prepare the given function `func`.
//!
//! For each node:
//! - Create and assign groupId and position.
//! - Collect all variables and merge them into the node's `TiedReg` array.
Error X86RAPass::fetch() {
uint32_t archType = cc()->getArchType();
CCFunc* func = getFunc();
CBNode* node_ = func;
CBNode* next = nullptr;
CBNode* stop = getStop();
TiedReg agTmp[80];
SArgData sArgList[80];
uint32_t position = 0;
ZoneList<CBNode*>::Link* jLink = nullptr;
// Global allocable registers.
uint32_t* gaRegs = _gaRegs;
if (func->getFrameInfo().hasPreservedFP())
gaRegs[X86Reg::kKindGp] &= ~Utils::mask(X86Gp::kIdBp);
// Allowed index registers (GP/XMM/YMM); ESP|RSP (id 4) can never be used
// as an index register, so it is masked out.
const uint32_t indexMask = Utils::bits(_regCount.getGp()) & ~(Utils::mask(4));
// --------------------------------------------------------------------------
// [VI Macros]
// --------------------------------------------------------------------------
#define RA_POPULATE(NODE) \
do { \
X86RAData* raData = newRAData(0); \
if (!raData) goto NoMem; \
NODE->setPassData(raData); \
} while (0)
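// NOTE: RA_DECLARE() opens a `do {` block on purpose without closing it;
// the matching `} while (0)` is emitted by RA_FINALIZE(), so the two macros
// must always be used as a pair within the same scope.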
#define RA_DECLARE() \
do { \
X86RegCount tiedCount; \
X86RegCount tiedIndex; \
uint32_t tiedTotal = 0; \
\
X86RegMask inRegs; \
X86RegMask outRegs; \
X86RegMask clobberedRegs; \
\
tiedCount.reset(); \
inRegs.reset(); \
outRegs.reset(); \
clobberedRegs.reset()
#define RA_FINALIZE(NODE) \
{ \
X86RAData* raData = newRAData(tiedTotal); \
if (!raData) goto NoMem; \
\
tiedIndex.indexFromRegCount(tiedCount); \
raData->tiedCount = tiedCount; \
raData->tiedIndex = tiedIndex; \
\
raData->inRegs = inRegs; \
raData->outRegs = outRegs; \
raData->clobberedRegs = clobberedRegs; \
\
TiedReg* tied = agTmp; \
while (tiedTotal) { \
VirtReg* vreg = tied->vreg; \
\
uint32_t _kind = vreg->getKind(); \
uint32_t _index = tiedIndex.get(_kind); \
\
tiedIndex.add(_kind); \
if (tied->inRegs) \
tied->allocableRegs = tied->inRegs; \
else if (tied->outPhysId != Globals::kInvalidRegId) \
tied->allocableRegs = Utils::mask(tied->outPhysId); \
else \
tied->allocableRegs &= ~inRegs.get(_kind); \
\
vreg->_tied = nullptr; \
raData->setTiedAt(_index, *tied); \
\
tied++; \
tiedTotal--; \
} \
NODE->setPassData(raData); \
} \
} while (0)
#define RA_INSERT(REG, TIED, FLAGS, NEW_ALLOCABLE) \
do { \
ASMJIT_ASSERT(REG->_tied == nullptr); \
TIED = &agTmp[tiedTotal++]; \
TIED->init(REG, FLAGS, 0, NEW_ALLOCABLE); \
TIED->refCount++; \
REG->_tied = TIED; \
\
if (assignRAId(REG) != kErrorOk) goto NoMem; \
tiedCount.add(REG->getKind()); \
} while (0)
#define RA_MERGE(REG, TIED, FLAGS, NEW_ALLOCABLE) \
do { \
TIED = REG->_tied; \
\
if (!TIED) { \
TIED = &agTmp[tiedTotal++]; \
TIED->init(REG, 0, 0, NEW_ALLOCABLE); \
REG->_tied = TIED; \
\
if (assignRAId(REG) != kErrorOk) goto NoMem; \
tiedCount.add(REG->getKind()); \
} \
\
TIED->flags |= FLAGS; \
TIED->refCount++; \
} while (0)
// --------------------------------------------------------------------------
// [Loop]
// --------------------------------------------------------------------------
do {
_Do:
while (node_->hasPassData()) {
_NextGroup:
if (!jLink)
jLink = _jccList.getFirst();
else
jLink = jLink->getNext();
if (!jLink) goto _Done;
node_ = X86RAPass_getOppositeJccFlow(static_cast<CBJump*>(jLink->getValue()));
}
position++;
next = node_->getNext();
node_->setPosition(position);
switch (node_->getType()) {
// ----------------------------------------------------------------------
// [Align/Embed]
// ----------------------------------------------------------------------
case CBNode::kNodeAlign:
case CBNode::kNodeData:
default:
RA_POPULATE(node_);
break;
// ----------------------------------------------------------------------
// [Hint]
// ----------------------------------------------------------------------
case CBNode::kNodeHint: {
CCHint* node = static_cast<CCHint*>(node_);
RA_DECLARE();
if (node->getHint() == CCHint::kHintAlloc) {
uint32_t remain[Globals::kMaxVRegKinds];
CCHint* cur = node;
remain[X86Reg::kKindGp ] = _regCount.getGp() - 1 - func->getFrameInfo().hasPreservedFP();
remain[X86Reg::kKindMm ] = _regCount.getMm();
remain[X86Reg::kKindK ] = _regCount.getK();
remain[X86Reg::kKindVec] = _regCount.getVec();
// Merge as many alloc-hints as possible.
for (;;) {
VirtReg* vreg = static_cast<VirtReg*>(cur->getVReg());
TiedReg* tied = vreg->_tied;
uint32_t kind = vreg->getKind();
uint32_t physId = cur->getValue();
uint32_t regMask = 0;
// We handle both kInvalidRegId and kInvalidValue.
if (physId < Globals::kInvalidRegId)
regMask = Utils::mask(physId);
if (!tied) {
if (inRegs.has(kind, regMask) || remain[kind] == 0)
break;
RA_INSERT(vreg, tied, TiedReg::kRReg, gaRegs[kind]);
if (regMask != 0) {
inRegs.xor_(kind, regMask);
tied->inRegs = regMask;
tied->setInPhysId(physId);
}
remain[kind]--;
}
else if (regMask != 0) {
if (inRegs.has(kind, regMask) && tied->inRegs != regMask)
break;
inRegs.xor_(kind, tied->inRegs | regMask);
tied->inRegs = regMask;
tied->setInPhysId(physId);
}
if (cur != node)
cc()->removeNode(cur);
cur = static_cast<CCHint*>(node->getNext());
if (!cur || cur->getType() != CBNode::kNodeHint || cur->getHint() != CCHint::kHintAlloc)
break;
}
next = node->getNext();
}
else {
VirtReg* vreg = static_cast<VirtReg*>(node->getVReg());
TiedReg* tied;
uint32_t flags = 0;
switch (node->getHint()) {
case CCHint::kHintSpill : flags = TiedReg::kRMem | TiedReg::kSpill; break;
case CCHint::kHintSave : flags = TiedReg::kRMem ; break;
case CCHint::kHintSaveAndUnuse: flags = TiedReg::kRMem | TiedReg::kUnuse; break;
case CCHint::kHintUnuse : flags = TiedReg::kUnuse ; break;
}
RA_INSERT(vreg, tied, flags, 0);
}
RA_FINALIZE(node_);
break;
}
// ----------------------------------------------------------------------
// [Label]
// ----------------------------------------------------------------------
case CBNode::kNodeLabel: {
RA_POPULATE(node_);
if (node_ == func->getExitNode()) {
ASMJIT_PROPAGATE(addReturningNode(node_));
goto _NextGroup;
}
break;
}
// ----------------------------------------------------------------------
// [Inst]
// ----------------------------------------------------------------------
case CBNode::kNodeInst: {
CBInst* node = static_cast<CBInst*>(node_);
uint32_t instId = node->getInstId();
uint32_t flags = node->getFlags();
uint32_t options = node->getOptions();
uint32_t gpAllowedMask = 0xFFFFFFFF;
Operand* opArray = node->getOpArray();
uint32_t opCount = node->getOpCount();
RA_DECLARE();
if (opCount) {
const X86Inst& inst = X86Inst::getInst(instId);
const X86Inst::CommonData& commonData = inst.getCommonData();
const X86SpecialInst* special = nullptr;
// Collect instruction flags and merge all 'TiedReg's.
if (commonData.isFpu())
flags |= CBNode::kFlagIsFp;
if (commonData.hasFixedRM() && (special = X86SpecialInst_get(instId, opArray, opCount)) != nullptr)
flags |= CBNode::kFlagIsSpecial;
for (uint32_t i = 0; i < opCount; i++) {
Operand* op = &opArray[i];
VirtReg* vreg;
TiedReg* tied;
if (op->isVirtReg()) {
vreg = cc()->getVirtRegById(op->getId());
if (vreg->isFixed()) continue;
RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask);
if (static_cast<X86Reg*>(op)->isGpb()) {
tied->flags |= static_cast<X86Gp*>(op)->isGpbLo() ? TiedReg::kX86GpbLo : TiedReg::kX86GpbHi;
if (archType == ArchInfo::kTypeX86) {
// If a byte register is accessed in 32-bit mode we have to limit
// all allocable registers for that variable to eax/ebx/ecx/edx.
// Other variables are not affected.
tied->allocableRegs &= 0x0F;
}
else {
// It's fine if a lo-byte register is accessed in 64-bit mode; however,
// a hi-byte register (AH|BH|CH|DH) can only be encoded without a REX
// prefix, so all other registers (GP/XMM) of such instruction must be
// allocated from the first eight (id 0..7). To do that, we patch
// 'allocableRegs' of all variables we collected until now and change
// the allocable restriction for variables that come after.
if (static_cast<X86Gp*>(op)->isGpbHi()) {
tied->allocableRegs &= 0x0F;
if (gpAllowedMask != 0xFF) {
for (uint32_t j = 0; j < i; j++)
agTmp[j].allocableRegs &= (agTmp[j].flags & TiedReg::kX86GpbHi) ? 0x0F : 0xFF;
gpAllowedMask = 0xFF;
}
}
}
}
if (special) {
uint32_t inReg = special[i].inReg;
uint32_t outReg = special[i].outReg;
uint32_t c;
if (static_cast<const X86Reg*>(op)->isGp())
c = X86Reg::kKindGp;
else
c = X86Reg::kKindVec;
if (inReg != Globals::kInvalidRegId) {
uint32_t mask = Utils::mask(inReg);
inRegs.or_(c, mask);
tied->inRegs |= mask;
}
if (outReg != Globals::kInvalidRegId) {
uint32_t mask = Utils::mask(outReg);
outRegs.or_(c, mask);
tied->setOutPhysId(outReg);
}
tied->flags |= special[i].flags;
}
else {
uint32_t inFlags = TiedReg::kRReg;
uint32_t outFlags = TiedReg::kWReg;
uint32_t combinedFlags;
if (i == 0) {
// Read/Write is usually the combination of the first operand.
combinedFlags = inFlags | outFlags;
if (node->getOptions() & CodeEmitter::kOptionOverwrite) {
// Manually forcing write-only.
combinedFlags = outFlags;
}
else if (commonData.isUseW()) {
// Write-only instruction.
uint32_t movSize = commonData.getWriteSize();
uint32_t regSize = vreg->getSize();
// Exception - if the source operand is a memory location, promote
// the move size to 16 bytes (movss/movsd loads zero the rest of
// the destination register).
if (opArray[1].isMem() && inst.getOperationData().isMovSsSd())
movSize = 16;
if (static_cast<const X86Reg*>(op)->isGp()) {
uint32_t opSize = static_cast<const X86Reg*>(op)->getSize();
// Move size is zero in case that it should be determined
// from the destination register.
if (movSize == 0)
movSize = opSize;
// Handle the case that a 32-bit operation in 64-bit mode
// always clears the rest of the destination register and
// the case that move size is actually greater than or
// equal to the size of the variable.
if (movSize >= 4 || movSize >= regSize)
combinedFlags = outFlags;
}
else if (movSize == 0 || movSize >= regSize) {
// If move size is greater than or equal to the size of
// the variable there is nothing to do, because the move
// will overwrite the variable in all cases.
combinedFlags = outFlags;
}
}
else if (commonData.isUseR()) {
// Comparison/Test instructions don't modify any operand.
combinedFlags = inFlags;
}
else if (instId == X86Inst::kIdImul && opCount == 3) {
// Imul.
combinedFlags = outFlags;
}
}
else {
// Read-only is usually the combination of the second/third/fourth operands.
combinedFlags = inFlags;
// Idiv is a special instruction, never handled here.
ASMJIT_ASSERT(instId != X86Inst::kIdIdiv);
// Xchg/Xadd/Imul.
if (commonData.isUseXX() || (instId == X86Inst::kIdImul && opCount == 3 && i == 1))
combinedFlags = inFlags | outFlags;
}
tied->flags |= combinedFlags;
}
}
else if (op->isMem()) {
X86Mem* m = static_cast<X86Mem*>(op);
node->setMemOpIndex(i);
uint32_t specBase = special ? uint32_t(special[i].inReg) : uint32_t(Globals::kInvalidRegId);
if (m->hasBaseReg()) {
uint32_t id = m->getBaseId();
if (cc()->isVirtRegValid(id)) {
vreg = cc()->getVirtRegById(id);
if (!vreg->isStack() && !vreg->isFixed()) {
RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask);
if (m->isRegHome()) {
uint32_t inFlags = TiedReg::kRMem;
uint32_t outFlags = TiedReg::kWMem;
uint32_t combinedFlags;
if (i == 0) {
// Default for the first operand.
combinedFlags = inFlags | outFlags;
if (commonData.isUseW()) {
// Move to memory - setting the right flags is as important
// here as for a move to a register; it's just a bit simpler
// as there are no special cases.
uint32_t movSize = std::max<uint32_t>(commonData.getWriteSize(), m->getSize());
uint32_t regSize = vreg->getSize();
if (movSize >= regSize)
combinedFlags = outFlags;
}
else if (commonData.isUseR()) {
// Comparison/Test instructions don't modify any operand.
combinedFlags = inFlags;
}
}
else {
// Default for the second operand.
combinedFlags = inFlags;
// Handle Xchg instruction (modifies both operands).
if (commonData.isUseXX())
combinedFlags = inFlags | outFlags;
}
tied->flags |= combinedFlags;
}
else {
if (specBase != Globals::kInvalidRegId) {
uint32_t mask = Utils::mask(specBase);
inRegs.or_(vreg->getKind(), mask);
outRegs.or_(vreg->getKind(), mask);
tied->inRegs |= mask;
tied->setOutPhysId(specBase);
tied->flags |= special[i].flags;
}
else {
tied->flags |= TiedReg::kRReg;
}
}
}
}
}
if (m->hasIndexReg()) {
uint32_t id = m->getIndexId();
if (cc()->isVirtRegValid(id)) {
// Restrict allocation to all registers except ESP|RSP.
vreg = cc()->getVirtRegById(m->getIndexId());
if (!vreg->isFixed()) {
// TODO: AVX vector operands support.
RA_MERGE(vreg, tied, 0, gaRegs[X86Reg::kKindGp] & gpAllowedMask);
tied->allocableRegs &= indexMask;
tied->flags |= TiedReg::kRReg;
}
}
}
}
}
node->setFlags(flags);
if (tiedTotal) {
// Handle instructions which result in zeros/ones or nop if used with the
// same destination and source operand.
if (tiedTotal == 1 && opCount >= 2 && opArray[0].isVirtReg() && opArray[1].isVirtReg() && !node->hasMemOp())
X86RAPass_prepareSingleVarInst(instId, &agTmp[0]);
}
// Turn on AVX if the instruction operates on XMM|YMM|ZMM registers and uses VEX|EVEX prefix.
if (tiedCount.getVec() && commonData.hasFlag(X86Inst::kFlagVex | X86Inst::kFlagEvex))
_avxEnabled = true;
}
const RegOnly& extraReg = node->getExtraReg();
if (extraReg.isValid()) {
uint32_t id = extraReg.getId();
if (cc()->isVirtRegValid(id)) {
VirtReg* vreg = cc()->getVirtRegById(id);
TiedReg* tied;
RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask);
if (options & (X86Inst::kOptionRep | X86Inst::kOptionRepnz)) {
tied->allocableRegs = Utils::mask(X86Gp::kIdCx);
tied->flags |= TiedReg::kXReg;
}
else {
tied->flags |= TiedReg::kRReg;
}
}
}
RA_FINALIZE(node_);
// Handle conditional/unconditional jump.
if (node->isJmpOrJcc()) {
CBJump* jNode = static_cast<CBJump*>(node);
CBLabel* jTarget = jNode->getTarget();
// If this jump is unconditional we put next node to unreachable node
// list so we can eliminate possible dead code. We have to do this in
// all cases since we are unable to translate without fetch() step.
//
// We also advance our node pointer to the target node to simulate
// natural flow of the function.
if (jNode->isJmp()) {
if (next && !next->hasPassData())
ASMJIT_PROPAGATE(addUnreachableNode(next));
// Jump not followed.
if (!jTarget) {
ASMJIT_PROPAGATE(addReturningNode(jNode));
goto _NextGroup;
}
node_ = jTarget;
goto _Do;
}
else {
// Jump not followed.
if (!jTarget) break;
if (jTarget->hasPassData()) {
uint32_t jTargetPosition = jTarget->getPosition();
// Update CBNode::kFlagIsTaken to true if this is a conditional
// backward jump. This behavior can be overridden by using
// `X86Inst::kOptionTaken` when the instruction is created.
if (!jNode->isTaken() && opCount == 1 && jTargetPosition <= position) {
jNode->_flags |= CBNode::kFlagIsTaken;
}
}
else if (next->hasPassData()) {
node_ = jTarget;
goto _Do;
}
else {
ASMJIT_PROPAGATE(addJccNode(jNode));
node_ = X86RAPass_getJccFlow(jNode);
goto _Do;
}
}
}
break;
}
// ----------------------------------------------------------------------
// [Func-Entry]
// ----------------------------------------------------------------------
case CBNode::kNodeFunc: {
ASMJIT_ASSERT(node_ == func);
X86RAPass_assignStackArgsRegId(this, func);
FuncDetail& fd = func->getDetail();
TiedReg* tied;
RA_DECLARE();
cc()->setCursor(node_);
X86Gp saReg;
uint32_t argCount = fd.getArgCount();
for (uint32_t i = 0; i < argCount; i++) {
const FuncDetail::Value& arg = fd.getArg(i);
VirtReg* vReg = func->getArg(i);
if (!vReg) continue;
// Overlapped function arguments.
if (vReg->_tied)
return DebugUtils::errored(kErrorOverlappedRegs);
uint32_t aKind = X86Reg::kindOf(arg.getRegType());
uint32_t vKind = vReg->getKind();
if (arg.byReg()) {
if (aKind == vKind) {
RA_INSERT(vReg, tied, TiedReg::kWReg, 0);
tied->setOutPhysId(arg.getRegId());
}
else {
X86Reg rTmp = cc()->newReg(arg.getTypeId(), "arg%u", i);
VirtReg* vTmp = cc()->getVirtReg(rTmp);
RA_INSERT(vTmp, tied, TiedReg::kWReg, 0);
tied->setOutPhysId(arg.getRegId());
X86Reg dstReg(X86Reg::fromSignature(vReg->getSignature(), vReg->getId()));
X86Reg srcReg(X86Reg::fromSignature(vTmp->getSignature(), vTmp->getId()));
// Emit conversion after the prolog.
return X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(cc()),
dstReg, vReg->getTypeId(),
srcReg, vTmp->getTypeId(), _avxEnabled);
}
}
else {
// Instead of complicating the prolog allocation we create a virtual
// register that holds the base address of all arguments passed by
// stack and then insert nodes that copy these arguments to registers.
if (!saReg.isValid()) {
saReg = cc()->newGpz("__args");
if (!saReg.isValid()) goto NoMem;
VirtReg* saBase = cc()->getVirtReg(saReg);
RA_INSERT(saBase, tied, TiedReg::kWReg, 0);
if (func->getFrameInfo().hasPreservedFP())
saBase->_isFixed = true;
tied->setOutPhysId(func->getFrameInfo().getStackArgsRegId());
}
// Argument passed by stack is handled after the prolog.
X86Gp aReg = X86Gp::fromSignature(vReg->getSignature(), vReg->getId());
X86Mem aMem = x86::ptr(saReg, arg.getStackOffset());
aMem.setArgHome();
ASMJIT_PROPAGATE(
X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(cc()),
aReg, vReg->getTypeId(), aMem, arg.getTypeId(), _avxEnabled));
}
}
// If saReg is not needed, clear it also from FuncFrameInfo.
if (!saReg.isValid())
func->getFrameInfo().setStackArgsRegId(Globals::kInvalidRegId);
RA_FINALIZE(node_);
next = node_->getNext();
break;
}
// ----------------------------------------------------------------------
// [End]
// ----------------------------------------------------------------------
case CBNode::kNodeSentinel: {
RA_POPULATE(node_);
ASMJIT_PROPAGATE(addReturningNode(node_));
goto _NextGroup;
}
// ----------------------------------------------------------------------
// [Func-Exit]
// ----------------------------------------------------------------------
case CBNode::kNodeFuncExit: {
CCFuncRet* node = static_cast<CCFuncRet*>(node_);
ASMJIT_PROPAGATE(addReturningNode(node));
FuncDetail& fd = func->getDetail();
RA_DECLARE();
if (fd.hasRet()) {
const FuncDetail::Value& ret = fd.getRet(0);
uint32_t retKind = X86Reg::kindOf(ret.getRegType());
for (uint32_t i = 0; i < 2; i++) {
Operand_* op = &node->_ret[i];
if (op->isVirtReg()) {
VirtReg* vreg = cc()->getVirtRegById(op->getId());
TiedReg* tied;
RA_MERGE(vreg, tied, 0, 0);
if (retKind == vreg->getKind()) {
tied->flags |= TiedReg::kRReg;
tied->inRegs = Utils::mask(ret.getRegId());
inRegs.or_(retKind, tied->inRegs);
}
else if (retKind == X86Reg::kKindFp) {
uint32_t fldFlag = ret.getTypeId() == TypeId::kF32 ? TiedReg::kX86Fld4 : TiedReg::kX86Fld8;
tied->flags |= TiedReg::kRMem | fldFlag;
}
else {
// TODO: Fix possible other return type conversions.
ASMJIT_NOT_REACHED();
}
}
}
}
RA_FINALIZE(node_);
if (!next->hasPassData())
ASMJIT_PROPAGATE(addUnreachableNode(next));
goto _NextGroup;
}
// ----------------------------------------------------------------------
// [Func-Call]
// ----------------------------------------------------------------------
case CBNode::kNodeFuncCall: {
CCFuncCall* node = static_cast<CCFuncCall*>(node_);
FuncDetail& fd = node->getDetail();
Operand_* target = node->_opArray;
Operand_* args = node->_args;
Operand_* rets = node->_ret;
func->getFrameInfo().enableCalls();
func->getFrameInfo().mergeCallFrameSize(fd.getArgStackSize());
// TODO: Each function frame should also define its stack arguments' alignment.
// func->getFrameInfo().mergeCallFrameAlignment();
uint32_t i;
uint32_t argCount = fd.getArgCount();
uint32_t sArgCount = 0;
uint32_t gpAllocableMask = gaRegs[X86Reg::kKindGp] & ~node->getDetail().getUsedRegs(X86Reg::kKindGp);
VirtReg* vreg;
TiedReg* tied;
RA_DECLARE();
// Function-call operand.
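// Note: an indirect call target can live in any allocable GP register
// that is not reserved for argument passing, which is exactly what the
// 'gpAllocableMask' computed above expresses.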
if (target->isVirtReg()) {
vreg = cc()->getVirtRegById(target->getId());
RA_MERGE(vreg, tied, 0, 0);
tied->flags |= TiedReg::kRReg | TiedReg::kRCall;
if (tied->inRegs == 0)
tied->allocableRegs |= gpAllocableMask;
}
else if (target->isMem()) {
X86Mem* m = static_cast<X86Mem*>(target);
if (m->hasBaseReg() && Operand::isPackedId(m->getBaseId())) {
vreg = cc()->getVirtRegById(m->getBaseId());
if (!vreg->isStack()) {
RA_MERGE(vreg, tied, 0, 0);
if (m->isRegHome()) {
tied->flags |= TiedReg::kRMem | TiedReg::kRCall;
}
else {
tied->flags |= TiedReg::kRReg | TiedReg::kRCall;
if (tied->inRegs == 0)
tied->allocableRegs |= gpAllocableMask;
}
}
}
if (m->hasIndexReg() && Operand::isPackedId(m->getIndexId())) {
// Restrict allocation to all registers except ESP/RSP.
vreg = cc()->getVirtRegById(m->getIndexId());
RA_MERGE(vreg, tied, 0, 0);
tied->flags |= TiedReg::kRReg | TiedReg::kRCall;
if ((tied->inRegs & ~indexMask) == 0)
tied->allocableRegs &= gpAllocableMask & indexMask;
}
}
// Function-call arguments.
for (i = 0; i < argCount; i++) {
Operand_* op = &args[i];
if (!op->isVirtReg()) continue;
vreg = cc()->getVirtRegById(op->getId());
const FuncDetail::Value& arg = fd.getArg(i);
if (arg.byReg()) {
RA_MERGE(vreg, tied, 0, 0);
uint32_t argClass = X86Reg::kindOf(arg.getRegType());
if (vreg->getKind() == argClass) {
tied->inRegs |= Utils::mask(arg.getRegId());
tied->flags |= TiedReg::kRReg | TiedReg::kRFunc;
}
else {
// TODO: Function-call argument conversion.
}
}
// If this is a stack-based argument we insert CCPushArg instead of
// using TiedReg. This improves the generated code, because the argument
// can be moved onto the stack as soon as it is ready, and the register
// used by the variable can be reused for something else. It also makes
// argument conversions much easier to handle, because there will be at
// most one node per conversion.
else {
if (X86RAPass_insertPushArg(this, node, vreg, gaRegs, arg, i, sArgList, sArgCount) != kErrorOk)
goto NoMem;
}
}
// Function-call returns.
for (i = 0; i < 2; i++) {
Operand_* op = &rets[i];
if (!op->isVirtReg()) continue;
const FuncDetail::Value& ret = fd.getRet(i);
if (ret.byReg()) {
uint32_t retKind = X86Reg::kindOf(ret.getRegType());
vreg = cc()->getVirtRegById(op->getId());
RA_MERGE(vreg, tied, 0, 0);
if (vreg->getKind() == retKind) {
tied->setOutPhysId(ret.getRegId());
tied->flags |= TiedReg::kWReg | TiedReg::kWFunc;
}
else {
// TODO: Function-call return value conversion.
}
}
}
// Init clobbered.
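// A register is clobbered by the call if it's either used to pass an
// argument or not preserved by the callee's calling convention, hence
// the 'passed | ~preserved' masks below.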
clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (fd.getPassedRegs(X86Reg::kKindGp ) | ~fd.getPreservedRegs(X86Reg::kKindGp )));
clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (fd.getPassedRegs(X86Reg::kKindMm ) | ~fd.getPreservedRegs(X86Reg::kKindMm )));
clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (fd.getPassedRegs(X86Reg::kKindK ) | ~fd.getPreservedRegs(X86Reg::kKindK )));
clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (fd.getPassedRegs(X86Reg::kKindVec) | ~fd.getPreservedRegs(X86Reg::kKindVec)));
RA_FINALIZE(node_);
break;
}
}
node_ = next;
} while (node_ != stop);
_Done:
// Mark exit label and end node as fetched, otherwise they can be removed by
// `removeUnreachableCode()`, which could lead to a crash in some later step.
node_ = func->getEnd();
if (!node_->hasPassData()) {
CBLabel* fExit = func->getExitNode();
RA_POPULATE(fExit);
fExit->setPosition(++position);
RA_POPULATE(node_);
node_->setPosition(++position);
}
return kErrorOk;
// --------------------------------------------------------------------------
// [Failure]
// --------------------------------------------------------------------------
NoMem:
return DebugUtils::errored(kErrorNoHeapMemory);
}
// ============================================================================
// [asmjit::X86RAPass - Annotate]
// ============================================================================
Error X86RAPass::annotate() {
#if !defined(ASMJIT_DISABLE_LOGGING)
CCFunc* func = getFunc();
CBNode* node_ = func;
CBNode* end = func->getEnd();
Zone& dataZone = cc()->_cbDataZone;
StringBuilderTmp<256> sb;
uint32_t maxLen = 0;
while (node_ && node_ != end) {
if (!node_->hasInlineComment()) {
if (node_->getType() == CBNode::kNodeInst) {
CBInst* node = static_cast<CBInst*>(node_);
Logging::formatInstruction(
sb,
0,
cc(),
cc()->getArchType(),
node->getInstDetail(), node->getOpArray(), node->getOpCount());
node_->setInlineComment(
static_cast<char*>(dataZone.dup(sb.getData(), sb.getLength(), true)));
maxLen = std::max<uint32_t>(maxLen, static_cast<uint32_t>(sb.getLength()));
sb.clear();
}
}
node_ = node_->getNext();
}
_annotationLength = maxLen + 1;
#endif // !ASMJIT_DISABLE_LOGGING
return kErrorOk;
}
// ============================================================================
// [asmjit::X86BaseAlloc]
// ============================================================================
struct X86BaseAlloc {
// --------------------------------------------------------------------------
// [Construction / Destruction]
// --------------------------------------------------------------------------
ASMJIT_INLINE X86BaseAlloc(X86RAPass* context) {
_context = context;
_cc = context->cc();
}
ASMJIT_INLINE ~X86BaseAlloc() {}
// --------------------------------------------------------------------------
// [Accessors]
// --------------------------------------------------------------------------
//! Get the context.
ASMJIT_INLINE X86RAPass* getContext() const { return _context; }
//! Get the current state (always the same instance as X86RAPass::_x86State).
ASMJIT_INLINE X86RAState* getState() const { return _context->getState(); }
//! Get the node.
ASMJIT_INLINE CBNode* getNode() const { return _node; }
//! Get TiedReg list (all).
ASMJIT_INLINE TiedReg* getTiedArray() const { return _tiedArray[0]; }
//! Get TiedReg list (per class).
ASMJIT_INLINE TiedReg* getTiedArrayByKind(uint32_t kind) const { return _tiedArray[kind]; }
//! Get TiedReg count (all).
ASMJIT_INLINE uint32_t getTiedCount() const { return _tiedTotal; }
//! Get TiedReg count (per class).
ASMJIT_INLINE uint32_t getTiedCountByKind(uint32_t kind) const { return _tiedCount.get(kind); }
//! Get whether all variables of the given register `kind` are done.
ASMJIT_INLINE bool isTiedDone(uint32_t kind) const { return _tiedDone.get(kind) == _tiedCount.get(kind); }
//! Get how many variables have been allocated.
ASMJIT_INLINE uint32_t getTiedDone(uint32_t kind) const { return _tiedDone.get(kind); }
//! Add to the count of variables allocated.
ASMJIT_INLINE void addTiedDone(uint32_t kind, uint32_t n = 1) { _tiedDone.add(kind, n); }
//! Get number of allocable registers per class.
ASMJIT_INLINE uint32_t getGaRegs(uint32_t kind) const {
return _context->_gaRegs[kind];
}
// --------------------------------------------------------------------------
// [Init / Cleanup]
// --------------------------------------------------------------------------
protected:
// Just to prevent these methods from being called by X86RAPass::translate().
ASMJIT_INLINE void init(CBNode* node, X86RAData* map);
ASMJIT_INLINE void cleanup();
// --------------------------------------------------------------------------
// [Unuse]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void unuseBefore();
template<int C>
ASMJIT_INLINE void unuseAfter();
// --------------------------------------------------------------------------
// [Members]
// --------------------------------------------------------------------------
//! RA context.
X86RAPass* _context;
//! Compiler.
X86Compiler* _cc;
//! Node.
CBNode* _node;
//! Register allocator (RA) data.
X86RAData* _raData;
//! TiedReg list (per register kind).
TiedReg* _tiedArray[Globals::kMaxVRegKinds];
//! Count of all TiedReg's.
uint32_t _tiedTotal;
//! TiedReg's total counter.
X86RegCount _tiedCount;
//! TiedReg's done counter.
X86RegCount _tiedDone;
};
// ============================================================================
// [asmjit::X86BaseAlloc - Init / Cleanup]
// ============================================================================
ASMJIT_INLINE void X86BaseAlloc::init(CBNode* node, X86RAData* raData) {
_node = node;
_raData = raData;
// We have to set the correct cursor in case any instruction is emitted
// during the allocation phase; it has to be emitted before the current
// instruction.
_cc->_setCursor(node->getPrev());
// Setup the lists of variables.
{
TiedReg* tied = raData->getTiedArray();
_tiedArray[X86Reg::kKindGp ] = tied;
_tiedArray[X86Reg::kKindMm ] = tied + raData->getTiedStart(X86Reg::kKindMm );
_tiedArray[X86Reg::kKindK ] = tied + raData->getTiedStart(X86Reg::kKindK );
_tiedArray[X86Reg::kKindVec] = tied + raData->getTiedStart(X86Reg::kKindVec);
}
// Setup counters.
_tiedTotal = raData->tiedTotal;
_tiedCount = raData->tiedCount;
_tiedDone.reset();
// Connect VREG->TIED.
for (uint32_t i = 0; i < _tiedTotal; i++) {
TiedReg* tied = &_tiedArray[0][i];
VirtReg* vreg = tied->vreg;
vreg->_tied = tied;
}
}
ASMJIT_INLINE void X86BaseAlloc::cleanup() {
// Disconnect VREG->TIED.
for (uint32_t i = 0; i < _tiedTotal; i++) {
TiedReg* tied = &_tiedArray[0][i];
VirtReg* vreg = tied->vreg;
vreg->_tied = nullptr;
}
}
// ============================================================================
// [asmjit::X86BaseAlloc - Unuse]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86BaseAlloc::unuseBefore() {
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
const uint32_t checkFlags = TiedReg::kXReg |
TiedReg::kRMem |
TiedReg::kRFunc |
TiedReg::kRCall ;
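// Since kXReg is the combination kRReg|kWReg, masking with 'checkFlags'
// and comparing against kWReg selects variables that are written but not
// read in any form (register, memory, or function/call use). Their
// previous content is dead, so the register can be released before the
// node executes.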
for (uint32_t i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
if ((tied->flags & checkFlags) == TiedReg::kWReg)
_context->unuse<C>(tied->vreg);
}
}
template<int C>
ASMJIT_INLINE void X86BaseAlloc::unuseAfter() {
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
for (uint32_t i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
if (tied->flags & TiedReg::kUnuse)
_context->unuse<C>(tied->vreg);
}
}
// ============================================================================
// [asmjit::X86VarAlloc]
// ============================================================================
//! \internal
//!
//! Register allocator context (asm instructions).
struct X86VarAlloc : public X86BaseAlloc {
// --------------------------------------------------------------------------
// [Construction / Destruction]
// --------------------------------------------------------------------------
ASMJIT_INLINE X86VarAlloc(X86RAPass* context) : X86BaseAlloc(context) {}
ASMJIT_INLINE ~X86VarAlloc() {}
// --------------------------------------------------------------------------
// [Run]
// --------------------------------------------------------------------------
Error run(CBNode* node);
// --------------------------------------------------------------------------
// [Init / Cleanup]
// --------------------------------------------------------------------------
protected:
// Just to prevent these methods from being called by X86RAPass::translate().
ASMJIT_INLINE void init(CBNode* node, X86RAData* map);
ASMJIT_INLINE void cleanup();
// --------------------------------------------------------------------------
// [Plan / Spill / Alloc]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void plan();
template<int C>
ASMJIT_INLINE void spill();
template<int C>
ASMJIT_INLINE void alloc();
// --------------------------------------------------------------------------
// [GuessAlloc / GuessSpill]
// --------------------------------------------------------------------------
//! Guess which register is the best candidate for `vreg` from `allocableRegs`.
//!
//! The guess is based on looking ahead and inspecting the register allocator
//! data of the following instructions. The main reason is to prevent an
//! allocation into a register that is needed by the next instruction(s). The
//! lookahead goes as far as possible; once the mask of remaining registers
//! would become zero, the last non-zero mask (called 'safeRegs') is returned.
template<int C>
ASMJIT_INLINE uint32_t guessAlloc(VirtReg* vreg, uint32_t allocableRegs);
//! Guess whether to move the given `vreg` instead of spilling it.
template<int C>
ASMJIT_INLINE uint32_t guessSpill(VirtReg* vreg, uint32_t allocableRegs);
// --------------------------------------------------------------------------
// [Modified]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void modified();
// --------------------------------------------------------------------------
// [Members]
// --------------------------------------------------------------------------
//! Will alloc to these registers.
X86RegMask _willAlloc;
//! Will spill these registers.
X86RegMask _willSpill;
};
// ============================================================================
// [asmjit::X86VarAlloc - Run]
// ============================================================================
Error X86VarAlloc::run(CBNode* node_) {
// Initialize.
X86RAData* raData = node_->getPassData<X86RAData>();
// Initialize the allocator; connect Vd->Va.
init(node_, raData);
if (raData->tiedTotal != 0) {
// Unuse overwritten variables.
unuseBefore<X86Reg::kKindGp>();
unuseBefore<X86Reg::kKindMm>();
unuseBefore<X86Reg::kKindVec>();
// Plan the allocation. The planner assigns an input/output register to
// each variable and decides whether to allocate it in a register or on
// the stack.
plan<X86Reg::kKindGp>();
plan<X86Reg::kKindMm>();
plan<X86Reg::kKindVec>();
// Spill all variables marked by plan().
spill<X86Reg::kKindGp>();
spill<X86Reg::kKindMm>();
spill<X86Reg::kKindVec>();
// Alloc all variables marked by plan().
alloc<X86Reg::kKindGp>();
alloc<X86Reg::kKindMm>();
alloc<X86Reg::kKindVec>();
// Translate node operands.
if (node_->getType() == CBNode::kNodeInst) {
CBInst* node = static_cast<CBInst*>(node_);
if (node->hasExtraReg()) {
Reg reg = node->getExtraReg().toReg<Reg>();
ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, &reg, 1));
node->setExtraReg(reg);
}
ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, node->getOpArray(), node->getOpCount()));
}
else if (node_->getType() == CBNode::kNodePushArg) {
CCPushArg* node = static_cast<CCPushArg*>(node_);
CCFuncCall* call = static_cast<CCFuncCall*>(node->getCall());
FuncDetail& fd = call->getDetail();
uint32_t argIndex = 0;
uint32_t argMask = node->_args;
VirtReg* cvtReg = node->getCvtReg();
VirtReg* srcReg = node->getSrcReg();
// Convert first.
ASMJIT_ASSERT(srcReg->getPhysId() != Globals::kInvalidRegId);
if (cvtReg) {
ASMJIT_ASSERT(cvtReg->getPhysId() != Globals::kInvalidRegId);
X86Reg dstOp(X86Reg::fromSignature(cvtReg->getSignature(), cvtReg->getId()));
X86Reg srcOp(X86Reg::fromSignature(srcReg->getSignature(), srcReg->getId()));
// Emit the conversion before storing the argument to the stack.
X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(_context->cc()),
dstOp, cvtReg->getTypeId(),
srcOp, srcReg->getTypeId(), _context->_avxEnabled);
srcReg = cvtReg;
}
while (argMask != 0) {
if (argMask & 0x1) {
FuncDetail::Value& arg = fd.getArg(argIndex);
ASMJIT_ASSERT(arg.byStack());
X86Mem dst = x86::ptr(_context->_zsp, -static_cast<int>(_context->getGpSize()) + arg.getStackOffset());
_context->emitRegToStack(arg.getTypeId(), &dst, srcReg->getTypeId(), srcReg->getPhysId());
}
argIndex++;
argMask >>= 1;
}
}
// Mark variables as modified.
modified<X86Reg::kKindGp>();
modified<X86Reg::kKindMm>();
modified<X86Reg::kKindVec>();
// Cleanup; disconnect Vd->Va.
cleanup();
// Update clobbered mask.
_context->_clobberedRegs.or_(_willAlloc);
}
// Update clobbered mask.
_context->_clobberedRegs.or_(raData->clobberedRegs);
// Unuse.
if (raData->tiedTotal != 0) {
unuseAfter<X86Reg::kKindGp>();
unuseAfter<X86Reg::kKindMm>();
unuseAfter<X86Reg::kKindVec>();
}
return kErrorOk;
}
// ============================================================================
// [asmjit::X86VarAlloc - Init / Cleanup]
// ============================================================================
ASMJIT_INLINE void X86VarAlloc::init(CBNode* node, X86RAData* raData) {
X86BaseAlloc::init(node, raData);
// These masks block the planner from assigning these registers during
// planning. The planner will add more registers when assigning registers
// to variables that don't require any specific one.
_willAlloc = raData->inRegs;
_willAlloc.or_(raData->outRegs);
_willSpill.reset();
}
ASMJIT_INLINE void X86VarAlloc::cleanup() {
X86BaseAlloc::cleanup();
}
// ============================================================================
// [asmjit::X86VarAlloc - Plan / Spill / Alloc]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86VarAlloc::plan() {
if (isTiedDone(C)) return;
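// plan() works in two passes: the first pass below keeps variables that
// can stay where they are and computes the 'willAlloc'/'willFree' masks;
// the second pass picks concrete registers for the remaining variables
// and records which occupied registers must be spilled.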
uint32_t i;
uint32_t willAlloc = _willAlloc.get(C);
uint32_t willFree = 0;
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
X86RAState* state = getState();
// Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
for (i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
VirtReg* vreg = tied->vreg;
uint32_t vaFlags = tied->flags;
uint32_t physId = vreg->getPhysId();
uint32_t regMask = (physId != Globals::kInvalidRegId) ? Utils::mask(physId) : 0;
if ((vaFlags & TiedReg::kXReg) != 0) {
// Planning the register allocation. First check whether the variable is
// already allocated in a register and whether it can stay there.
//
// The following conditions may happen:
//
// a) Allocated register is one of the mandatoryRegs.
// b) Allocated register is one of the allocableRegs.
uint32_t mandatoryRegs = tied->inRegs;
uint32_t allocableRegs = tied->allocableRegs;
if (regMask != 0) {
// Special path for planning output-only registers.
if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) {
uint32_t outPhysId = tied->outPhysId;
mandatoryRegs = (outPhysId != Globals::kInvalidRegId) ? Utils::mask(outPhysId) : 0;
if ((mandatoryRegs | allocableRegs) & regMask) {
tied->setOutPhysId(physId);
tied->flags |= TiedReg::kWDone;
if (mandatoryRegs & regMask) {
// Case 'a' - 'willAlloc' contains initially all inRegs from all TiedReg's.
ASMJIT_ASSERT((willAlloc & regMask) != 0);
}
else {
// Case 'b'.
tied->setOutPhysId(physId);
willAlloc |= regMask;
}
addTiedDone(C);
continue;
}
}
else {
if ((mandatoryRegs | allocableRegs) & regMask) {
tied->setInPhysId(physId);
tied->flags |= TiedReg::kRDone;
if (mandatoryRegs & regMask) {
// Case 'a' - 'willAlloc' contains initially all inRegs from all TiedReg's.
ASMJIT_ASSERT((willAlloc & regMask) != 0);
}
else {
// Case 'b'.
tied->inRegs |= regMask;
willAlloc |= regMask;
}
addTiedDone(C);
continue;
}
}
}
// The variable is not allocated, or is allocated in a register that
// matches neither inRegs nor allocableRegs. The next step is to pick
// the best register for it. If `inRegs` contains any register the
// decision is simple - we have to follow it; otherwise we use
// `guessAlloc()` to find a register (or registers) by looking ahead.
// However, this is not the right place to pick a good register, because
// at this point we have no information about which registers will be
// freed. So instead of choosing a register here, we just mark the
// current register (if the variable is allocated) as `willFree` so the
// planner can use this information in the second pass to plan the
// allocation as a whole.
willFree |= regMask;
continue;
}
else {
if (regMask != 0) {
willFree |= regMask;
continue;
}
else {
tied->flags |= TiedReg::kRDone;
addTiedDone(C);
continue;
}
}
}
// Occupied registers with the 'willFree' registers removed; this is
// basically the set of registers we can use to allocate variables that
// have no inRegs specified.
uint32_t occupied = state->_occupied.get(C) & ~willFree;
uint32_t willSpill = 0;
// Find the best registers for variables that are not allocated yet.
for (i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
VirtReg* vreg = tied->vreg;
uint32_t vaFlags = tied->flags;
if ((vaFlags & TiedReg::kXReg) != 0) {
if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) {
if (vaFlags & TiedReg::kWDone)
continue;
// Skip all registers that have an outPhysId assigned; spill the target register if it's occupied.
if (tied->hasOutPhysId()) {
uint32_t outRegs = Utils::mask(tied->outPhysId);
willSpill |= occupied & outRegs;
continue;
}
}
else {
if (vaFlags & TiedReg::kRDone)
continue;
// Skip all registers that have an inPhysId assigned; it indicates that
// the register to allocate in is already known.
if (tied->hasInPhysId()) {
uint32_t inRegs = tied->inRegs;
willSpill |= occupied & inRegs;
continue;
}
}
uint32_t m = tied->inRegs;
if (tied->hasOutPhysId())
m |= Utils::mask(tied->outPhysId);
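// This variable's own inRegs/outPhysId are already part of 'willAlloc',
// so '(willAlloc ^ m)' yields the registers claimed by other variables;
// masking them out prevents collisions with already-planned allocations.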
m = tied->allocableRegs & ~(willAlloc ^ m);
m = guessAlloc<C>(vreg, m);
ASMJIT_ASSERT(m != 0);
uint32_t candidateRegs = m & ~occupied;
uint32_t homeMask = vreg->getHomeMask();
uint32_t physId;
uint32_t regMask;
if (candidateRegs == 0) {
candidateRegs = m & occupied & ~state->_modified.get(C);
if (candidateRegs == 0)
candidateRegs = m;
}
if (candidateRegs & homeMask) candidateRegs &= homeMask;
physId = Utils::findFirstBit(candidateRegs);
regMask = Utils::mask(physId);
if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) {
tied->setOutPhysId(physId);
}
else {
tied->setInPhysId(physId);
tied->inRegs = regMask;
}
willAlloc |= regMask;
willSpill |= regMask & occupied;
willFree &= ~regMask;
occupied |= regMask;
continue;
}
else if ((vaFlags & TiedReg::kXMem) != 0) {
uint32_t physId = vreg->getPhysId();
if (physId != Globals::kInvalidRegId && (vaFlags & TiedReg::kXMem) != TiedReg::kWMem) {
willSpill |= Utils::mask(physId);
}
}
}
// Set calculated masks back to the allocator; needed by spill() and alloc().
_willSpill.set(C, willSpill);
_willAlloc.set(C, willAlloc);
}
template<int C>
ASMJIT_INLINE void X86VarAlloc::spill() {
uint32_t m = _willSpill.get(C);
uint32_t i = static_cast<uint32_t>(0) - 1;
if (m == 0) return;
X86RAState* state = getState();
VirtReg** vregs = state->getListByKind(C);
// Registers available when deciding whether a move has any benefit over a spill.
uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C));
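// The loop below iterates over the set bits of 'm' by shifting past each
// bit found. For example (hypothetical mask), m == 0b10100: the first
// pass yields i == 2 and leaves m == 0b10, the second yields i == 4 and
// leaves m == 0, so exactly physIds 2 and 4 are visited.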
do {
// We always advance one more to destroy the bit that we have found.
uint32_t bitIndex = Utils::findFirstBit(m) + 1;
i += bitIndex;
m >>= bitIndex;
VirtReg* vreg = vregs[i];
ASMJIT_ASSERT(vreg);
TiedReg* tied = vreg->_tied;
ASMJIT_ASSERT(!tied || (tied->flags & TiedReg::kXReg) == 0);
if (vreg->isModified() && availableRegs) {
// Don't check for alternatives if the variable has to be spilled.
if (!tied || (tied->flags & TiedReg::kSpill) == 0) {
uint32_t altRegs = guessSpill<C>(vreg, availableRegs);
if (altRegs != 0) {
uint32_t physId = Utils::findFirstBit(altRegs);
uint32_t regMask = Utils::mask(physId);
_context->move<C>(vreg, physId);
availableRegs ^= regMask;
continue;
}
}
}
_context->spill<C>(vreg);
} while (m != 0);
}
template<int C>
ASMJIT_INLINE void X86VarAlloc::alloc() {
if (isTiedDone(C)) return;
uint32_t i;
bool didWork;
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
// Alloc `in` regs.
do {
didWork = false;
for (i = 0; i < tiedCount; i++) {
TiedReg* aTied = &tiedArray[i];
VirtReg* aVReg = aTied->vreg;
if ((aTied->flags & (TiedReg::kRReg | TiedReg::kRDone)) != TiedReg::kRReg)
continue;
uint32_t aPhysId = aVReg->getPhysId();
uint32_t bPhysId = aTied->inPhysId;
// Shouldn't be the same.
ASMJIT_ASSERT(aPhysId != bPhysId);
VirtReg* bVReg = getState()->getListByKind(C)[bPhysId];
if (bVReg) {
// Swap two registers if we can solve two allocation tasks by a single
// swap instruction - either two registers required by the
// instruction/node, or one required register with another non-required
// one. Gp registers use 'xchg'; Vec registers use an xor-based swap.
if ((C == X86Reg::kKindGp || C == X86Reg::kKindVec) && aPhysId != Globals::kInvalidRegId) {
TiedReg* bTied = bVReg->_tied;
if (C == X86Reg::kKindGp)
_context->swapGp(aVReg, bVReg);
else
_context->swapVec(aVReg, bVReg);
aTied->flags |= TiedReg::kRDone;
addTiedDone(C);
// Double-hit, two registers allocated by a single xchg.
if (bTied && bTied->inPhysId == aPhysId) {
bTied->flags |= TiedReg::kRDone;
addTiedDone(C);
}
didWork = true;
continue;
}
}
else if (aPhysId != Globals::kInvalidRegId) {
_context->move<C>(aVReg, bPhysId);
aTied->flags |= TiedReg::kRDone;
addTiedDone(C);
didWork = true;
continue;
}
else {
_context->alloc<C>(aVReg, bPhysId);
aTied->flags |= TiedReg::kRDone;
addTiedDone(C);
didWork = true;
continue;
}
}
} while (didWork);
// Alloc 'out' regs.
for (i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
VirtReg* vreg = tied->vreg;
if ((tied->flags & (TiedReg::kXReg | TiedReg::kWDone)) != TiedReg::kWReg)
continue;
uint32_t physId = tied->outPhysId;
ASMJIT_ASSERT(physId != Globals::kInvalidRegId);
if (vreg->getPhysId() != physId) {
ASMJIT_ASSERT(getState()->getListByKind(C)[physId] == nullptr);
_context->attach<C>(vreg, physId, false);
}
tied->flags |= TiedReg::kWDone;
addTiedDone(C);
}
}
// ============================================================================
// [asmjit::X86VarAlloc - GuessAlloc / GuessSpill]
// ============================================================================
template<int C>
ASMJIT_INLINE uint32_t X86VarAlloc::guessAlloc(VirtReg* vreg, uint32_t allocableRegs) {
ASMJIT_ASSERT(allocableRegs != 0);
// Stop now if there is only one bit (register) set in `allocableRegs` mask.
if (Utils::isPowerOf2(allocableRegs)) return allocableRegs;
uint32_t raId = vreg->_raId;
uint32_t safeRegs = allocableRegs;
uint32_t i;
uint32_t maxLookAhead = kCompilerDefaultLookAhead;
// Look ahead and calculate the mask of special registers on both input and output.
CBNode* node = _node;
for (i = 0; i < maxLookAhead; i++) {
X86RAData* raData = node->getPassData<X86RAData>();
RABits* liveness = raData ? raData->liveness : static_cast<RABits*>(nullptr);
// If the variable becomes dead it doesn't make sense to continue.
if (liveness && !liveness->getBit(raId)) break;
// Stop on `CBSentinel` and `CCFuncRet`.
if (node->hasFlag(CBNode::kFlagIsRet)) break;
// Stop on a conditional jump; we don't follow them.
if (node->hasFlag(CBNode::kFlagIsJcc)) break;
// Advance on non-conditional jump.
if (node->hasFlag(CBNode::kFlagIsJmp)) {
node = static_cast<CBJump*>(node)->getTarget();
// Stop on jump that is not followed.
if (!node) break;
}
node = node->getNext();
ASMJIT_ASSERT(node != nullptr);
raData = node->getPassData<X86RAData>();
if (raData) {
TiedReg* tied = raData->findTiedByKind(C, vreg);
uint32_t mask;
if (tied) {
// If the variable is overwritten it doesn't make sense to continue.
if ((tied->flags & TiedReg::kRAll) == 0)
break;
mask = tied->allocableRegs;
if (mask != 0) {
allocableRegs &= mask;
if (allocableRegs == 0) break;
safeRegs = allocableRegs;
}
mask = tied->inRegs;
if (mask != 0) {
allocableRegs &= mask;
if (allocableRegs == 0) break;
safeRegs = allocableRegs;
break;
}
allocableRegs &= ~(raData->outRegs.get(C) | raData->clobberedRegs.get(C));
if (allocableRegs == 0) break;
}
else {
allocableRegs &= ~(raData->inRegs.get(C) | raData->outRegs.get(C) | raData->clobberedRegs.get(C));
if (allocableRegs == 0) break;
}
safeRegs = allocableRegs;
}
}
return safeRegs;
}
template<int C>
ASMJIT_INLINE uint32_t X86VarAlloc::guessSpill(VirtReg* vreg, uint32_t allocableRegs) {
ASMJIT_ASSERT(allocableRegs != 0);
return 0;
}
// ============================================================================
// [asmjit::X86VarAlloc - Modified]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86VarAlloc::modified() {
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
for (uint32_t i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
if (tied->flags & TiedReg::kWReg) {
VirtReg* vreg = tied->vreg;
uint32_t physId = vreg->getPhysId();
uint32_t regMask = Utils::mask(physId);
vreg->setModified(true);
_context->_x86State._modified.or_(C, regMask);
}
}
}
// ============================================================================
// [asmjit::X86CallAlloc]
// ============================================================================
//! \internal
//!
//! Register allocator context (function call).
struct X86CallAlloc : public X86BaseAlloc {
// --------------------------------------------------------------------------
// [Construction / Destruction]
// --------------------------------------------------------------------------
ASMJIT_INLINE X86CallAlloc(X86RAPass* context) : X86BaseAlloc(context) {}
ASMJIT_INLINE ~X86CallAlloc() {}
// --------------------------------------------------------------------------
// [Accessors]
// --------------------------------------------------------------------------
//! Get the node.
ASMJIT_INLINE CCFuncCall* getNode() const { return static_cast<CCFuncCall*>(_node); }
// --------------------------------------------------------------------------
// [Run]
// --------------------------------------------------------------------------
Error run(CCFuncCall* node);
// --------------------------------------------------------------------------
// [Init / Cleanup]
// --------------------------------------------------------------------------
protected:
// Just to prevent calling these methods from X86RAPass::translate().
ASMJIT_INLINE void init(CCFuncCall* node, X86RAData* raData);
ASMJIT_INLINE void cleanup();
// --------------------------------------------------------------------------
// [Plan / Alloc / Spill / Move]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void plan();
template<int C>
ASMJIT_INLINE void spill();
template<int C>
ASMJIT_INLINE void alloc();
// --------------------------------------------------------------------------
// [AllocImmsOnStack]
// --------------------------------------------------------------------------
ASMJIT_INLINE void allocImmsOnStack();
// --------------------------------------------------------------------------
// [Duplicate]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void duplicate();
// --------------------------------------------------------------------------
// [GuessAlloc / GuessSpill]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE uint32_t guessAlloc(VirtReg* vreg, uint32_t allocableRegs);
template<int C>
ASMJIT_INLINE uint32_t guessSpill(VirtReg* vreg, uint32_t allocableRegs);
// --------------------------------------------------------------------------
// [Save]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void save();
// --------------------------------------------------------------------------
// [Clobber]
// --------------------------------------------------------------------------
template<int C>
ASMJIT_INLINE void clobber();
// --------------------------------------------------------------------------
// [Ret]
// --------------------------------------------------------------------------
ASMJIT_INLINE void ret();
// --------------------------------------------------------------------------
// [Members]
// --------------------------------------------------------------------------
//! Will alloc to these registers.
X86RegMask _willAlloc;
//! Will spill these registers.
X86RegMask _willSpill;
};
// ============================================================================
// [asmjit::X86CallAlloc - Run]
// ============================================================================
Error X86CallAlloc::run(CCFuncCall* node) {
// Initialize the allocator; prepare basics and connect Vd->Va.
X86RAData* raData = node->getPassData<X86RAData>();
init(node, raData);
// Plan the register allocation. The planner can only assign one register
// per variable; a variable used multiple times is handled later by duplicate().
plan<X86Reg::kKindGp >();
plan<X86Reg::kKindMm >();
plan<X86Reg::kKindVec>();
// Spill.
spill<X86Reg::kKindGp >();
spill<X86Reg::kKindMm >();
spill<X86Reg::kKindVec>();
// Alloc.
alloc<X86Reg::kKindGp >();
alloc<X86Reg::kKindMm >();
alloc<X86Reg::kKindVec>();
// Save modified variables that live in registers clobbered by the call
// and will be needed again later; the clobbered registers themselves are
// released later by clobber().
save<X86Reg::kKindGp >();
save<X86Reg::kKindMm >();
save<X86Reg::kKindVec>();
// Allocate immediates in registers and on the stack.
allocImmsOnStack();
// Duplicate.
duplicate<X86Reg::kKindGp >();
duplicate<X86Reg::kKindMm >();
duplicate<X86Reg::kKindVec>();
// Translate call operand.
ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, node->getOpArray(), node->getOpCount()));
// Set the cursor so the instructions below are emitted after the call.
_cc->_setCursor(node);
// If the callee pops its own arguments off the stack, the stack pointer has to be adjusted back manually.
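// For example (hypothetical): a 32-bit __stdcall callee that pops 12
// bytes of arguments would be followed by 'sub esp, 12' here, keeping
// the frame layout consistent.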
FuncDetail& fd = node->getDetail();
if (fd.hasFlag(CallConv::kFlagCalleePopsStack) && fd.getArgStackSize() != 0)
_cc->emit(X86Inst::kIdSub, _context->_zsp, static_cast<int>(fd.getArgStackSize()));
// Clobber.
clobber<X86Reg::kKindGp >();
clobber<X86Reg::kKindMm >();
clobber<X86Reg::kKindVec>();
// Return.
ret();
// Unuse.
unuseAfter<X86Reg::kKindGp >();
unuseAfter<X86Reg::kKindMm >();
unuseAfter<X86Reg::kKindVec>();
// Cleanup; disconnect Vd->Va.
cleanup();
return kErrorOk;
}
// ============================================================================
// [asmjit::X86CallAlloc - Init / Cleanup]
// ============================================================================
ASMJIT_INLINE void X86CallAlloc::init(CCFuncCall* node, X86RAData* raData) {
X86BaseAlloc::init(node, raData);
// Create mask of all registers that will be used to pass function arguments.
_willAlloc.reset();
_willAlloc.set(X86Reg::kKindGp , node->getDetail().getUsedRegs(X86Reg::kKindGp ));
_willAlloc.set(X86Reg::kKindMm , node->getDetail().getUsedRegs(X86Reg::kKindMm ));
_willAlloc.set(X86Reg::kKindK , node->getDetail().getUsedRegs(X86Reg::kKindK ));
_willAlloc.set(X86Reg::kKindVec, node->getDetail().getUsedRegs(X86Reg::kKindVec));
_willSpill.reset();
}
ASMJIT_INLINE void X86CallAlloc::cleanup() {
X86BaseAlloc::cleanup();
}
// ============================================================================
// [asmjit::X86CallAlloc - Plan / Spill / Alloc]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86CallAlloc::plan() {
uint32_t i;
uint32_t clobbered = _raData->clobberedRegs.get(C);
uint32_t willAlloc = _willAlloc.get(C);
uint32_t willFree = clobbered & ~willAlloc;
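// Registers the call clobbers that are not used for argument passing;
// variables living in them must be moved or spilled before the call.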
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
X86RAState* state = getState();
// Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
for (i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
VirtReg* vreg = tied->vreg;
uint32_t vaFlags = tied->flags;
uint32_t physId = vreg->getPhysId();
uint32_t regMask = (physId != Globals::kInvalidRegId) ? Utils::mask(physId) : 0;
if ((vaFlags & TiedReg::kRReg) != 0) {
// Planning the register allocation. First check whether the variable is
// already allocated in a register and whether it can stay there. Function
// arguments are passed either in a specific register or on the stack, so
// we mostly care about mandatory registers.
uint32_t inRegs = tied->inRegs;
if (inRegs == 0) {
inRegs = tied->allocableRegs;
}
// Optimize the situation where the variable has to be allocated in a
// mandatory register, but it's already allocated in a register that is
// not clobbered (i.e. it will survive the function call).
if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & TiedReg::kUnuse) == 0)) {
tied->setInPhysId(physId);
tied->flags |= TiedReg::kRDone;
addTiedDone(C);
}
else {
willFree |= regMask;
}
}
else {
// Memory access - if the variable is allocated in a register it has to be freed.
if (regMask != 0) {
willFree |= regMask;
}
else {
tied->flags |= TiedReg::kRDone;
addTiedDone(C);
}
}
}
// Occupied registers with the 'willFree' registers removed; this is
// basically the set of registers we can use to allocate variables that
// have no inRegs specified.
uint32_t occupied = state->_occupied.get(C) & ~willFree;
uint32_t willSpill = 0;
// Find the best registers for variables that are not allocated yet. This
// is only useful for the Gp register used as the call operand.
for (i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
VirtReg* vreg = tied->vreg;
uint32_t vaFlags = tied->flags;
if ((vaFlags & TiedReg::kRDone) != 0 || (vaFlags & TiedReg::kRReg) == 0)
continue;
// All registers except the Gp register used by the call itself must have an inPhysId assigned.
uint32_t m = tied->inRegs;
if (C != X86Reg::kKindGp || m) {
ASMJIT_ASSERT(m != 0);
tied->setInPhysId(Utils::findFirstBit(m));
willSpill |= occupied & m;
continue;
}
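// At this point 'm' is zero (a GP call operand with no mandatory
// register), so the expression below reduces to allocableRegs & ~willAlloc,
// i.e. any allocable GP register not reserved for argument passing.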
m = tied->allocableRegs & ~(willAlloc ^ m);
m = guessAlloc<C>(vreg, m);
ASMJIT_ASSERT(m != 0);
uint32_t candidateRegs = m & ~occupied;
if (candidateRegs == 0) {
candidateRegs = m & occupied & ~state->_modified.get(C);
if (candidateRegs == 0)
candidateRegs = m;
}
if (!(vaFlags & (TiedReg::kWReg | TiedReg::kUnuse)) && (candidateRegs & ~clobbered))
candidateRegs &= ~clobbered;
uint32_t physId = Utils::findFirstBit(candidateRegs);
uint32_t regMask = Utils::mask(physId);
tied->setInPhysId(physId);
tied->inRegs = regMask;
willAlloc |= regMask;
willSpill |= regMask & occupied;
willFree &= ~regMask;
occupied |= regMask;
continue;
}
// Set calculated masks back to the allocator; needed by spill() and alloc().
_willSpill.set(C, willSpill);
_willAlloc.set(C, willAlloc);
}
template<int C>
ASMJIT_INLINE void X86CallAlloc::spill() {
uint32_t m = _willSpill.get(C);
uint32_t i = static_cast<uint32_t>(0) - 1;
if (m == 0)
return;
X86RAState* state = getState();
VirtReg** sVars = state->getListByKind(C);
// Registers available when deciding whether a move has any benefit over a spill.
uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C));
do {
// We always advance one more to destroy the bit that we have found.
uint32_t bitIndex = Utils::findFirstBit(m) + 1;
i += bitIndex;
m >>= bitIndex;
VirtReg* vreg = sVars[i];
ASMJIT_ASSERT(vreg && !vreg->_tied);
if (vreg->isModified() && availableRegs) {
uint32_t available = guessSpill<C>(vreg, availableRegs);
if (available != 0) {
uint32_t physId = Utils::findFirstBit(available);
uint32_t regMask = Utils::mask(physId);
_context->move<C>(vreg, physId);
availableRegs ^= regMask;
continue;
}
}
_context->spill<C>(vreg);
} while (m != 0);
}
template<int C>
ASMJIT_INLINE void X86CallAlloc::alloc() {
if (isTiedDone(C)) return;
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
uint32_t i;
bool didWork;
do {
didWork = false;
for (i = 0; i < tiedCount; i++) {
TiedReg* aTied = &tiedArray[i];
VirtReg* aVReg = aTied->vreg;
if ((aTied->flags & (TiedReg::kRReg | TiedReg::kRDone)) != TiedReg::kRReg) continue;
uint32_t sPhysId = aVReg->getPhysId();
uint32_t bPhysId = aTied->inPhysId;
// Shouldn't be the same.
ASMJIT_ASSERT(sPhysId != bPhysId);
VirtReg* bVReg = getState()->getListByKind(C)[bPhysId];
if (bVReg) {
TiedReg* bTied = bVReg->_tied;
// Swap two registers if we can solve two allocation tasks by a single
// swap instruction - either two registers required by the
// instruction/node, or one required register with another non-required
// one. Gp registers use 'xchg'; Vec registers use an xor-based swap.
if ((C == X86Reg::kKindGp || C == X86Reg::kKindVec) && sPhysId != Globals::kInvalidRegId) {
if (C == X86Reg::kKindGp)
_context->swapGp(aVReg, bVReg);
else
_context->swapVec(aVReg, bVReg);
aTied->flags |= TiedReg::kRDone;
addTiedDone(C);
// Double-hit, two registers allocated by a single swap.
if (bTied && bTied->inPhysId == sPhysId) {
bTied->flags |= TiedReg::kRDone;
addTiedDone(C);
}
didWork = true;
continue;
}
}
else if (sPhysId != Globals::kInvalidRegId) {
_context->move<C>(aVReg, bPhysId);
_context->_clobberedRegs.or_(C, Utils::mask(bPhysId));
aTied->flags |= TiedReg::kRDone;
addTiedDone(C);
didWork = true;
continue;
}
else {
_context->alloc<C>(aVReg, bPhysId);
_context->_clobberedRegs.or_(C, Utils::mask(bPhysId));
aTied->flags |= TiedReg::kRDone;
addTiedDone(C);
didWork = true;
continue;
}
}
} while (didWork);
}
// ============================================================================
// [asmjit::X86CallAlloc - AllocImmsOnStack]
// ============================================================================
ASMJIT_INLINE void X86CallAlloc::allocImmsOnStack() {
CCFuncCall* node = getNode();
FuncDetail& fd = node->getDetail();
uint32_t argCount = fd.getArgCount();
Operand_* args = node->_args;
for (uint32_t i = 0; i < argCount; i++) {
Operand_& op = args[i];
if (!op.isImm()) continue;
const Imm& imm = static_cast<const Imm&>(op);
const FuncDetail::Value& arg = fd.getArg(i);
uint32_t varType = arg.getTypeId();
if (arg.byReg()) {
_context->emitImmToReg(varType, arg.getRegId(), &imm);
}
else {
X86Mem dst = x86::ptr(_context->_zsp, -static_cast<int>(_context->getGpSize()) + arg.getStackOffset());
_context->emitImmToStack(varType, &dst, &imm);
}
}
}
// ============================================================================
// [asmjit::X86CallAlloc - Duplicate]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86CallAlloc::duplicate() {
TiedReg* tiedArray = getTiedArrayByKind(C);
uint32_t tiedCount = getTiedCountByKind(C);
for (uint32_t i = 0; i < tiedCount; i++) {
TiedReg* tied = &tiedArray[i];
if ((tied->flags & TiedReg::kRReg) == 0) continue;
uint32_t inRegs = tied->inRegs;
if (!inRegs) continue;
VirtReg* vreg = tied->vreg;
uint32_t physId = vreg->getPhysId();
ASMJIT_ASSERT(physId != Globals::kInvalidRegId);
inRegs &= ~Utils::mask(physId);
if (!inRegs) continue;
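// Each remaining bit of 'inRegs' is an additional physical register that
// needs its own copy of 'vreg'. 'dupIndex' tracks the physical id as the
// mask is shifted right one bit at a time.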
for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) {
if (inRegs & 0x1) {
_context->emitMove(vreg, dupIndex, physId, "Duplicate");
_context->_clobberedRegs.or_(C, Utils::mask(dupIndex));
}
}
}
}
// ============================================================================
// [asmjit::X86CallAlloc - GuessAlloc / GuessSpill]
// ============================================================================
template<int C>
ASMJIT_INLINE uint32_t X86CallAlloc::guessAlloc(VirtReg* vreg, uint32_t allocableRegs) {
ASMJIT_ASSERT(allocableRegs != 0);
// Stop now if there is only one bit (register) set in 'allocableRegs' mask.
if (Utils::isPowerOf2(allocableRegs))
return allocableRegs;
uint32_t i;
uint32_t safeRegs = allocableRegs;
uint32_t maxLookAhead = kCompilerDefaultLookAhead;
// Look ahead and calculate the mask of special registers on both input and output.
CBNode* node = _node;
for (i = 0; i < maxLookAhead; i++) {
// Stop on `CCFuncRet` and `CBSentinel`.
if (node->hasFlag(CBNode::kFlagIsRet))
break;
// Stop on a conditional jump; we don't follow them.
if (node->hasFlag(CBNode::kFlagIsJcc))
break;
// Advance on non-conditional jump.
if (node->hasFlag(CBNode::kFlagIsJmp)) {
node = static_cast<CBJump*>(node)->getTarget();
// Stop on jump that is not followed.
if (!node) break;
}
node = node->getNext();
ASMJIT_ASSERT(node != nullptr);
X86RAData* raData = node->getPassData<X86RAData>();
if (raData) {
TiedReg* tied = raData->findTiedByKind(C, vreg);
if (tied) {
uint32_t inRegs = tied->inRegs;
if (inRegs != 0) {
safeRegs = allocableRegs;
allocableRegs &= inRegs;
if (allocableRegs == 0)
goto _UseSafeRegs;
else
return allocableRegs;
}
}
safeRegs = allocableRegs;
allocableRegs &= ~(raData->inRegs.get(C) | raData->outRegs.get(C) | raData->clobberedRegs.get(C));
if (allocableRegs == 0)
break;
}
}
_UseSafeRegs:
return safeRegs;
}
template<int C>
ASMJIT_INLINE uint32_t X86CallAlloc::guessSpill(VirtReg* vreg, uint32_t allocableRegs) {
ASMJIT_ASSERT(allocableRegs != 0);
return 0;
}
// ============================================================================
// [asmjit::X86CallAlloc - Save]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86CallAlloc::save() {
X86RAState* state = getState();
VirtReg** sVars = state->getListByKind(C);
uint32_t i;
uint32_t affected = _raData->clobberedRegs.get(C) & state->_occupied.get(C) & state->_modified.get(C);
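// Only variables that are modified, currently occupy a register, and sit
// in a register the call clobbers need saving; a clean variable's memory
// home is already up to date.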
for (i = 0; affected != 0; i++, affected >>= 1) {
if (affected & 0x1) {
VirtReg* vreg = sVars[i];
ASMJIT_ASSERT(vreg != nullptr);
ASMJIT_ASSERT(vreg->isModified());
TiedReg* tied = vreg->_tied;
if (!tied || (tied->flags & (TiedReg::kWReg | TiedReg::kUnuse)) == 0)
_context->save<C>(vreg);
}
}
}
// ============================================================================
// [asmjit::X86CallAlloc - Clobber]
// ============================================================================
template<int C>
ASMJIT_INLINE void X86CallAlloc::clobber() {
X86RAState* state = getState();
VirtReg** sVars = state->getListByKind(C);
uint32_t i;
uint32_t affected = _raData->clobberedRegs.get(C) & state->_occupied.get(C);
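// Detach every occupied register the call clobbers. Variables whose
// memory copy is up to date, or that the call overwrites or unuses,
// transition to the 'Mem' state; the rest are reset to 'None'.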
for (i = 0; affected != 0; i++, affected >>= 1) {
if (affected & 0x1) {
VirtReg* vreg = sVars[i];
ASMJIT_ASSERT(vreg != nullptr);
TiedReg* tied = vreg->_tied;
uint32_t vdState = VirtReg::kStateNone;
if (!vreg->isModified() || (tied && (tied->flags & (TiedReg::kWAll | TiedReg::kUnuse)) != 0))
vdState = VirtReg::kStateMem;
_context->unuse<C>(vreg, vdState);
}
}
}
// ============================================================================
// [asmjit::X86CallAlloc - Ret]
// ============================================================================
ASMJIT_INLINE void X86CallAlloc::ret() {
CCFuncCall* node = getNode();
FuncDetail& fd = node->getDetail();
Operand_* rets = node->_ret;
for (uint32_t i = 0; i < 2; i++) {
const FuncDetail::Value& ret = fd.getRet(i);
Operand_* op = &rets[i];
if (!ret.byReg() || !op->isVirtReg())
continue;
VirtReg* vreg = _cc->getVirtRegById(op->getId());
uint32_t regId = ret.getRegId();
switch (vreg->getKind()) {
case X86Reg::kKindGp:
_context->unuse<X86Reg::kKindGp>(vreg);
_context->attach<X86Reg::kKindGp>(vreg, regId, true);
break;
case X86Reg::kKindMm:
_context->unuse<X86Reg::kKindMm>(vreg);
_context->attach<X86Reg::kKindMm>(vreg, regId, true);
break;
case X86Reg::kKindVec:
if (X86Reg::kindOf(ret.getRegType()) == X86Reg::kKindVec) {
_context->unuse<X86Reg::kKindVec>(vreg);
_context->attach<X86Reg::kKindVec>(vreg, regId, true);
}
else {
uint32_t elementId = TypeId::elementOf(vreg->getTypeId());
uint32_t size = (elementId == TypeId::kF32) ? 4 : 8;
X86Mem m = _context->getVarMem(vreg);
m.setSize(size);
_context->unuse<X86Reg::kKindVec>(vreg, VirtReg::kStateMem);
_cc->fstp(m);
}
break;
}
}
}
// ============================================================================
// [asmjit::X86RAPass - TranslateOperands]
// ============================================================================
//! \internal
static Error X86RAPass_translateOperands(X86RAPass* self, Operand_* opArray, uint32_t opCount) {
X86Compiler* cc = self->cc();
// Translate variables into registers.
for (uint32_t i = 0; i < opCount; i++) {
Operand_* op = &opArray[i];
if (op->isVirtReg()) {
VirtReg* vreg = cc->getVirtRegById(op->getId());
ASMJIT_ASSERT(vreg != nullptr);
ASMJIT_ASSERT(vreg->getPhysId() != Globals::kInvalidRegId);
op->_reg.id = vreg->getPhysId();
}
else if (op->isMem()) {
X86Mem* m = static_cast<X86Mem*>(op);
if (m->hasBaseReg() && cc->isVirtRegValid(m->getBaseId())) {
VirtReg* vreg = cc->getVirtRegById(m->getBaseId());
if (m->isRegHome()) {
self->getVarCell(vreg);
}
else {
ASMJIT_ASSERT(vreg->getPhysId() != Globals::kInvalidRegId);
op->_mem.base = vreg->getPhysId();
}
}
if (m->hasIndexReg() && cc->isVirtRegValid(m->getIndexId())) {
VirtReg* vreg = cc->getVirtRegById(m->getIndexId());
op->_mem.index = vreg->getPhysId();
}
}
}
return kErrorOk;
}
// ============================================================================
// [asmjit::X86RAPass - TranslatePrologEpilog]
// ============================================================================
//! \internal
static Error X86RAPass_prepareFuncFrame(X86RAPass* self, CCFunc* func) {
FuncFrameInfo& ffi = func->getFrameInfo();
X86RegMask& clobberedRegs = self->_clobberedRegs;
// Initialize dirty registers.
ffi.setDirtyRegs(X86Reg::kKindGp , clobberedRegs.get(X86Reg::kKindGp ));
ffi.setDirtyRegs(X86Reg::kKindMm , clobberedRegs.get(X86Reg::kKindMm ));
ffi.setDirtyRegs(X86Reg::kKindK , clobberedRegs.get(X86Reg::kKindK ));
ffi.setDirtyRegs(X86Reg::kKindVec, clobberedRegs.get(X86Reg::kKindVec));
// Initialize stack size & alignment.
ffi.setStackFrameSize(self->_memAllTotal);
ffi.setStackFrameAlignment(self->_memMaxAlign);
return kErrorOk;
}
//! \internal
static Error X86RAPass_patchFuncMem(X86RAPass* self, CCFunc* func, CBNode* stop, FuncFrameLayout& layout) {
X86Compiler* cc = self->cc();
CBNode* node = func;
do {
if (node->getType() == CBNode::kNodeInst) {
CBInst* iNode = static_cast<CBInst*>(node);
if (iNode->hasMemOp()) {
X86Mem* m = iNode->getMemOp<X86Mem>();
if (m->isArgHome()) {
m->addOffsetLo32(layout.getStackArgsOffset());
m->clearArgHome();
}
if (m->isRegHome() && Operand::isPackedId(m->getBaseId())) {
VirtReg* vreg = cc->getVirtRegById(m->getBaseId());
ASMJIT_ASSERT(vreg != nullptr);
RACell* cell = vreg->getMemCell();
ASMJIT_ASSERT(cell != nullptr);
m->_setBase(cc->_nativeGpReg.getType(), self->_varBaseRegId);
m->addOffsetLo32(self->_varBaseOffset + cell->offset);
m->clearRegHome();
}
}
}
node = node->getNext();
} while (node != stop);
return kErrorOk;
}
// ============================================================================
// [asmjit::X86RAPass - Translate - Jump]
// ============================================================================
//! \internal
static void X86RAPass_translateJump(X86RAPass* self, CBJump* jNode, CBLabel* jTarget) {
X86Compiler* cc = self->cc();
CBNode* injectRef = self->getFunc()->getEnd()->getPrev();
CBNode* prevCursor = cc->setCursor(injectRef);
self->switchState(jTarget->getPassData<RAData>()->state);
// Any code necessary to `switchState()` will be added at the end of the function.
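// A sketch of the transform below (hypothetical labels): the state-switch
// code can't go between the 'jcc' and its target, so it's appended at the
// end of the function and the 'jcc' is re-pointed at it:
//
//   jcc L_inject         ; patched target
//   ...
//   L_inject:            ; injected at the end of the function
//     <state-switch code>
//     jmp L_target       ; original target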
if (cc->getCursor() != injectRef) {
// TODO: Can fail.
CBLabel* injectLabel = cc->newLabelNode();
// Add the jump to the target.
cc->jmp(jTarget->getLabel());
// Inject the label.
cc->_setCursor(injectRef);
cc->addNode(injectLabel);
// Finally, patch `jNode` target.
ASMJIT_ASSERT(jNode->getOpCount() > 0);
jNode->_opArray[jNode->getOpCount() - 1] = injectLabel->getLabel();
jNode->_target = injectLabel;
// If we injected any code, the jump may no longer satisfy the short form.
jNode->delOptions(X86Inst::kOptionShortForm);
}
cc->_setCursor(prevCursor);
self->loadState(jNode->getPassData<RAData>()->state);
}
// ============================================================================
// [asmjit::X86RAPass - Translate - Ret]
// ============================================================================
static Error X86RAPass_translateRet(X86RAPass* self, CCFuncRet* rNode, CBLabel* exitTarget) {
X86Compiler* cc = self->cc();
CBNode* node = rNode->getNext();
// 32-bit mode requires pushing floating point return value(s) onto the
// FPU stack; handle it here as it's a special case.
X86RAData* raData = rNode->getPassData<X86RAData>();
if (raData) {
TiedReg* tiedArray = raData->tiedArray;
uint32_t tiedTotal = raData->tiedTotal;
for (uint32_t i = 0; i < tiedTotal; i++) {
TiedReg* tied = &tiedArray[i];
if (tied->flags & (TiedReg::kX86Fld4 | TiedReg::kX86Fld8)) {
VirtReg* vreg = tied->vreg;
X86Mem m(self->getVarMem(vreg));
uint32_t elementId = TypeId::elementOf(vreg->getTypeId());
m.setSize(elementId == TypeId::kF32 ? 4 :
elementId == TypeId::kF64 ? 8 :
(tied->flags & TiedReg::kX86Fld4) ? 4 : 8);
cc->fld(m);
}
}
}
// Decide whether a `jmp` is needed; it's unnecessary when the exit label directly follows.
while (node) {
switch (node->getType()) {
// If we have found the exit label we just return; there is no need to
// emit a jump to it.
case CBNode::kNodeLabel:
if (static_cast<CBLabel*>(node) == exitTarget)
return kErrorOk;
goto _EmitRet;
case CBNode::kNodeData:
case CBNode::kNodeInst:
case CBNode::kNodeFuncCall:
case CBNode::kNodeFuncExit:
goto _EmitRet;
// Continue iterating.
case CBNode::kNodeComment:
case CBNode::kNodeAlign:
case CBNode::kNodeHint:
break;
// Invalid node to be here.
case CBNode::kNodeFunc:
return DebugUtils::errored(kErrorInvalidState);
// We can't go forward from here.
case CBNode::kNodeSentinel:
return kErrorOk;
}
node = node->getNext();
}
_EmitRet:
{
cc->_setCursor(rNode);
cc->jmp(exitTarget->getLabel());
}
return kErrorOk;
}
// ============================================================================
// [asmjit::X86RAPass - Translate - Func]
// ============================================================================
Error X86RAPass::translate() {
X86Compiler* cc = this->cc();
CCFunc* func = getFunc();
// Register allocator contexts.
X86VarAlloc vAlloc(this);
X86CallAlloc cAlloc(this);
// Flow.
CBNode* node_ = func;
CBNode* next = nullptr;
CBNode* stop = getStop();
ZoneList<CBNode*>::Link* jLink = _jccList.getFirst();
for (;;) {
while (node_->isTranslated()) {
// Switch state if we went to a node that is already translated.
if (node_->getType() == CBNode::kNodeLabel) {
CBLabel* node = static_cast<CBLabel*>(node_);
cc->_setCursor(node->getPrev());
switchState(node->getPassData<RAData>()->state);
}
_NextGroup:
if (!jLink) {
goto _Done;
}
else {
node_ = jLink->getValue();
jLink = jLink->getNext();
CBNode* jFlow = X86RAPass_getOppositeJccFlow(static_cast<CBJump*>(node_));
loadState(node_->getPassData<RAData>()->state);
if (jFlow->hasPassData() && jFlow->getPassData<RAData>()->state) {
X86RAPass_translateJump(this, static_cast<CBJump*>(node_), static_cast<CBLabel*>(jFlow));
node_ = jFlow;
if (node_->isTranslated())
goto _NextGroup;
}
else {
node_ = jFlow;
}
break;
}
}
next = node_->getNext();
node_->_flags |= CBNode::kFlagIsTranslated;
if (node_->hasPassData()) {
switch (node_->getType()) {
// --------------------------------------------------------------------
// [Align / Embed]
// --------------------------------------------------------------------
case CBNode::kNodeAlign:
case CBNode::kNodeData:
break;
// --------------------------------------------------------------------
// [Label]
// --------------------------------------------------------------------
case CBNode::kNodeLabel: {
CBLabel* node = static_cast<CBLabel*>(node_);
ASMJIT_ASSERT(node->getPassData<RAData>()->state == nullptr);
node->getPassData<RAData>()->state = saveState();
if (node == func->getExitNode())
goto _NextGroup;
break;
}
// --------------------------------------------------------------------
// [Inst/Call/SArg/Ret]
// --------------------------------------------------------------------
case CBNode::kNodeInst:
case CBNode::kNodeFunc:
case CBNode::kNodeFuncCall:
case CBNode::kNodePushArg:
// Update TiedReg's unuse flags based on liveness of the next node.
if (!node_->isJcc()) {
X86RAData* raData = node_->getPassData<X86RAData>();
RABits* liveness;
if (raData && next && next->hasPassData() && (liveness = next->getPassData<RAData>()->liveness)) {
TiedReg* tiedArray = raData->tiedArray;
uint32_t tiedTotal = raData->tiedTotal;
for (uint32_t i = 0; i < tiedTotal; i++) {
TiedReg* tied = &tiedArray[i];
VirtReg* vreg = tied->vreg;
if (!liveness->getBit(vreg->_raId) && !vreg->isFixed())
tied->flags |= TiedReg::kUnuse;
}
}
}
if (node_->getType() == CBNode::kNodeFuncCall) {
ASMJIT_PROPAGATE(cAlloc.run(static_cast<CCFuncCall*>(node_)));
break;
}
ASMJIT_FALLTHROUGH;
case CBNode::kNodeHint:
case CBNode::kNodeFuncExit: {
ASMJIT_PROPAGATE(vAlloc.run(node_));
// Handle conditional/unconditional jump.
if (node_->isJmpOrJcc()) {
CBJump* node = static_cast<CBJump*>(node_);
CBLabel* jTarget = node->getTarget();
// Target not followed.
if (!jTarget) {
if (node->isJmp())
goto _NextGroup;
else
break;
}
if (node->isJmp()) {
if (jTarget->hasPassData() && jTarget->getPassData<RAData>()->state) {
cc->_setCursor(node->getPrev());
switchState(jTarget->getPassData<RAData>()->state);
goto _NextGroup;
}
else {
next = jTarget;
}
}
else {
CBNode* jNext = node->getNext();
if (jTarget->isTranslated()) {
if (jNext->isTranslated()) {
ASMJIT_ASSERT(jNext->getType() == CBNode::kNodeLabel);
cc->_setCursor(node->getPrev());
intersectStates(
jTarget->getPassData<RAData>()->state,
jNext->getPassData<RAData>()->state);
}
RAState* savedState = saveState();
node->getPassData<RAData>()->state = savedState;
X86RAPass_translateJump(this, node, jTarget);
next = jNext;
}
else if (jNext->isTranslated()) {
ASMJIT_ASSERT(jNext->getType() == CBNode::kNodeLabel);
RAState* savedState = saveState();
node->getPassData<RAData>()->state = savedState;
cc->_setCursor(node);
switchState(jNext->getPassData<RAData>()->state);
next = jTarget;
}
else {
node->getPassData<RAData>()->state = saveState();
next = X86RAPass_getJccFlow(node);
}
}
}
else if (node_->isRet()) {
ASMJIT_PROPAGATE(
X86RAPass_translateRet(this, static_cast<CCFuncRet*>(node_), func->getExitNode()));
goto _NextGroup;
}
break;
}
// --------------------------------------------------------------------
// [End]
// --------------------------------------------------------------------
case CBNode::kNodeSentinel: {
goto _NextGroup;
}
default:
break;
}
}
if (next == stop)
goto _NextGroup;
node_ = next;
}
_Done:
{
ASMJIT_PROPAGATE(resolveCellOffsets());
ASMJIT_PROPAGATE(X86RAPass_prepareFuncFrame(this, func));
FuncFrameLayout layout;
ASMJIT_PROPAGATE(layout.init(func->getDetail(), func->getFrameInfo()));
_varBaseRegId = layout._stackBaseRegId;
_varBaseOffset = layout._stackBaseOffset;
ASMJIT_PROPAGATE(X86RAPass_patchFuncMem(this, func, stop, layout));
cc->_setCursor(func);
ASMJIT_PROPAGATE(FuncUtils::emitProlog(this->cc(), layout));
cc->_setCursor(func->getExitNode());
ASMJIT_PROPAGATE(FuncUtils::emitEpilog(this->cc(), layout));
}
return kErrorOk;
}
} // asmjit namespace
// [Api-End]
#include "../asmjit_apiend.h"
// [Guard]
#endif // ASMJIT_BUILD_X86 && !ASMJIT_DISABLE_COMPILER