[gamecode] Add unsigned divide and remainder instructions

I had forgotten that unsigned division differs from signed division
(rather silly of me). However, testing and analysis show that a separate
unsigned true-modulo instruction is not needed: unsigned operands can
never be negative, so true modulo always gives the same result as remainder.
This commit is contained in:
Bill Currie 2022-02-06 12:20:17 +09:00
parent 5f2fd3cac0
commit e1ecda9221
3 changed files with 244 additions and 23 deletions

View file

@ -8,7 +8,7 @@ bitmap_txt = """
0 0000 0000 noop
0 0000 0001 adjstk
0 0000 0010 constant
0 1011 nnnn
0 1011 otss udivops
0 1111 s0mm load64
0 1111 s1mm store64
0 1111 n000
@ -483,6 +483,18 @@ return_formats = {
"format": "%Mc5",
"types": "ev_void, ev_void, ev_void",
}
# Format description for the "udivops" group: unsigned divide and remainder.
# The placeholders o, t and ss match the field letters on the group's bitmap
# line ("0 1011 otss udivops"):
#   o   indexes op_udiv (div or rem),
#   t   indexes udiv_type / udiv_types (uint or ulong),
#   ss  is the operand width minus one, so ss+1 is the width (1..4).
# NOTE(review): the code that expands these "{...}" templates is not visible
# here; presumably each is evaluated with o, t, ss and the "args" lists in
# scope -- confirm against the generator.
udivops_formats = {
# Opcode enum name, e.g. OP_DIV_u_1 for o=0, t=0, ss=0.
"opcode": "OP_{op_udiv[o].upper()}_{udiv_type[t]}_{ss+1}",
# Assembly-style mnemonic: operation name, a dot, then the type letter.
"mnemonic": "{op_udiv[o]}.{udiv_type[t]}",
"opname": "{op_udiv[o]}",
# All three operands share the same width and the same unsigned type.
"widths": "{ss+1}, {ss+1}, {ss+1}",
"types": "{udiv_types[t]}, {udiv_types[t]}, {udiv_types[t]}",
"args": {
"op_udiv": ["div", "rem"],
# Type letter used in opcode names; kept index-aligned with udiv_types.
"udiv_type": ['u', 'U'],
"udiv_types": ["ev_uint", "ev_ulong"],
},
}
vecops_formats = {
"opcode": "OP_{op_vop[ooo].upper()}_{vop_type[t]}",
"mnemonic": "{op_vop[ooo]}.{vop_type[t]}",
@ -556,6 +568,7 @@ group_map = {
"string": string_formats,
"swizzle": swizzle_formats,
"return": return_formats,
"udivops": udivops_formats,
"vecops": vecops_formats,
"vecops2": vecops2_formats,
"with": with_formats,

View file

@ -2974,8 +2974,33 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
OP_cmp(LT, <);
// 0 1010
OP_cmp(GT, >);
/* Emit the switch case for the 1-component form of a binary operator:
 * `case OP_<OP>_<T>_1` stores OPA(t) op OPB(t) into OPC(t), where t names
 * the operand type used by the OPA/OPB/OPC accessors.
 * `op` is pasted verbatim, so nothing in this macro guards against a zero
 * divisor when it is instantiated with / or %.
 * NOTE(review): confirm whether zero divisors are rejected elsewhere. */
#define OP_op_1(OP, T, t, op) \
case OP_##OP##_##T##_1: \
OPC(t) = (OPA(t) op OPB(t)); \
break
/* Same as OP_op_1 for the 2-component opcode; t is expected to be a type
 * on which `op` can be applied directly to whole operands. */
#define OP_op_2(OP, T, t, op) \
case OP_##OP##_##T##_2: \
OPC(t) = (OPA(t) op OPB(t)); \
break
/* 3-component opcode: instead of operating on a whole-operand type, the
 * addresses of the operands (accessed as scalar type t) are handed to
 * VectorCompOp together with `op`.
 * NOTE(review): VectorCompOp is defined elsewhere; presumably it applies
 * `op` to each of the three components -- confirm.
 * Unlike its siblings this expansion already ends in `;`, so an invocation
 * followed by `;` leaves an extra (harmless) empty statement. */
#define OP_op_3(OP, T, t, op) \
case OP_##OP##_##T##_3: \
VectorCompOp (&OPC(t), &OPA(t), op, &OPB(t)); \
break;
/* Same as OP_op_1 for the 4-component opcode. */
#define OP_op_4(OP, T, t, op) \
case OP_##OP##_##T##_4: \
OPC(t) = (OPA(t) op OPB(t)); \
break
/* Emit the cases for all four widths of operator OP on type letter T.
 * t1, t2 and t4 are the accessor types for widths 1, 2 and 4; width 3
 * reuses the scalar type t1 because OP_op_3 works on component addresses.
 * The final case has no trailing `;` so the caller supplies it. */
#define OP_op_T(OP, T, t1, t2, t4, op) \
OP_op_1 (OP, T, t1, op); \
OP_op_2 (OP, T, t2, op); \
OP_op_3 (OP, T, t1, op); \
OP_op_4 (OP, T, t4, op)
// 0 1011
// spare
OP_op_T (DIV, u, uint, uivec2, uivec4, /);
OP_op_T (DIV, U, ulong, ulvec2, ulvec4, /);
OP_op_T (REM, u, uint, uivec2, uivec4, %);
OP_op_T (REM, U, ulong, ulvec2, ulvec4, %);
// 0 1100
OP_cmp(NE, !=);
// 0 1101
@ -3011,27 +3036,6 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
break;
// spare
/* Case generator for width-1 binary operators: builds the label
 * OP_<OP>_<T>_1 and assigns OPA(t) op OPB(t) to OPC(t). */
#define OP_op_1(OP, T, t, op) \
case OP_##OP##_##T##_1: \
OPC(t) = (OPA(t) op OPB(t)); \
break
/* Width-2 variant of OP_op_1. */
#define OP_op_2(OP, T, t, op) \
case OP_##OP##_##T##_2: \
OPC(t) = (OPA(t) op OPB(t)); \
break
/* Width-3 variant: passes operand addresses and `op` to VectorCompOp
 * rather than using a single assignment.
 * NOTE(review): VectorCompOp's definition is not visible here; presumably
 * it expands to a per-component operation -- confirm.
 * The expansion ends with its own `;`, unlike the other widths. */
#define OP_op_3(OP, T, t, op) \
case OP_##OP##_##T##_3: \
VectorCompOp (&OPC(t), &OPA(t), op, &OPB(t)); \
break;
/* Width-4 variant of OP_op_1. */
#define OP_op_4(OP, T, t, op) \
case OP_##OP##_##T##_4: \
OPC(t) = (OPA(t) op OPB(t)); \
break
/* Expands to the width 1, 2, 3 and 4 cases for one operator/type pair;
 * widths 1 and 3 both use t1, width 2 uses t2 and width 4 uses t4. */
#define OP_op_T(OP, T, t1, t2, t4, op) \
OP_op_1 (OP, T, t1, op); \
OP_op_2 (OP, T, t2, op); \
OP_op_3 (OP, T, t1, op); \
OP_op_4 (OP, T, t4, op)
#define OP_op(OP, op) \
OP_op_T (OP, I, int, ivec2, ivec4, op); \
OP_op_T (OP, F, float, vec2, vec4, op); \

View file

@ -2,6 +2,63 @@
#include "QF/mathlib.h"
// Initial globals for the uint divide/remainder tests.
// Row 0 holds the dividends and row 1 the divisors; rows 2 and 3 start
// zeroed and receive the quotients and remainders (operand offsets 0, 4, 8
// and 12 in the uint_divop_*_statements tables).
// The negative literals wrap when converted to the unsigned element type,
// which is what distinguishes these cases from signed division
// (e.g. -5 becomes 0xfffffffb, assuming 32-bit elements).
static pr_uivec4_t uint_divop_init[] = {
{ 5, -5, 5, -5},
{ 3, 3, -3, -3},
{ 0, 0, 0, 0},
{ 0, 0, 0, 0},
};
// Expected globals after any of the uint divop tests has run.
// Rows 0 and 1 are the unchanged inputs; row 2 is row0 / row1 and row 3 is
// row0 % row1, both unsigned. Assuming 32-bit elements:
//   5 / 3                   = 1          rem 2
//   0xfffffffb / 3          = 0x55555553 rem 2
//   5 / 0xfffffffd          = 0          rem 5
//   0xfffffffb / 0xfffffffd = 0          rem 0xfffffffb (written as -5)
static pr_uivec4_t uint_divop_expect[] = {
{ 5, -5, 5, -5},
{ 3, 3, -3, -3},
{ 1, 0x55555553, 0, 0},
{ 2, 2, 5, -5},
};
// Width-1 uint divide/remainder, driven by a loop.
// Per the inline comments, global 32 is an index that starts at 4 and is
// decremented by 1 each pass; each pass runs one DIV_u_1/REM_u_1 pair with
// operands at offsets 0 and 4 and results at offsets 8 and 12.
// NOTE(review): the semantics of OP(), OP_WITH, OP_IFAE and OP_JUMP_A are
// not visible here -- presumably OP_WITH rebases the offsets on the index
// and OP_IFAE skips the OP_BREAK while the index is >= 0; confirm.
// NOTE(review): uint_divop_init covers only 16 elements, so confirm that
// global 32 lies inside the globals allocated for this test.
static dstatement_t uint_divop_1_statements[] = {
{ OP(0, 0, 0, OP_LEA_A), 4, 0, 32 }, // init index
//loop:
{ OP(0, 0, 0, OP_LEA_C), 32, -1, 32 }, // dec index
{ OP(0, 0, 0, OP_IFAE), 2, 0, 32 },
{ OP(0, 0, 0, OP_BREAK), 0, 0, 0 },
{ OP(0, 0, 0, OP_WITH), 4, 32, 1 },
{ OP(1, 1, 1, OP_DIV_u_1), 0, 4, 8 },
{ OP(1, 1, 1, OP_REM_u_1), 0, 4, 12 },
{ OP(1, 1, 1, OP_JUMP_A), -6, 0, 0 },
};
// Width-2 uint divide/remainder, driven by a loop.
// Same shape as the width-1 test, but the index at global 32 is decremented
// by 2 each pass so every DIV_u_2/REM_u_2 pair handles two components.
// NOTE(review): opcode semantics (OP_WITH, OP_IFAE, OP_JUMP_A) are not
// visible here; see the hedges on how the index rebases the offsets.
// NOTE(review): confirm that global 32 lies inside the allocated globals,
// since uint_divop_init covers only 16 elements.
static dstatement_t uint_divop_2_statements[] = {
{ OP(0, 0, 0, OP_LEA_A), 4, 0, 32 }, // init index
//loop:
{ OP(0, 0, 0, OP_LEA_C), 32, -2, 32 }, // dec index
{ OP(0, 0, 0, OP_IFAE), 2, 0, 32 },
{ OP(0, 0, 0, OP_BREAK), 0, 0, 0 },
{ OP(0, 0, 0, OP_WITH), 4, 32, 1 },
{ OP(1, 1, 1, OP_DIV_u_2), 0, 4, 8 },
{ OP(1, 1, 1, OP_REM_u_2), 0, 4, 12 },
{ OP(1, 1, 1, OP_JUMP_A), -6, 0, 0 },
};
// Width-3 uint test, variant a: the 3-wide op covers components 0..2
// (operands at 0 and 4) and a 1-wide op covers the remaining component 3
// (operands at 3 and 7), so all four columns of the expect table are filled.
// Quotients land in row 2 (offsets 8 and 11), remainders in row 3
// (offsets 12 and 15).
static dstatement_t uint_divop_3a_statements[] = {
{ OP(1, 1, 1, OP_DIV_u_3), 0, 4, 8 },
{ OP(1, 1, 1, OP_DIV_u_1), 3, 7, 11 },
{ OP(1, 1, 1, OP_REM_u_3), 0, 4, 12 },
{ OP(1, 1, 1, OP_REM_u_1), 3, 7, 15 },
};
// Width-3 uint test, variant b: the mirror of variant a. A 1-wide op covers
// component 0 (operands at 0 and 4) and the 3-wide op covers components
// 1..3 (operands at 1 and 5), exercising the 3-wide op at an offset that is
// not a multiple of 4.
static dstatement_t uint_divop_3b_statements[] = {
{ OP(1, 1, 1, OP_DIV_u_1), 0, 4, 8 },
{ OP(1, 1, 1, OP_DIV_u_3), 1, 5, 9 },
{ OP(1, 1, 1, OP_REM_u_1), 0, 4, 12 },
{ OP(1, 1, 1, OP_REM_u_3), 1, 5, 13 },
};
// Width-4 uint test: one DIV_u_4 and one REM_u_4 process the whole input
// rows at once (operands at 0 and 4, quotients to 8, remainders to 12).
static dstatement_t uint_divop_4_statements[] = {
{ OP(1, 1, 1, OP_DIV_u_4), 0, 4, 8 },
{ OP(1, 1, 1, OP_REM_u_4), 0, 4, 12 },
};
static pr_uivec4_t uint_cmpop_init[] = {
{ 5, -5, 5, -5},
{ 5, 5, -5, -5},
@ -95,6 +152,63 @@ static dstatement_t uint_cmpop_4_statements[] = {
{ OP(1, 1, 1, OP_LE_u_4), 0, 4, 28 },
};
// Initial globals for the ulong divide/remainder tests.
// Row 0 holds the dividends and row 1 the divisors; rows 2 and 3 start
// zeroed and receive the quotients and remainders (operand offsets 0, 8, 16
// and 24 in the ulong_divop_*_statements tables -- presumably each ulong
// occupies two global words; confirm).
// The negative literals wrap when converted to the unsigned element type
// (e.g. -5 becomes 0xfffffffffffffffb, assuming 64-bit elements).
static pr_ulvec4_t ulong_divop_init[] = {
{ 5, -5, 5, -5},
{ 3, 3, -3, -3},
{ 0, 0, 0, 0},
{ 0, 0, 0, 0},
};
// Expected globals after any of the ulong divop tests has run.
// Rows 0 and 1 are the unchanged inputs; row 2 is row0 / row1 and row 3 is
// row0 % row1, both unsigned. Assuming 64-bit elements:
//   5 / 3                        = 1                  rem 2
//   0xfffffffffffffffb / 3       = 0x5555555555555553 rem 2
//   5 / 0xfffffffffffffffd       = 0                  rem 5
//   0x...fffb / 0x...fffd        = 0                  rem 0x...fffb (-5)
static pr_ulvec4_t ulong_divop_expect[] = {
{ 5, -5, 5, -5},
{ 3, 3, -3, -3},
{ 1, UINT64_C(0x5555555555555553), 0, 0},
{ 2, 2, 5, -5},
};
// Width-1 ulong divide/remainder, driven by a loop.
// Per the inline comments, global 64 is an index that starts at 8 and is
// decremented by 2 each pass (presumably one two-word ulong per pass); each
// pass runs one DIV_U_1/REM_U_1 pair with operands at offsets 0 and 8 and
// results at offsets 16 and 24.
// NOTE(review): the semantics of OP(), OP_WITH, OP_IFAE and OP_JUMP_A are
// not visible here -- presumably OP_WITH rebases the offsets on the index
// and OP_IFAE skips the OP_BREAK while the index is >= 0; confirm.
// NOTE(review): ulong_divop_init covers only 16 elements (presumably 32
// words), so confirm that global 64 lies inside the allocated globals.
static dstatement_t ulong_divop_1_statements[] = {
{ OP(0, 0, 0, OP_LEA_A), 8, 0, 64 }, // init index
//loop:
{ OP(0, 0, 0, OP_LEA_C), 64, -2, 64 }, // dec index
{ OP(0, 0, 0, OP_IFAE), 2, 0, 64 },
{ OP(0, 0, 0, OP_BREAK), 0, 0, 0 },
{ OP(0, 0, 0, OP_WITH), 4, 64, 1 },
{ OP(1, 1, 1, OP_DIV_U_1), 0, 8, 16 },
{ OP(1, 1, 1, OP_REM_U_1), 0, 8, 24 },
{ OP(1, 1, 1, OP_JUMP_A), -6, 0, 0 },
};
// Width-2 ulong divide/remainder, driven by a loop.
// Same shape as the width-1 ulong test, but the index at global 64 is
// decremented by 4 each pass (presumably two two-word ulongs), so every
// DIV_U_2/REM_U_2 pair handles two components.
// NOTE(review): opcode semantics (OP_WITH, OP_IFAE, OP_JUMP_A) are not
// visible here; how the index rebases the offsets is an assumption.
// NOTE(review): confirm that global 64 lies inside the allocated globals,
// since ulong_divop_init covers only 16 elements.
static dstatement_t ulong_divop_2_statements[] = {
{ OP(0, 0, 0, OP_LEA_A), 8, 0, 64 }, // init index
//loop:
{ OP(0, 0, 0, OP_LEA_C), 64, -4, 64 }, // dec index
{ OP(0, 0, 0, OP_IFAE), 2, 0, 64 },
{ OP(0, 0, 0, OP_BREAK), 0, 0, 0 },
{ OP(0, 0, 0, OP_WITH), 4, 64, 1 },
{ OP(1, 1, 1, OP_DIV_U_2), 0, 8, 16 },
{ OP(1, 1, 1, OP_REM_U_2), 0, 8, 24 },
{ OP(1, 1, 1, OP_JUMP_A), -6, 0, 0 },
};
// Width-3 ulong test, variant a: the 3-wide op covers components 0..2
// (operands at 0 and 8) and a 1-wide op covers component 3 (operands at 6
// and 14, i.e. three elements further on at two words per element --
// presumably; confirm the element size).
// Quotients land at offsets 16 and 22, remainders at offsets 24 and 30.
static dstatement_t ulong_divop_3a_statements[] = {
{ OP(1, 1, 1, OP_DIV_U_3), 0, 8, 16 },
{ OP(1, 1, 1, OP_DIV_U_1), 6, 14, 22 },
{ OP(1, 1, 1, OP_REM_U_3), 0, 8, 24 },
{ OP(1, 1, 1, OP_REM_U_1), 6, 14, 30 },
};
// Width-3 ulong test, variant b: the mirror of variant a. A 1-wide op
// covers component 0 (operands at 0 and 8) and the 3-wide op covers
// components 1..3 (operands at 2 and 10), so the 3-wide op runs at an
// offset one element past the start of each row.
static dstatement_t ulong_divop_3b_statements[] = {
{ OP(1, 1, 1, OP_DIV_U_1), 0, 8, 16 },
{ OP(1, 1, 1, OP_DIV_U_3), 2, 10, 18 },
{ OP(1, 1, 1, OP_REM_U_1), 0, 8, 24 },
{ OP(1, 1, 1, OP_REM_U_3), 2, 10, 26 },
};
// Width-4 ulong test: one DIV_U_4 and one REM_U_4 process the whole input
// rows at once (operands at 0 and 8, quotients to 16, remainders to 24).
static dstatement_t ulong_divop_4_statements[] = {
{ OP(1, 1, 1, OP_DIV_U_4), 0, 8, 16 },
{ OP(1, 1, 1, OP_REM_U_4), 0, 8, 24 },
};
static pr_ulvec4_t ulong_cmpop_init[] = {
{ 5, -5, 5, -5},
{ 5, 5, -5, -5},
@ -387,6 +501,51 @@ static dstatement_t ulong_shiftop_4_statements[] = {
};
test_t tests[] = {
{
.desc = "uint divop 1",
.extra_globals = 4 * 1,
.num_globals = num_globals(uint_divop_init,uint_divop_expect),
.num_statements = num_statements (uint_divop_1_statements),
.statements = uint_divop_1_statements,
.init_globals = (pr_int_t *) uint_divop_init,
.expect_globals = (pr_int_t *) uint_divop_expect,
},
{
.desc = "uint divop 2",
.extra_globals = 4 * 1,
.num_globals = num_globals(uint_divop_init,uint_divop_expect),
.num_statements = num_statements (uint_divop_2_statements),
.statements = uint_divop_2_statements,
.init_globals = (pr_int_t *) uint_divop_init,
.expect_globals = (pr_int_t *) uint_divop_expect,
},
{
.desc = "uint divop 3a",
.extra_globals = 4 * 1,
.num_globals = num_globals(uint_divop_init,uint_divop_expect),
.num_statements = num_statements (uint_divop_3a_statements),
.statements = uint_divop_3a_statements,
.init_globals = (pr_int_t *) uint_divop_init,
.expect_globals = (pr_int_t *) uint_divop_expect,
},
{
.desc = "uint divop 3b",
.extra_globals = 4 * 1,
.num_globals = num_globals(uint_divop_init,uint_divop_expect),
.num_statements = num_statements (uint_divop_3b_statements),
.statements = uint_divop_3b_statements,
.init_globals = (pr_int_t *) uint_divop_init,
.expect_globals = (pr_int_t *) uint_divop_expect,
},
{
.desc = "uint divop 4",
.extra_globals = 4 * 1,
.num_globals = num_globals(uint_divop_init,uint_divop_expect),
.num_statements = num_statements (uint_divop_4_statements),
.statements = uint_divop_4_statements,
.init_globals = (pr_int_t *) uint_divop_init,
.expect_globals = (pr_int_t *) uint_divop_expect,
},
{
.desc = "uint cmpop 1",
.extra_globals = 4 * 1,
@ -432,6 +591,51 @@ test_t tests[] = {
.init_globals = (pr_int_t *) uint_cmpop_init,
.expect_globals = (pr_int_t *) uint_cmpop_expect,
},
{
.desc = "ulong divop 1",
.extra_globals = 4 * 1,
.num_globals = num_globals(ulong_divop_init,ulong_divop_expect),
.num_statements = num_statements (ulong_divop_1_statements),
.statements = ulong_divop_1_statements,
.init_globals = (pr_int_t *) ulong_divop_init,
.expect_globals = (pr_int_t *) ulong_divop_expect,
},
{
.desc = "ulong divop 2",
.extra_globals = 4 * 1,
.num_globals = num_globals(ulong_divop_init,ulong_divop_expect),
.num_statements = num_statements (ulong_divop_2_statements),
.statements = ulong_divop_2_statements,
.init_globals = (pr_int_t *) ulong_divop_init,
.expect_globals = (pr_int_t *) ulong_divop_expect,
},
{
.desc = "ulong divop 3a",
.extra_globals = 4 * 1,
.num_globals = num_globals(ulong_divop_init,ulong_divop_expect),
.num_statements = num_statements (ulong_divop_3a_statements),
.statements = ulong_divop_3a_statements,
.init_globals = (pr_int_t *) ulong_divop_init,
.expect_globals = (pr_int_t *) ulong_divop_expect,
},
{
.desc = "ulong divop 3b",
.extra_globals = 4 * 1,
.num_globals = num_globals(ulong_divop_init,ulong_divop_expect),
.num_statements = num_statements (ulong_divop_3b_statements),
.statements = ulong_divop_3b_statements,
.init_globals = (pr_int_t *) ulong_divop_init,
.expect_globals = (pr_int_t *) ulong_divop_expect,
},
{
.desc = "ulong divop 4",
.extra_globals = 4 * 1,
.num_globals = num_globals(ulong_divop_init,ulong_divop_expect),
.num_statements = num_statements (ulong_divop_4_statements),
.statements = ulong_divop_4_statements,
.init_globals = (pr_int_t *) ulong_divop_init,
.expect_globals = (pr_int_t *) ulong_divop_expect,
},
{
.desc = "ulong cmpop 1",
.extra_globals = 4 * 1,