mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-04-11 11:51:50 +00:00
[gamecode] Add matrix instructions
Only matrix-vector, vector-matrix and vector-vector outer products (no more room), but that's enough to get decent performance out of matrix-matrix and matrix-scalar (both of which can be done as a set of matrix-vector or vector-scalar products). Progs version bumped because I found that I'd put the swizzle and 2d wedge ops in the wrong spot (compared to both intention and docs) and rather than adjust the docs, I took advantage of the opportunity to get a nicer layout for the wedge products (nestled into the spare slots left by the 2x2 matrix ops, which seems fitting as the 2d wedge is the determinant of a 2x2 matrix).
This commit is contained in:
parent
a4eefa6204
commit
efc1fdbd29
3 changed files with 273 additions and 39 deletions
|
@ -482,6 +482,7 @@ typedef struct opcode_s {
|
|||
const char *opname;
|
||||
const char *mnemonic;
|
||||
int widths[3]; ///< component count for each argument (1-4)
|
||||
int columns[3]; ///< column count for each argument (1-4)
|
||||
etype_t types[3]; ///< component type for each argument
|
||||
const char *fmt;
|
||||
} opcode_t;
|
||||
|
@ -563,7 +564,7 @@ typedef struct pr_va_list_s {
|
|||
|(((0x##c) & 0xfff) << 0) )
|
||||
#define PROG_ID_VERSION 6
|
||||
#define PROG_V6P_VERSION PROG_VERSION_ENCODE(0,fff,00a)
|
||||
#define PROG_VERSION PROG_VERSION_ENCODE(0,fff,011)
|
||||
#define PROG_VERSION PROG_VERSION_ENCODE(0,fff,012)
|
||||
|
||||
typedef struct pr_chunk_s {
|
||||
pr_uint_t offset;
|
||||
|
|
|
@ -3,8 +3,12 @@ bitmap_txt = """
|
|||
0 0001 mmss store
|
||||
0 0010 mmss push
|
||||
0 0011 mmss pop
|
||||
0 0111 00ts swizzle2
|
||||
0 0111 01t0 wedge2
|
||||
0 0100 dtdd matvec
|
||||
0 0101 dtdd vecmat
|
||||
0 0110 dtdd outer
|
||||
0 0111 0too matmul
|
||||
0 0111 0t11 wedge2
|
||||
0 0111 10ts swizzle2
|
||||
0 1ccc ttss compare
|
||||
0 0000 00nn
|
||||
0 0000 0000 noop
|
||||
|
@ -58,6 +62,17 @@ address_widths = [
|
|||
[ "4, 0", "1, 1", "1, 0", "1, 1", ],
|
||||
[ "-1, 0", "1, 1", "1, 0", "1, 1", "-1, 0", "-1, 1"],
|
||||
]
|
||||
# Column counts for the two address operands of load/store/lea style
# instructions.  The outer index picks a row (load and store index it with
# their size field; lea and lea2 use row 4 only) and the inner index is the
# addressing mode.  Every row is a distinct list object.
address_columns = [
    ["1, 0", "1, 1", "1, 0", "1, 1"] for _ in range(4)
] + [
    ["-1, 0", "1, 1", "1, 0", "1, 1", "-1, 0", "-1, 1"],
]
|
||||
# Operand width / column templates for the move and memset groups.  Each list
# is indexed by the group's two-bit oo field; the last slot is None because
# the fourth encoding has no instruction in those groups.
move_widths = [
    "-1, 0, -1",
    "1, 1, 1",
    "1, 0, 1",
    None,
]
# Same entries as the widths, kept as an independent list.
move_columns = list(move_widths)

memset_widths = [
    "1, 0, -1",
    "1, 1, 1",
    "1, 0, 1",
    None,
]
# Same entries as the widths, kept as an independent list.
memset_columns = list(memset_widths)
|
||||
#store, pop, lea
|
||||
store_fmt = [
|
||||
"%ga",
|
||||
|
@ -92,6 +107,7 @@ adjstk_formats = {
|
|||
"opname": "adjstk",
|
||||
"format": "%sa, %sb",
|
||||
"widths": "0, 0, 0",
|
||||
"columns": "0, 0, 0",
|
||||
"types": "ev_short, ev_short, ev_invalid",
|
||||
}
|
||||
bitops_formats = {
|
||||
|
@ -100,6 +116,7 @@ bitops_formats = {
|
|||
"opname": "{op_bit[oo]}",
|
||||
"format": "{bit_fmt[oo]}",
|
||||
"widths": "{ss+1}, { oo < 3 and ss+1 or 0}, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{bit_types[t]}, {oo < 3 and bit_types[t] or 'ev_invalid'}, {bit_types[t]}",
|
||||
"args": {
|
||||
"op_bit": ["bitand", "bitor", "bitxor", "bitnot"],
|
||||
|
@ -119,6 +136,7 @@ branch_formats = {
|
|||
"opname": "{op_cond[c*4+cc]}",
|
||||
"format": "{cond_fmt[c*4+cc]}{branch_fmt[0]}",
|
||||
"widths": "0, 0, 1",
|
||||
"columns": "0, 0, 1",
|
||||
"types": "ev_short, ev_invalid, ev_int",
|
||||
"args": {
|
||||
"op_mode": "ABCD",
|
||||
|
@ -134,6 +152,7 @@ call_formats = {
|
|||
"opname": "call",
|
||||
"format": "{call_fmt[mm]}",
|
||||
"widths": "{call_widths[mm]}, -1",
|
||||
"columns": "{call_columns[mm]}, -1",
|
||||
"types": "{call_types[mm]}, ev_void",
|
||||
"args": {
|
||||
"op_mode": ".BCD",
|
||||
|
@ -149,7 +168,8 @@ call_formats = {
|
|||
"ev_ptr, ev_short",
|
||||
"ev_ptr, ev_int",
|
||||
],
|
||||
"call_widths": [ None, "1, 0", "1, 0", "1, 1" ]
|
||||
"call_widths": [ None, "1, 0", "1, 0", "1, 1" ],
|
||||
"call_columns": [ None, "1, 0", "1, 0", "1, 1" ],
|
||||
},
|
||||
}
|
||||
compare_formats = {
|
||||
|
@ -157,6 +177,7 @@ compare_formats = {
|
|||
"mnemonic": "{op_cmp[ccc]}.{cmp_type[tt]}",
|
||||
"opname": "{op_cmp[ccc]}",
|
||||
"widths": "{ss+1}, {ss+1}, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{cmp_types[tt]}, {cmp_types[tt]}, {res_types[tt & 2]}",
|
||||
"args": {
|
||||
"op_cmp": compare_ccc,
|
||||
|
@ -170,6 +191,7 @@ compare2_formats = {
|
|||
"mnemonic": "{op_cmp[ccc]}.{cmp_type[t]}",
|
||||
"opname": "{op_cmp[ccc]}",
|
||||
"widths": "{ss+1}, {ss+1}, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{cmp_types[t]}, {cmp_types[t]}, ev_int",
|
||||
"args": {
|
||||
"op_cmp": compare_ccc,
|
||||
|
@ -183,6 +205,7 @@ constant_formats = {
|
|||
"opname": "ldconst",
|
||||
"format": "%sa, %sb, %gc",
|
||||
"widths": "0, 0, -1",
|
||||
"columns": "0, 0, -1",
|
||||
"types": "ev_short, ev_short, ev_void",
|
||||
}
|
||||
convert_formats = {
|
||||
|
@ -191,6 +214,7 @@ convert_formats = {
|
|||
"opname": "conv",
|
||||
"format": "%Ga %Cb %gc",
|
||||
"widths": "-1, 0, -1",
|
||||
"columns": "1, 0, 1",
|
||||
"types": "ev_void, ev_short, ev_void",
|
||||
}
|
||||
fbitops_formats = {
|
||||
|
@ -199,6 +223,7 @@ fbitops_formats = {
|
|||
"opname": "{op_fbit[oo]}",
|
||||
"format": "{fbit_fmt[oo]}",
|
||||
"widths": "1, 1, 1",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{fbit_types[0]}, {fbit_types[oo==3]}, {fbit_types[0]}",
|
||||
"args": {
|
||||
"op_fbit": ["bitand", "bitor", "bitxor", "bitnot"],
|
||||
|
@ -217,6 +242,7 @@ extend_formats = {
|
|||
"opname": "extend",
|
||||
"format": "%Ga%Xb, %gc",
|
||||
"widths": "-1, 0, -1",
|
||||
"columns": "1, 0, 1",
|
||||
"types": "ev_void, ev_short, ev_void",
|
||||
}
|
||||
hops_formats = {
|
||||
|
@ -225,6 +251,7 @@ hops_formats = {
|
|||
"opname": "hops",
|
||||
"format": "%Hb %Ga, %gc",
|
||||
"widths": "-1, 0, 1",
|
||||
"columns": "1, 0, 1",
|
||||
"types": "ev_void, ev_short, ev_void",
|
||||
}
|
||||
jump_formats = {
|
||||
|
@ -233,6 +260,7 @@ jump_formats = {
|
|||
"opname": "jump",
|
||||
"format": "{jump_fmt[mm]}",
|
||||
"widths": "{jump_widths[mm]}, 0",
|
||||
"columns": "{jump_columns[mm]}, 0",
|
||||
"types": "{jump_types[mm]}",
|
||||
"args": {
|
||||
"op_mode": "ABCD",
|
||||
|
@ -243,7 +271,8 @@ jump_formats = {
|
|||
"ev_ptr, ev_short, ev_invalid",
|
||||
"ev_ptr, ev_int, ev_invalid",
|
||||
],
|
||||
"jump_widths": [ "0, 0", "1, 1", "1, 0", "1, 1" ]
|
||||
"jump_widths": [ "0, 0", "1, 1", "1, 0", "1, 1" ],
|
||||
"jump_columns": [ "0, 0", "1, 1", "1, 0", "1, 1" ],
|
||||
},
|
||||
}
|
||||
load64_formats = {
|
||||
|
@ -252,12 +281,14 @@ load64_formats = {
|
|||
"opname": "load64",
|
||||
"format": "{load_fmt[mm]}, %gc",
|
||||
"widths": "{load_widths[s+2][mm]}, {s+3}",
|
||||
"columns": "{load_columns[s+2][mm]}, {s+3}",
|
||||
"types": "{load_types[mm]}, ev_void",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
"load_fmt": load_fmt,
|
||||
"load_types": address_types,
|
||||
"load_widths": address_widths,
|
||||
"load_columns": address_columns,
|
||||
},
|
||||
}
|
||||
lea_formats = {
|
||||
|
@ -266,12 +297,14 @@ lea_formats = {
|
|||
"opname": "lea",
|
||||
"format": "{lea_fmt[mm]}, %gc",
|
||||
"widths": "{lea_widths[mm]}, 1",
|
||||
"columns": "{lea_columns[mm]}, 1",
|
||||
"types": "{lea_types[mm]}, ev_ptr",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
"lea_fmt": store_fmt,
|
||||
"lea_types": address_types,
|
||||
"lea_widths": address_widths[4],
|
||||
"lea_columns": address_columns[4],
|
||||
},
|
||||
}
|
||||
lea2_formats = {
|
||||
|
@ -280,12 +313,14 @@ lea2_formats = {
|
|||
"opname": "lea",
|
||||
"format": "{lea_fmt[m+4]}, %gc",
|
||||
"widths": "{lea_widths[m+4]}, 1",
|
||||
"columns": "{lea_columns[m+4]}, 1",
|
||||
"types": "{lea_types[m+4]}, ev_ptr",
|
||||
"args": {
|
||||
"op_mode": "EF",
|
||||
"lea_fmt": store_fmt,
|
||||
"lea_types": address_types,
|
||||
"lea_widths": address_widths[4],
|
||||
"lea_columns": address_columns[4],
|
||||
},
|
||||
}
|
||||
load_formats = {
|
||||
|
@ -294,12 +329,14 @@ load_formats = {
|
|||
"opname": "load",
|
||||
"format": "{load_fmt[mm]}, %gc",
|
||||
"widths": "{load_widths[ss][mm]}, {ss+1}",
|
||||
"columns": "{load_columns[ss][mm]}, {ss+1}",
|
||||
"types": "{load_types[mm]}, ev_void",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
"load_fmt": load_fmt,
|
||||
"load_types": address_types,
|
||||
"load_widths": address_widths,
|
||||
"load_columns": address_columns,
|
||||
},
|
||||
}
|
||||
mathops_formats = {
|
||||
|
@ -307,6 +344,7 @@ mathops_formats = {
|
|||
"mnemonic": "{op_math[ooo]}.{math_type[tt]}",
|
||||
"opname": "{op_math[ooo]}",
|
||||
"widths": "{ss+1}, {ss+1}, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{math_types[tt]}, {math_types[tt]}, {math_types[tt]}",
|
||||
"args": {
|
||||
"op_math": ["mul", "div", "rem", "mod", "add", "sub", None, None],
|
||||
|
@ -314,23 +352,50 @@ mathops_formats = {
|
|||
"math_types": etype_tt,
|
||||
},
|
||||
}
|
||||
# Templates for the 2x2 matrix instructions (bitmap row "0 0111 0too matmul").
# oo selects the operation through op_matmul and t selects the F or D variant.
# All three operands are 2 components wide; what differs per operation is how
# many columns each operand spans, which comes from cols_matmul.
matmul_formats = {
    "opcode": "OP_{op_matmul[oo].upper()}_22_{mat_type[t]}",
    "mnemonic": "{op_matmul[oo]}",
    "opname": "{op_matmul[oo]}",
    "widths": "2, 2, 2",
    "columns": "{cols_matmul[oo]}",
    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
    "args": {
        "op_matmul": ["mvmul", "vmmul", "outer"],
        # Column counts for operands a, b, c, in op_matmul order:
        #   mvmul: matrix * vector -> vector
        #   vmmul: vector * matrix -> vector
        #   outer: vector * vector -> matrix, so the *result* has 2 columns
        #          (matches outer_formats, whose c operand gets mat_dim[1]
        #          columns, and the interpreter, which writes c[0] and c[1]).
        # The trailing None is the oo == 3 slot, which the bitmap assigns to
        # wedge2 rather than to this group.
        "cols_matmul": ["2, 1, 1", "1, 2, 1", "1, 1, 2", None],
        "mat_type": ['F', 'D'],
        "mat_types": float_t,
        # Not referenced by any template in this group.
        "mat_dim": ((2, 3, 4, 3, 2, 3, 4, 4),
                    (3, 3, 3, 2, 4, 4, 4, 2)),
    },
}
|
||||
# Templates for matrix * vector (mvmul) on the non-2x2 shapes.
# mat_dim[0][d][dd] is the number of components per matrix column (rows) and
# mat_dim[1][d][dd] is the number of columns; opcode names are <cols><rows>.
#   a: matrix  (rows wide, cols columns)
#   b: vector  (cols wide)
#   c: vector  (rows wide)
matvec_formats = {
    "opcode":
        "OP_MVMUL_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
    "mnemonic": "mvmul",
    "opname": "mvmul",
    "widths":
        "{mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}, {mat_dim[0][d][dd]}",
    "columns": "{mat_dim[1][d][dd]}, 1, 1",
    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
    "args": {
        "mat_type": ["F", "D"],
        "mat_types": float_t,
        "mat_dim": (
            ((2, 3, 4, 3), (2, 3, 4, 4)),   # rows, indexed [d][dd]
            ((3, 3, 3, 2), (4, 4, 4, 2)),   # columns, indexed [d][dd]
        ),
    },
}
|
||||
memset_formats = {
|
||||
"opcode": "OP_MEMSET_{op_memset[oo].upper()}",
|
||||
"mnemonic": "memset.{op_memset[oo]}",
|
||||
"opname": "memset{suff_memset[oo]}",
|
||||
"format": "{memset_fmt[oo]}",
|
||||
"widths": "{memset_widths[oo]}",
|
||||
"columns": "{memset_columns[oo]}",
|
||||
"types": "{memset_types[oo]}",
|
||||
"args": {
|
||||
"op_memset": ["i", "p", "pi", None],
|
||||
"suff_memset": ["", "p", "p", None],
|
||||
"memset_fmt": ["%Ga, %sb, %gc", "%Ga, %Gb, %Gc", "%Ga, %sb, %Gc", None],
|
||||
"memset_widths": [
|
||||
"1, 0, -1",
|
||||
"1, 1, 1",
|
||||
"1, 0, 1",
|
||||
None,
|
||||
],
|
||||
"memset_widths": memset_widths,
|
||||
"memset_columns": memset_columns,
|
||||
"memset_types": [
|
||||
"ev_int, ev_short, ev_void",
|
||||
"ev_int, ev_int, ev_ptr",
|
||||
|
@ -344,17 +409,14 @@ move_formats = {
|
|||
"opname": "move{suff_move[oo]}",
|
||||
"format": "{move_fmt[oo]}",
|
||||
"widths": "{move_widths[oo]}",
|
||||
"columns": "{move_columns[oo]}",
|
||||
"types": "{move_types[oo]}",
|
||||
"args": {
|
||||
"op_move": ["i", "p", "pi", None],
|
||||
"suff_move": ["", "p", "p", None],
|
||||
"move_fmt": ["%Ga, %sb, %gc", "%Ga, %Gb, %Gc", "%Ga, %sb, %Gc", None],
|
||||
"move_widths": [
|
||||
"-1, 0, -1",
|
||||
"1, 1, 1",
|
||||
"1, 0, 1",
|
||||
None,
|
||||
],
|
||||
"move_widths": move_widths,
|
||||
"move_columns": move_columns,
|
||||
"move_types": [
|
||||
"ev_void, ev_short, ev_void",
|
||||
"ev_ptr, ev_int, ev_ptr",
|
||||
|
@ -368,14 +430,30 @@ noop_formats = {
|
|||
"opname": "nop",
|
||||
"format": "there were plums...",
|
||||
"widths": "0, 0, 0",
|
||||
"columns": "0, 0, 0",
|
||||
"types": "ev_invalid, ev_invalid, ev_invalid",
|
||||
}
|
||||
# Templates for the vector (x) vector outer product on the non-2x2 shapes.
# mat_dim[0][d][dd] is the number of components per result column (rows) and
# mat_dim[1][d][dd] is the number of result columns; opcode names are
# <cols><rows>.
#   a: vector  (rows wide)
#   b: vector  (cols wide)
#   c: matrix  (rows wide, cols columns)
outer_formats = {
    "opcode":
        "OP_OUTER_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
    "mnemonic": "outer",
    "opname": "outer",
    "widths":
        "{mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}, {mat_dim[0][d][dd]}",
    "columns": "1, 1, {mat_dim[1][d][dd]}",
    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
    "args": {
        "mat_type": ["F", "D"],
        "mat_types": float_t,
        "mat_dim": (
            ((2, 3, 4, 3), (2, 3, 4, 4)),   # rows, indexed [d][dd]
            ((3, 3, 3, 2), (4, 4, 4, 2)),   # columns, indexed [d][dd]
        ),
    },
}
|
||||
push_formats = {
|
||||
"opcode": "OP_PUSH_{op_mode[mm]}_{ss+1}",
|
||||
"mnemonic": "push",
|
||||
"opname": "push",
|
||||
"format": "{push_fmt[mm]}",
|
||||
"widths": "{ss+1}, 0, 0",
|
||||
"columns": "1, 0, 0",
|
||||
"types": "{push_types[mm]}, ev_invalid",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
|
@ -389,6 +467,7 @@ pop_formats = {
|
|||
"opname": "pop",
|
||||
"format": "{pop_fmt[mm]}",
|
||||
"widths": "{ss+1}, 0, 0",
|
||||
"columns": "1, 0, 0",
|
||||
"types": "{pop_types[mm]}, ev_invalid",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
|
@ -401,6 +480,7 @@ scale_formats = {
|
|||
"mnemonic": "scale.{scale_type[t]}",
|
||||
"opname": "scale",
|
||||
"widths": "{ss+1}, 1, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{scale_types[t]}, {scale_types[t]}, {scale_types[t]}",
|
||||
"args": {
|
||||
"scale_type": ['F', 'D'],
|
||||
|
@ -412,6 +492,7 @@ shiftops_formats = {
|
|||
"mnemonic": "{mn_shift[u*2+r]}.{shift_type[u*2+t]}",
|
||||
"opname": "{op_shift[u*2+r]}",
|
||||
"widths": "{ss+1}, {ss+1}, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{shift_types[t][u]}, {shift_types[t][0]}, {shift_types[t][u]}",
|
||||
"args": {
|
||||
"mn_shift": ["shl", "asr", "shl", "shr"],
|
||||
|
@ -429,6 +510,7 @@ statef_formats = {
|
|||
"opname": "state",
|
||||
"format": "{state_fmt[c]}",
|
||||
"widths": "1, 1, {c}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "ev_float, ev_func, {state_types[c]}",
|
||||
"args": {
|
||||
"state": ["ft", "ftt"],
|
||||
|
@ -442,6 +524,7 @@ stated_formats = {
|
|||
"opname": "state",
|
||||
"format": "{state_fmt[c]}",
|
||||
"widths": "1, 1, {c}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "ev_int, ev_func, {state_types[c]}",
|
||||
"args": {
|
||||
"state": ["dt", "dtt"],
|
||||
|
@ -455,6 +538,7 @@ store_formats = {
|
|||
"opname": "{store_op[mm]}",
|
||||
"format": "%Gc, {store_fmt[mm]}",
|
||||
"widths": "{store_widths[ss][mm]}, {ss+1}",
|
||||
"columns": "{store_columns[ss][mm]}, {ss+1}",
|
||||
"types": "{store_types[mm]}, ev_void",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
|
@ -462,6 +546,7 @@ store_formats = {
|
|||
"store_op": ["assign", "store", "store", "store"],
|
||||
"store_types": address_types,
|
||||
"store_widths": address_widths,
|
||||
"store_columns": address_columns,
|
||||
},
|
||||
}
|
||||
store64_formats = {
|
||||
|
@ -470,6 +555,7 @@ store64_formats = {
|
|||
"opname": "{store_op[mm]}64",
|
||||
"format": "%Gc, {store_fmt[mm]}",
|
||||
"widths": "{store_widths[s+2][mm]}, {s+3}",
|
||||
"columns": "{store_columns[s+2][mm]}, 1",
|
||||
"types": "{store_types[mm]}, ev_void",
|
||||
"args": {
|
||||
"op_mode": address_mode,
|
||||
|
@ -477,6 +563,7 @@ store64_formats = {
|
|||
"store_op": ["assign", "store", "store", "store"],
|
||||
"store_types": address_types,
|
||||
"store_widths": address_widths,
|
||||
"store_columns": address_columns,
|
||||
},
|
||||
}
|
||||
string_formats = {
|
||||
|
@ -485,6 +572,7 @@ string_formats = {
|
|||
"opname": "{op_str[o*4+oo]}",
|
||||
"format": "{str_fmt[o*4+oo]}",
|
||||
"widths": "1, {(o*4+oo)<7 and 1 or 0}, 1",
|
||||
"columns": "1, {(o*4+oo)<7 and 1 or 0}, 1",
|
||||
"types": "{str_types[o*4+oo]}",
|
||||
"args": {
|
||||
"op_str": ["eq", "lt", "gt", "add", "cmp", "ge", "le", "not"],
|
||||
|
@ -516,6 +604,7 @@ swizzle_formats = {
|
|||
"opname": "swizzle",
|
||||
"format": "%Ga.%Sb %gc",
|
||||
"widths": "4, 0, 4",
|
||||
"columns": "1, 0, 1",
|
||||
"types": "{swizzle_types[t]}, ev_short, {swizzle_types[t]}",
|
||||
"args": {
|
||||
"swiz_type": ['F', 'D'],
|
||||
|
@ -528,6 +617,7 @@ swizzle2_formats = {
|
|||
"opname": "swizzle",
|
||||
"format": "%Ga.%Sb %gc",
|
||||
"widths": "{s+2}, 0, {s+2}",
|
||||
"columns": "1, 0, 1",
|
||||
"types": "{swizzle_types[t]}, ev_short, {swizzle_types[t]}",
|
||||
"args": {
|
||||
"swiz_type": ['F', 'D'],
|
||||
|
@ -540,6 +630,7 @@ wedge2_formats = {
|
|||
"opname": "wedge",
|
||||
"format": "%Ga, %Gb, %gc",
|
||||
"widths": "2, 2, 1",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{wedge_types[t]}",
|
||||
"args": {
|
||||
"wedge_type": ['F', 'D'],
|
||||
|
@ -551,6 +642,7 @@ return_formats = {
|
|||
"mnemonic": "return",
|
||||
"opname": "return",
|
||||
"widths": "-1, -1, 0", # width specified by st->c
|
||||
"columns": "-1, -1, 1",
|
||||
"format": "%Mc5",
|
||||
"types": "ev_void, ev_void, ev_void",
|
||||
}
|
||||
|
@ -559,6 +651,7 @@ udivops_formats = {
|
|||
"mnemonic": "{op_udiv[o]}.{udiv_type[t]}",
|
||||
"opname": "{op_udiv[o]}",
|
||||
"widths": "{ss+1}, {ss+1}, {ss+1}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{udiv_types[t]}, {udiv_types[t]}, {udiv_types[t]}",
|
||||
"args": {
|
||||
"op_udiv": ["div", "rem"],
|
||||
|
@ -566,11 +659,26 @@ udivops_formats = {
|
|||
"udiv_types": ["ev_uint", "ev_ulong"],
|
||||
},
|
||||
}
|
||||
# Templates for vector * matrix (vmmul) on the non-2x2 shapes.
# mat_dim[0][d][dd] is the number of components per matrix column (rows) and
# mat_dim[1][d][dd] is the number of columns; opcode names are <cols><rows>.
#   a: vector  (rows wide)
#   b: matrix  (rows wide, cols columns)
#   c: vector  (cols wide: one dot product per matrix column)
vecmat_formats = {
    "opcode": "OP_VMMUL_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
    # These opcodes are OP_VMMUL_*, so they must be named "vmmul" (the same
    # name matmul_formats gives the 2x2 variant), not "outer".
    "mnemonic": "vmmul",
    "opname": "vmmul",
    # Each column of b has as many components as a (rows), and the result has
    # one component per column of b.
    "widths": "{mat_dim[0][d][dd]}, {mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}",
    "columns": "1, {mat_dim[1][d][dd]}, 1",
    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
    "args": {
        "mat_type": ['F', 'D'],
        "mat_types": float_t,
        "mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
                    ((3, 3, 3, 2), (4, 4, 4, 2))),
    },
}
|
||||
vecops_formats = {
|
||||
"opcode": "OP_{op_vop[ooo].upper()}_{vop_type[t]}",
|
||||
"mnemonic": "{op_vop[ooo]}.{vop_type[t]}",
|
||||
"opname": "{op_vop[ooo]}",
|
||||
"widths": "{vec_widths[ooo]}",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{vec_types[t]}, {vec_types[t]}, {vec_types[t]}",
|
||||
"args": {
|
||||
"op_vop": ["cross", "cdot", "vdot", "qdot",
|
||||
|
@ -594,6 +702,7 @@ vecops2_formats = {
|
|||
"mnemonic": "{op_vop[d]}.{vop_type[t]}",
|
||||
"opname": "{op_vop[d]}",
|
||||
"widths": "4, 4, 4",
|
||||
"columns": "1, 1, 1",
|
||||
"types": "{vec_types[t]}, {vec_types[t]}, {vec_types[t]}",
|
||||
"args": {
|
||||
"op_vop": ["qv4mul", "v4qmul"],
|
||||
|
@ -607,6 +716,7 @@ with_formats = {
|
|||
"opname": "with",
|
||||
"format": "%sa, %sb, %sc",
|
||||
"widths": "0, -1, 0",
|
||||
"columns": "0, -1, 0",
|
||||
"types": "ev_short, ev_void, ev_short",
|
||||
}
|
||||
|
||||
|
@ -628,9 +738,12 @@ group_map = {
|
|||
"load": load_formats,
|
||||
"load64": load64_formats,
|
||||
"mathops": mathops_formats,
|
||||
"matmul": matmul_formats,
|
||||
"matvec": matvec_formats,
|
||||
"memset": memset_formats,
|
||||
"move": move_formats,
|
||||
"noop": noop_formats,
|
||||
"outer": outer_formats,
|
||||
"push": push_formats,
|
||||
"pop": pop_formats,
|
||||
"scale": scale_formats,
|
||||
|
@ -644,6 +757,7 @@ group_map = {
|
|||
"swizzle2": swizzle2_formats,
|
||||
"return": return_formats,
|
||||
"udivops": udivops_formats,
|
||||
"vecmat": vecmat_formats,
|
||||
"vecops": vecops_formats,
|
||||
"vecops2": vecops2_formats,
|
||||
"wedge2": wedge2_formats,
|
||||
|
@ -710,12 +824,9 @@ def process_opcode(opcode, group):
|
|||
fmt = eval(f'''f"{gm['format']}"''', params)
|
||||
else:
|
||||
fmt = None
|
||||
if fmt is None:
|
||||
fmt = "0"
|
||||
else:
|
||||
fmt = f'"{fmt}"'
|
||||
inst["fmt"] = fmt
|
||||
inst["fmt"] = "0" if fmt is None else f'"{fmt}"'
|
||||
inst["wd"] = "{%s}" % eval(f'''f"{gm['widths']}"''', params)
|
||||
inst["cl"] = "{%s}" % eval(f'''f"{gm['columns']}"''', params)
|
||||
inst["ty"] = "{%s}" % eval(f'''f"{gm['types']}"''', params)
|
||||
|
||||
import sys
|
||||
|
@ -759,6 +870,7 @@ elif sys.argv[1] == "table":
|
|||
'\\t.opname = {on},\\n'
|
||||
'\\t.mnemonic = {mn},\\n'
|
||||
'\\t.widths = {wd},\\n'
|
||||
'\\t.columns = {cl},\\n'
|
||||
'\\t.types = {ty},\\n'
|
||||
'\\t.fmt = {fmt},\\n'
|
||||
'}},"', group))
|
||||
|
|
|
@ -2291,13 +2291,146 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
|
|||
stk = pr_stack_pop (pr);
|
||||
MM(ivec4) = STK(ivec4);
|
||||
break;
|
||||
|
||||
// Emits the `case` for a matrix * vector multiply whose matrix columns have
// three components.  The case label is OP_MVMUL_<cols>3_<T>: the 3 is
// hard-coded, and `rows` is only used to build the operand type names.
//   a: address of operand A, indexed per column (a[0] .. a[cols-1])
//   b: operand B, indexed per column (b[0] .. b[cols-1])
//   c: operand C, receives the result
// The result is started from column 0 with VectorScale and the remaining
// columns are folded in with VectorMultAdd (presumably out = v * s and
// out = x + s * v respectively -- confirm against their definitions).
// NOTE(review): b is read with the t##vec##rows type but indexed by column;
// confirm that is intended when cols != rows (e.g. the 4,3 and 2,3 uses).
// No trailing `;` after `break`: the invocation supplies it.
#define OP_mvmul_T_3(T,cols,rows,t) \
|
||||
case OP_MVMUL_##cols##3##_##T: \
|
||||
{ \
|
||||
auto a = &OPA(t##vec##rows); \
|
||||
auto b = OPB(t##vec##rows); \
|
||||
auto c = OPC(t##vec##rows); \
|
||||
VectorScale (a[0], b[0], c); \
|
||||
for (int i = 1; i < cols; i++) { \
|
||||
VectorMultAdd(c, b[i], a[i], c); \
|
||||
} \
|
||||
} \
|
||||
break
|
||||
// Emits the `case` OP_MVMUL_<cols><rows>_<T> for a matrix * vector multiply.
//   a: address of operand A, indexed per column (a[0] .. a[cols-1])
//   b: operand B, one scalar per matrix column
//   c: local accumulator of type pr_<t>vec<rows>_t, stored to operand C
// The result is the sum over columns of a[i] * b[i]; it is accumulated in a
// local and written to operand C only after the loop.
// NOTE(review): b is read as a <rows>-component value but indexed up to
// cols-1.  For the instantiations with cols > rows (e.g. 3,2 and 4,2) that
// indexes past the declared type -- confirm whether b should use
// t##vec##cols instead.
// No trailing `;` after `break`: the invocation supplies it.
#define OP_mvmul_T(T,cols,rows,t) \
|
||||
case OP_MVMUL_##cols##rows##_##T: \
|
||||
{ \
|
||||
auto a = &OPA(t##vec##rows); \
|
||||
auto b = OPB(t##vec##rows); \
|
||||
pr_##t##vec##rows##_t c = a[0] * b[0]; \
|
||||
for (int i = 1; i < cols; i++) { \
|
||||
c += a[i] * b[i]; \
|
||||
} \
|
||||
OPC(t##vec##rows) = c; \
|
||||
} \
|
||||
break
|
||||
// 0 0100
|
||||
// spare
|
||||
OP_mvmul_T (F,3,2,);
|
||||
OP_mvmul_T_3(F,3,3,);
|
||||
OP_mvmul_T (F,3,4,);
|
||||
OP_mvmul_T_3(F,2,3,);
|
||||
OP_mvmul_T (D,3,2,d);
|
||||
OP_mvmul_T_3(D,3,3,d);
|
||||
OP_mvmul_T (D,3,4,d);
|
||||
OP_mvmul_T_3(D,2,3,d);
|
||||
OP_mvmul_T (F,4,2,);
|
||||
OP_mvmul_T_3(F,4,3,);
|
||||
OP_mvmul_T (F,4,4,);
|
||||
OP_mvmul_T (F,2,4,);
|
||||
OP_mvmul_T (D,4,2,d);
|
||||
OP_mvmul_T_3(D,4,3,d);
|
||||
OP_mvmul_T (D,4,4,d);
|
||||
OP_mvmul_T (D,2,4,d);
|
||||
|
||||
// Emits the `case` for a vector * matrix multiply whose vector and matrix
// columns have three components.  The case label is OP_VMMUL_<cols>3_<T>:
// the 3 is hard-coded, and `rows` is only used to build the operand type
// names.
//   a: operand A, the vector
//   b: address of operand B, indexed per column (b[0] .. b[cols-1])
//   c: address of the first component of operand C (typed t##vec##cols)
// Each of the `cols` result components is DotProduct (a, b[i]), written
// straight into operand C as it is computed.
// No trailing `;` after `break`: the invocation supplies it.
#define OP_vmmul_T_3(T,cols,rows,t) \
|
||||
case OP_VMMUL_##cols##3##_##T: \
|
||||
{ \
|
||||
auto a = OPA(t##vec##rows); \
|
||||
auto b = &OPB(t##vec##rows); \
|
||||
auto c = &OPC(t##vec##cols)[0]; \
|
||||
for (int i = 0; i < cols; i++) { \
|
||||
c[i] = DotProduct (a, b[i]); \
|
||||
} \
|
||||
} \
|
||||
break
|
||||
// Emits the `case` OP_VMMUL_<cols><rows>_<T> for a vector * matrix multiply.
//   a:  operand A, the vector (t##vec##rows)
//   b:  address of operand B, indexed per column (b[0] .. b[cols-1])
//   c:  local result, stored to operand C after the loop
//   t2: suffix pasted into the dot-product helper name, dot<rows><t2>;
//       dot4f / dot4d are #defined to dotf / dotd around the invocations.
// Component i of the result is element 0 of dot<rows><t2> (a, b[i]).
// NOTE(review): c is declared and stored as t##vec##rows but filled for
// i < cols.  Where cols > rows (e.g. 3,2 and 4,2) the loop writes past the
// declared type, and where cols < rows (2,4) the upper components of c are
// stored without having been set.  OP_vmmul_T_3 types its result with
// `cols`; confirm whether this macro should as well.
// No trailing `;` after `break`: the invocation supplies it.
#define OP_vmmul_T(T,cols,rows,t,t2) \
|
||||
case OP_VMMUL_##cols##rows##_##T: \
|
||||
{ \
|
||||
auto a = OPA(t##vec##rows); \
|
||||
auto b = &OPB(t##vec##rows); \
|
||||
pr_##t##vec##rows##_t c; \
|
||||
for (int i = 0; i < cols; i++) { \
|
||||
c[i] = dot##rows##t2(a, b[i])[0]; \
|
||||
} \
|
||||
OPC(t##vec##rows) = c; \
|
||||
} \
|
||||
break
|
||||
#define dot4f dotf
|
||||
#define dot4d dotd
|
||||
// 0 0101
|
||||
// spare
|
||||
OP_vmmul_T (F,3,2,,f);
|
||||
OP_vmmul_T_3(F,3,3,);
|
||||
OP_vmmul_T (F,3,4,,f);
|
||||
OP_vmmul_T_3(F,2,3,);
|
||||
OP_vmmul_T (D,3,2,d,d);
|
||||
OP_vmmul_T_3(D,3,3,d);
|
||||
OP_vmmul_T (D,3,4,d,d);
|
||||
OP_vmmul_T_3(D,2,3,d);
|
||||
OP_vmmul_T (F,4,2,,f);
|
||||
OP_vmmul_T_3(F,4,3,);
|
||||
OP_vmmul_T (F,4,4,,f);
|
||||
OP_vmmul_T (F,2,4,,f);
|
||||
OP_vmmul_T (D,4,2,d,d);
|
||||
OP_vmmul_T_3(D,4,3,d);
|
||||
OP_vmmul_T (D,4,4,d,d);
|
||||
OP_vmmul_T (D,2,4,d,d);
|
||||
#undef dot4f
|
||||
#undef dot4d
|
||||
|
||||
// Emits the `case` OP_OUTER_<cols><rows>_<T> for a vector outer product.
//   a, b: operands A and B, both read as t##vec##rows
//   c:    address of operand C, indexed as c[column][component]
// Fills `cols` columns of `rows` components each with c[i][j] = a[i] * b[j],
// writing directly into operand C.
// NOTE(review): a is indexed by the column counter (up to cols-1) and b by
// the component counter (up to rows-1), while both are typed with `rows`.
// The generator's outer_formats table describes a as rows wide and b as cols
// wide; confirm the index/type pairing for the cols != rows instantiations.
// No trailing `;` after `break`: the invocation supplies it.
#define OP_outer_T(T,cols,rows,t) \
|
||||
case OP_OUTER_##cols##rows##_##T: \
|
||||
{ \
|
||||
auto a = OPA(t##vec##rows); \
|
||||
auto b = OPB(t##vec##rows); \
|
||||
auto c = &OPC(t##vec##rows); \
|
||||
for (int i = 0; i < cols; i++) { \
|
||||
for (int j = 0; j < rows; j++) { \
|
||||
c[i][j] = a[i] * b[j]; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
break
|
||||
// 0 0110
|
||||
// spare
|
||||
OP_outer_T(F,3,2,);
|
||||
OP_outer_T(F,3,3,);
|
||||
OP_outer_T(F,3,4,);
|
||||
OP_outer_T(F,2,3,);
|
||||
OP_outer_T(D,3,2,d);
|
||||
OP_outer_T(D,3,3,d);
|
||||
OP_outer_T(D,3,4,d);
|
||||
OP_outer_T(D,2,3,d);
|
||||
OP_outer_T(F,4,2,);
|
||||
OP_outer_T(F,4,3,);
|
||||
OP_outer_T(F,4,4,);
|
||||
OP_outer_T(F,2,4,);
|
||||
OP_outer_T(D,4,2,d);
|
||||
OP_outer_T(D,4,3,d);
|
||||
OP_outer_T(D,4,4,d);
|
||||
OP_outer_T(D,2,4,d);
|
||||
|
||||
// 0 0111
|
||||
OP_mvmul_T(F,2,2,);
|
||||
OP_vmmul_T(F,2,2,,f);
|
||||
OP_outer_T(F,2,2,);
|
||||
case OP_WEDGE_F_2:
|
||||
{
|
||||
auto a = OPA(vec2);
|
||||
auto b = OPB(vec2);
|
||||
OPC(float) = a[0] * b[1] - a[1] * b[0];
|
||||
}
|
||||
break;
|
||||
OP_mvmul_T(D,2,2,d);
|
||||
OP_vmmul_T(D,2,2,d,d);
|
||||
OP_outer_T(D,2,2,d);
|
||||
case OP_WEDGE_D_2:
|
||||
{
|
||||
auto a = OPA(dvec2);
|
||||
auto b = OPB(dvec2);
|
||||
OPC(double) = a[0] * b[1] - a[1] * b[0];
|
||||
}
|
||||
break;
|
||||
case OP_SWIZZLE_F_2:
|
||||
{
|
||||
auto s2 = OPA(ivec2);
|
||||
|
@ -2328,21 +2461,9 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
|
|||
storevec3l (&OPC(long), s4);
|
||||
}
|
||||
break;
|
||||
case OP_WEDGE_F_2:
|
||||
{
|
||||
auto a = OPA(vec2);
|
||||
auto b = OPB(vec2);
|
||||
OPC(float) = a[0] * b[1] - a[1] * b[0];
|
||||
}
|
||||
break;
|
||||
// spare
|
||||
case OP_WEDGE_D_2:
|
||||
{
|
||||
auto a = OPA(dvec2);
|
||||
auto b = OPB(dvec2);
|
||||
OPC(double) = a[0] * b[1] - a[1] * b[0];
|
||||
}
|
||||
break;
|
||||
// spare
|
||||
// spare
|
||||
// spare
|
||||
|
||||
#define OP_cmp_1(OP, T, rt, cmp, ct) \
|
||||
|
|
Loading…
Reference in a new issue