0
0
Fork 0
mirror of https://git.code.sf.net/p/quake/quakeforge synced 2025-04-11 11:51:50 +00:00

[gamecode] Add matrix instructions

Only matrix-vector, vector-matrix and vector-vector outer products (no
more room), but that's enough to get decent performance out of
matrix-matrix and matrix-scalar (both of which can be done as a set of
matrix-vector or vector-scalar products).

Progs version bumped because I found that I'd put the swizzle and 2d
wedge ops in the wrong spot (compared to both intention and docs) and
rather than adjust the docs, I took advantage of the opportunity to get
a nicer layout for the wedge products (nestled into the spare slots left
by the 2x2 matrix ops, which seems fitting as the 2d wedge is the
determinant of a 2x2 matrix).
This commit is contained in:
Bill Currie 2024-11-26 00:22:56 +09:00
parent a4eefa6204
commit efc1fdbd29
3 changed files with 273 additions and 39 deletions
include/QF/progs
libs/gamecode

View file

@ -482,6 +482,7 @@ typedef struct opcode_s {
const char *opname;
const char *mnemonic;
int widths[3]; ///< component count for each argument (1-4)
int columns[3]; ///< column count for each argument (1-4)
etype_t types[3]; ///< component type for each argument
const char *fmt;
} opcode_t;
@ -563,7 +564,7 @@ typedef struct pr_va_list_s {
|(((0x##c) & 0xfff) << 0) )
#define PROG_ID_VERSION 6
#define PROG_V6P_VERSION PROG_VERSION_ENCODE(0,fff,00a)
#define PROG_VERSION PROG_VERSION_ENCODE(0,fff,011)
#define PROG_VERSION PROG_VERSION_ENCODE(0,fff,012)
typedef struct pr_chunk_s {
pr_uint_t offset;

View file

@ -3,8 +3,12 @@ bitmap_txt = """
0 0001 mmss store
0 0010 mmss push
0 0011 mmss pop
0 0111 00ts swizzle2
0 0111 01t0 wedge2
0 0100 dtdd matvec
0 0101 dtdd vecmat
0 0110 dtdd outer
0 0111 0too matmul
0 0111 0t11 wedge2
0 0111 10ts swizzle2
0 1ccc ttss compare
0 0000 00nn
0 0000 0000 noop
@ -58,6 +62,17 @@ address_widths = [
[ "4, 0", "1, 1", "1, 0", "1, 1", ],
[ "-1, 0", "1, 1", "1, 0", "1, 1", "-1, 0", "-1, 1"],
]
# Column-count strings for the address operands of the memory instructions.
# The table is consumed under the names load_columns / store_columns
# (indexed [size][mode]) and, for its last row only, lea_columns (indexed by
# mode; that row has six entries because lea2 indexes it with m+4).
# Each string supplies the column counts for the first two operands; the
# instruction template appends the count for the third.
address_columns = [
[ "1, 0", "1, 1", "1, 0", "1, 1", ],
[ "1, 0", "1, 1", "1, 0", "1, 1", ],
[ "1, 0", "1, 1", "1, 0", "1, 1", ],
[ "1, 0", "1, 1", "1, 0", "1, 1", ],
[ "-1, 0", "1, 1", "1, 0", "1, 1", "-1, 0", "-1, 1"],
]
# Operand width / column strings for the move and memset instruction groups,
# selected by the group's "oo" field ("{move_widths[oo]}" etc.).  The trailing
# None lines up with the None entry in op_move / op_memset.
move_widths = [ "-1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
move_columns = [ "-1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
memset_widths = [ "1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
memset_columns = [ "1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
#store, pop, lea
store_fmt = [
"%ga",
@ -92,6 +107,7 @@ adjstk_formats = {
"opname": "adjstk",
"format": "%sa, %sb",
"widths": "0, 0, 0",
"columns": "0, 0, 0",
"types": "ev_short, ev_short, ev_invalid",
}
bitops_formats = {
@ -100,6 +116,7 @@ bitops_formats = {
"opname": "{op_bit[oo]}",
"format": "{bit_fmt[oo]}",
"widths": "{ss+1}, { oo < 3 and ss+1 or 0}, {ss+1}",
"columns": "1, 1, 1",
"types": "{bit_types[t]}, {oo < 3 and bit_types[t] or 'ev_invalid'}, {bit_types[t]}",
"args": {
"op_bit": ["bitand", "bitor", "bitxor", "bitnot"],
@ -119,6 +136,7 @@ branch_formats = {
"opname": "{op_cond[c*4+cc]}",
"format": "{cond_fmt[c*4+cc]}{branch_fmt[0]}",
"widths": "0, 0, 1",
"columns": "0, 0, 1",
"types": "ev_short, ev_invalid, ev_int",
"args": {
"op_mode": "ABCD",
@ -134,6 +152,7 @@ call_formats = {
"opname": "call",
"format": "{call_fmt[mm]}",
"widths": "{call_widths[mm]}, -1",
"columns": "{call_columns[mm]}, -1",
"types": "{call_types[mm]}, ev_void",
"args": {
"op_mode": ".BCD",
@ -149,7 +168,8 @@ call_formats = {
"ev_ptr, ev_short",
"ev_ptr, ev_int",
],
"call_widths": [ None, "1, 0", "1, 0", "1, 1" ]
"call_widths": [ None, "1, 0", "1, 0", "1, 1" ],
"call_columns": [ None, "1, 0", "1, 0", "1, 1" ],
},
}
compare_formats = {
@ -157,6 +177,7 @@ compare_formats = {
"mnemonic": "{op_cmp[ccc]}.{cmp_type[tt]}",
"opname": "{op_cmp[ccc]}",
"widths": "{ss+1}, {ss+1}, {ss+1}",
"columns": "1, 1, 1",
"types": "{cmp_types[tt]}, {cmp_types[tt]}, {res_types[tt & 2]}",
"args": {
"op_cmp": compare_ccc,
@ -170,6 +191,7 @@ compare2_formats = {
"mnemonic": "{op_cmp[ccc]}.{cmp_type[t]}",
"opname": "{op_cmp[ccc]}",
"widths": "{ss+1}, {ss+1}, {ss+1}",
"columns": "1, 1, 1",
"types": "{cmp_types[t]}, {cmp_types[t]}, ev_int",
"args": {
"op_cmp": compare_ccc,
@ -183,6 +205,7 @@ constant_formats = {
"opname": "ldconst",
"format": "%sa, %sb, %gc",
"widths": "0, 0, -1",
"columns": "0, 0, -1",
"types": "ev_short, ev_short, ev_void",
}
convert_formats = {
@ -191,6 +214,7 @@ convert_formats = {
"opname": "conv",
"format": "%Ga %Cb %gc",
"widths": "-1, 0, -1",
"columns": "1, 0, 1",
"types": "ev_void, ev_short, ev_void",
}
fbitops_formats = {
@ -199,6 +223,7 @@ fbitops_formats = {
"opname": "{op_fbit[oo]}",
"format": "{fbit_fmt[oo]}",
"widths": "1, 1, 1",
"columns": "1, 1, 1",
"types": "{fbit_types[0]}, {fbit_types[oo==3]}, {fbit_types[0]}",
"args": {
"op_fbit": ["bitand", "bitor", "bitxor", "bitnot"],
@ -217,6 +242,7 @@ extend_formats = {
"opname": "extend",
"format": "%Ga%Xb, %gc",
"widths": "-1, 0, -1",
"columns": "1, 0, 1",
"types": "ev_void, ev_short, ev_void",
}
hops_formats = {
@ -225,6 +251,7 @@ hops_formats = {
"opname": "hops",
"format": "%Hb %Ga, %gc",
"widths": "-1, 0, 1",
"columns": "1, 0, 1",
"types": "ev_void, ev_short, ev_void",
}
jump_formats = {
@ -233,6 +260,7 @@ jump_formats = {
"opname": "jump",
"format": "{jump_fmt[mm]}",
"widths": "{jump_widths[mm]}, 0",
"columns": "{jump_columns[mm]}, 0",
"types": "{jump_types[mm]}",
"args": {
"op_mode": "ABCD",
@ -243,7 +271,8 @@ jump_formats = {
"ev_ptr, ev_short, ev_invalid",
"ev_ptr, ev_int, ev_invalid",
],
"jump_widths": [ "0, 0", "1, 1", "1, 0", "1, 1" ]
"jump_widths": [ "0, 0", "1, 1", "1, 0", "1, 1" ],
"jump_columns": [ "0, 0", "1, 1", "1, 0", "1, 1" ],
},
}
load64_formats = {
@ -252,12 +281,14 @@ load64_formats = {
"opname": "load64",
"format": "{load_fmt[mm]}, %gc",
"widths": "{load_widths[s+2][mm]}, {s+3}",
"columns": "{load_columns[s+2][mm]}, {s+3}",
"types": "{load_types[mm]}, ev_void",
"args": {
"op_mode": address_mode,
"load_fmt": load_fmt,
"load_types": address_types,
"load_widths": address_widths,
"load_columns": address_columns,
},
}
lea_formats = {
@ -266,12 +297,14 @@ lea_formats = {
"opname": "lea",
"format": "{lea_fmt[mm]}, %gc",
"widths": "{lea_widths[mm]}, 1",
"columns": "{lea_columns[mm]}, 1",
"types": "{lea_types[mm]}, ev_ptr",
"args": {
"op_mode": address_mode,
"lea_fmt": store_fmt,
"lea_types": address_types,
"lea_widths": address_widths[4],
"lea_columns": address_columns[4],
},
}
lea2_formats = {
@ -280,12 +313,14 @@ lea2_formats = {
"opname": "lea",
"format": "{lea_fmt[m+4]}, %gc",
"widths": "{lea_widths[m+4]}, 1",
"columns": "{lea_columns[m+4]}, 1",
"types": "{lea_types[m+4]}, ev_ptr",
"args": {
"op_mode": "EF",
"lea_fmt": store_fmt,
"lea_types": address_types,
"lea_widths": address_widths[4],
"lea_columns": address_columns[4],
},
}
load_formats = {
@ -294,12 +329,14 @@ load_formats = {
"opname": "load",
"format": "{load_fmt[mm]}, %gc",
"widths": "{load_widths[ss][mm]}, {ss+1}",
"columns": "{load_columns[ss][mm]}, {ss+1}",
"types": "{load_types[mm]}, ev_void",
"args": {
"op_mode": address_mode,
"load_fmt": load_fmt,
"load_types": address_types,
"load_widths": address_widths,
"load_columns": address_columns,
},
}
mathops_formats = {
@ -307,6 +344,7 @@ mathops_formats = {
"mnemonic": "{op_math[ooo]}.{math_type[tt]}",
"opname": "{op_math[ooo]}",
"widths": "{ss+1}, {ss+1}, {ss+1}",
"columns": "1, 1, 1",
"types": "{math_types[tt]}, {math_types[tt]}, {math_types[tt]}",
"args": {
"op_math": ["mul", "div", "rem", "mod", "add", "sub", None, None],
@ -314,23 +352,50 @@ mathops_formats = {
"math_types": etype_tt,
},
}
# 2x2 matrix instructions (mvmul, vmmul, outer), selected by "oo"; "t" picks
# float or double.  Every operand is two components wide; cols_matmul gives
# the per-operand column counts, i.e. which operand is the 2x2 matrix:
#   mvmul: a is the matrix, vmmul: b is the matrix, outer: c (the result) is
#   the matrix.
matmul_formats = {
    "opcode": "OP_{op_matmul[oo].upper()}_22_{mat_type[t]}",
    "mnemonic": "{op_matmul[oo]}",
    "opname": "{op_matmul[oo]}",
    "widths": "2, 2, 2",
    "columns": "{cols_matmul[oo]}",
    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
    "args": {
        "op_matmul": ["mvmul", "vmmul", "outer"],
        # The outer product of two 2-vectors is a 2-column matrix, matching
        # the "1, 1, {columns}" layout used by outer_formats.
        "cols_matmul": ["2, 1, 1", "1, 2, 1", "1, 1, 2", None],
        "mat_type": ['F', 'D'],
        "mat_types": float_t,
        # NOTE(review): mat_dim is not referenced by any template string in
        # this group; kept unchanged in case other code reads it.
        "mat_dim": ((2, 3, 4, 3, 2, 3, 4, 4),
                    (3, 3, 3, 2, 4, 4, 4, 2)),
    },
}
# Matrix * vector (mvmul) instructions for the non-2x2 shapes.
# mat_dim[0][d][dd] is the component width of operand a (the matrix, whose
# column count is mat_dim[1][d][dd]) and of the result c; mat_dim[1][d][dd]
# is also the width of the vector operand b.  The opcode name encodes the
# column count first, then the width.  "t" selects float or double.
matvec_formats = {
"opcode": "OP_MVMUL_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
"mnemonic": "mvmul",
"opname": "mvmul",
"widths": "{mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}, {mat_dim[0][d][dd]}",
"columns": "{mat_dim[1][d][dd]}, 1, 1",
"types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
"args": {
"mat_type": ['F', 'D'],
"mat_types": float_t,
# Indexed [0 = width, 1 = columns][d][dd].
"mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
((3, 3, 3, 2), (4, 4, 4, 2))),
},
}
memset_formats = {
"opcode": "OP_MEMSET_{op_memset[oo].upper()}",
"mnemonic": "memset.{op_memset[oo]}",
"opname": "memset{suff_memset[oo]}",
"format": "{memset_fmt[oo]}",
"widths": "{memset_widths[oo]}",
"columns": "{memset_columns[oo]}",
"types": "{memset_types[oo]}",
"args": {
"op_memset": ["i", "p", "pi", None],
"suff_memset": ["", "p", "p", None],
"memset_fmt": ["%Ga, %sb, %gc", "%Ga, %Gb, %Gc", "%Ga, %sb, %Gc", None],
"memset_widths": [
"1, 0, -1",
"1, 1, 1",
"1, 0, 1",
None,
],
"memset_widths": memset_widths,
"memset_columns": memset_columns,
"memset_types": [
"ev_int, ev_short, ev_void",
"ev_int, ev_int, ev_ptr",
@ -344,17 +409,14 @@ move_formats = {
"opname": "move{suff_move[oo]}",
"format": "{move_fmt[oo]}",
"widths": "{move_widths[oo]}",
"columns": "{move_columns[oo]}",
"types": "{move_types[oo]}",
"args": {
"op_move": ["i", "p", "pi", None],
"suff_move": ["", "p", "p", None],
"move_fmt": ["%Ga, %sb, %gc", "%Ga, %Gb, %Gc", "%Ga, %sb, %Gc", None],
"move_widths": [
"-1, 0, -1",
"1, 1, 1",
"1, 0, 1",
None,
],
"move_widths": move_widths,
"move_columns": move_columns,
"move_types": [
"ev_void, ev_short, ev_void",
"ev_ptr, ev_int, ev_ptr",
@ -368,14 +430,30 @@ noop_formats = {
"opname": "nop",
"format": "there were plums...",
"widths": "0, 0, 0",
"columns": "0, 0, 0",
"types": "ev_invalid, ev_invalid, ev_invalid",
}
# Vector-vector outer product instructions for the non-2x2 shapes.
# Operand a is mat_dim[0][d][dd] components wide, operand b is
# mat_dim[1][d][dd] components wide, and the result c is a matrix that is
# mat_dim[0][d][dd] wide with mat_dim[1][d][dd] columns.  The opcode name
# encodes the column count first, then the width.  "t" selects float or
# double.
outer_formats = {
"opcode": "OP_OUTER_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
"mnemonic": "outer",
"opname": "outer",
"widths": "{mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}, {mat_dim[0][d][dd]}",
"columns": "1, 1, {mat_dim[1][d][dd]}",
"types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
"args": {
"mat_type": ['F', 'D'],
"mat_types": float_t,
# Indexed [0 = width, 1 = columns][d][dd].
"mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
((3, 3, 3, 2), (4, 4, 4, 2))),
},
}
push_formats = {
"opcode": "OP_PUSH_{op_mode[mm]}_{ss+1}",
"mnemonic": "push",
"opname": "push",
"format": "{push_fmt[mm]}",
"widths": "{ss+1}, 0, 0",
"columns": "1, 0, 0",
"types": "{push_types[mm]}, ev_invalid",
"args": {
"op_mode": address_mode,
@ -389,6 +467,7 @@ pop_formats = {
"opname": "pop",
"format": "{pop_fmt[mm]}",
"widths": "{ss+1}, 0, 0",
"columns": "1, 0, 0",
"types": "{pop_types[mm]}, ev_invalid",
"args": {
"op_mode": address_mode,
@ -401,6 +480,7 @@ scale_formats = {
"mnemonic": "scale.{scale_type[t]}",
"opname": "scale",
"widths": "{ss+1}, 1, {ss+1}",
"columns": "1, 1, 1",
"types": "{scale_types[t]}, {scale_types[t]}, {scale_types[t]}",
"args": {
"scale_type": ['F', 'D'],
@ -412,6 +492,7 @@ shiftops_formats = {
"mnemonic": "{mn_shift[u*2+r]}.{shift_type[u*2+t]}",
"opname": "{op_shift[u*2+r]}",
"widths": "{ss+1}, {ss+1}, {ss+1}",
"columns": "1, 1, 1",
"types": "{shift_types[t][u]}, {shift_types[t][0]}, {shift_types[t][u]}",
"args": {
"mn_shift": ["shl", "asr", "shl", "shr"],
@ -429,6 +510,7 @@ statef_formats = {
"opname": "state",
"format": "{state_fmt[c]}",
"widths": "1, 1, {c}",
"columns": "1, 1, 1",
"types": "ev_float, ev_func, {state_types[c]}",
"args": {
"state": ["ft", "ftt"],
@ -442,6 +524,7 @@ stated_formats = {
"opname": "state",
"format": "{state_fmt[c]}",
"widths": "1, 1, {c}",
"columns": "1, 1, 1",
"types": "ev_int, ev_func, {state_types[c]}",
"args": {
"state": ["dt", "dtt"],
@ -455,6 +538,7 @@ store_formats = {
"opname": "{store_op[mm]}",
"format": "%Gc, {store_fmt[mm]}",
"widths": "{store_widths[ss][mm]}, {ss+1}",
"columns": "{store_columns[ss][mm]}, {ss+1}",
"types": "{store_types[mm]}, ev_void",
"args": {
"op_mode": address_mode,
@ -462,6 +546,7 @@ store_formats = {
"store_op": ["assign", "store", "store", "store"],
"store_types": address_types,
"store_widths": address_widths,
"store_columns": address_columns,
},
}
store64_formats = {
@ -470,6 +555,7 @@ store64_formats = {
"opname": "{store_op[mm]}64",
"format": "%Gc, {store_fmt[mm]}",
"widths": "{store_widths[s+2][mm]}, {s+3}",
"columns": "{store_columns[s+2][mm]}, 1",
"types": "{store_types[mm]}, ev_void",
"args": {
"op_mode": address_mode,
@ -477,6 +563,7 @@ store64_formats = {
"store_op": ["assign", "store", "store", "store"],
"store_types": address_types,
"store_widths": address_widths,
"store_columns": address_columns,
},
}
string_formats = {
@ -485,6 +572,7 @@ string_formats = {
"opname": "{op_str[o*4+oo]}",
"format": "{str_fmt[o*4+oo]}",
"widths": "1, {(o*4+oo)<7 and 1 or 0}, 1",
"columns": "1, {(o*4+oo)<7 and 1 or 0}, 1",
"types": "{str_types[o*4+oo]}",
"args": {
"op_str": ["eq", "lt", "gt", "add", "cmp", "ge", "le", "not"],
@ -516,6 +604,7 @@ swizzle_formats = {
"opname": "swizzle",
"format": "%Ga.%Sb %gc",
"widths": "4, 0, 4",
"columns": "1, 0, 1",
"types": "{swizzle_types[t]}, ev_short, {swizzle_types[t]}",
"args": {
"swiz_type": ['F', 'D'],
@ -528,6 +617,7 @@ swizzle2_formats = {
"opname": "swizzle",
"format": "%Ga.%Sb %gc",
"widths": "{s+2}, 0, {s+2}",
"columns": "1, 0, 1",
"types": "{swizzle_types[t]}, ev_short, {swizzle_types[t]}",
"args": {
"swiz_type": ['F', 'D'],
@ -540,6 +630,7 @@ wedge2_formats = {
"opname": "wedge",
"format": "%Ga, %Gb, %gc",
"widths": "2, 2, 1",
"columns": "1, 1, 1",
"types": "{wedge_types[t]}",
"args": {
"wedge_type": ['F', 'D'],
@ -551,6 +642,7 @@ return_formats = {
"mnemonic": "return",
"opname": "return",
"widths": "-1, -1, 0", # width specified by st->c
"columns": "-1, -1, 1",
"format": "%Mc5",
"types": "ev_void, ev_void, ev_void",
}
@ -559,6 +651,7 @@ udivops_formats = {
"mnemonic": "{op_udiv[o]}.{udiv_type[t]}",
"opname": "{op_udiv[o]}",
"widths": "{ss+1}, {ss+1}, {ss+1}",
"columns": "1, 1, 1",
"types": "{udiv_types[t]}, {udiv_types[t]}, {udiv_types[t]}",
"args": {
"op_udiv": ["div", "rem"],
@ -566,11 +659,26 @@ udivops_formats = {
"udiv_types": ["ev_uint", "ev_ulong"],
},
}
# Vector * matrix (vmmul) instructions for the non-2x2 shapes.
# Operand a is a vector mat_dim[0][d][dd] components wide, operand b is the
# matrix (columns of that same width, mat_dim[1][d][dd] of them) and the
# result c is a vector with one component per matrix column.  The opcode
# name encodes the column count first, then the width.  "t" selects float or
# double.
vecmat_formats = {
    "opcode": "OP_VMMUL_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
    # Named "vmmul" to agree with the OP_VMMUL_* opcode and with the 2x2
    # variant in matmul_formats (this previously duplicated "outer").
    "mnemonic": "vmmul",
    "opname": "vmmul",
    # a: vector, b: matrix column width, c: one result component per column.
    "widths": "{mat_dim[0][d][dd]}, {mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}",
    "columns": "1, {mat_dim[1][d][dd]}, 1",
    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
    "args": {
        "mat_type": ['F', 'D'],
        "mat_types": float_t,
        # Indexed [0 = width, 1 = columns][d][dd].
        "mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
                    ((3, 3, 3, 2), (4, 4, 4, 2))),
    },
}
vecops_formats = {
"opcode": "OP_{op_vop[ooo].upper()}_{vop_type[t]}",
"mnemonic": "{op_vop[ooo]}.{vop_type[t]}",
"opname": "{op_vop[ooo]}",
"widths": "{vec_widths[ooo]}",
"columns": "1, 1, 1",
"types": "{vec_types[t]}, {vec_types[t]}, {vec_types[t]}",
"args": {
"op_vop": ["cross", "cdot", "vdot", "qdot",
@ -594,6 +702,7 @@ vecops2_formats = {
"mnemonic": "{op_vop[d]}.{vop_type[t]}",
"opname": "{op_vop[d]}",
"widths": "4, 4, 4",
"columns": "1, 1, 1",
"types": "{vec_types[t]}, {vec_types[t]}, {vec_types[t]}",
"args": {
"op_vop": ["qv4mul", "v4qmul"],
@ -607,6 +716,7 @@ with_formats = {
"opname": "with",
"format": "%sa, %sb, %sc",
"widths": "0, -1, 0",
"columns": "0, -1, 0",
"types": "ev_short, ev_void, ev_short",
}
@ -628,9 +738,12 @@ group_map = {
"load": load_formats,
"load64": load64_formats,
"mathops": mathops_formats,
"matmul": matmul_formats,
"matvec": matvec_formats,
"memset": memset_formats,
"move": move_formats,
"noop": noop_formats,
"outer": outer_formats,
"push": push_formats,
"pop": pop_formats,
"scale": scale_formats,
@ -644,6 +757,7 @@ group_map = {
"swizzle2": swizzle2_formats,
"return": return_formats,
"udivops": udivops_formats,
"vecmat": vecmat_formats,
"vecops": vecops_formats,
"vecops2": vecops2_formats,
"wedge2": wedge2_formats,
@ -710,12 +824,9 @@ def process_opcode(opcode, group):
fmt = eval(f'''f"{gm['format']}"''', params)
else:
fmt = None
if fmt is None:
fmt = "0"
else:
fmt = f'"{fmt}"'
inst["fmt"] = fmt
inst["fmt"] = "0" if fmt is None else f'"{fmt}"'
inst["wd"] = "{%s}" % eval(f'''f"{gm['widths']}"''', params)
inst["cl"] = "{%s}" % eval(f'''f"{gm['columns']}"''', params)
inst["ty"] = "{%s}" % eval(f'''f"{gm['types']}"''', params)
import sys
@ -759,6 +870,7 @@ elif sys.argv[1] == "table":
'\\t.opname = {on},\\n'
'\\t.mnemonic = {mn},\\n'
'\\t.widths = {wd},\\n'
'\\t.columns = {cl},\\n'
'\\t.types = {ty},\\n'
'\\t.fmt = {fmt},\\n'
'}},"', group))

View file

@ -2291,13 +2291,146 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
stk = pr_stack_pop (pr);
MM(ivec4) = STK(ivec4);
break;
/* Emit the switch case for a matrix * vector multiply whose matrix columns
 * are 3-component vectors (the case label hard-codes 3 for the row count).
 *   T    opcode type suffix (F or D)
 *   cols number of matrix columns
 *   rows components per column (only used to name the operand type)
 *   t    operand type prefix (empty for float, d for double)
 * Operand a is addressed as an array of columns; the result is built in
 * operand c from a[0] and b[0], then each remaining column is folded in with
 * VectorMultAdd.
 * NOTE(review): b is fetched with the rows-wide (3 component) operand type
 * but indexed up to cols-1; presumably that type is an array so `auto`
 * yields a pointer -- confirm.
 */
#define OP_mvmul_T_3(T,cols,rows,t) \
case OP_MVMUL_##cols##3##_##T: \
{ \
auto a = &OPA(t##vec##rows); \
auto b = OPB(t##vec##rows); \
auto c = OPC(t##vec##rows); \
VectorScale (a[0], b[0], c); \
for (int i = 1; i < cols; i++) { \
VectorMultAdd(c, b[i], a[i], c); \
} \
} \
break
/* Emit the switch case for a matrix * vector multiply: c = A * b.
 *   T    opcode type suffix (F or D)
 *   cols number of matrix columns (and components of the vector operand b)
 *   rows components per matrix column (and of the result c)
 *   t    operand type prefix (empty for float, d for double)
 * Operand a is addressed as an array of `cols` column vectors.  The result
 * is accumulated in a local and stored to operand c only once complete.
 * The vector operand is fetched with the cols-wide type so that b[i] is in
 * range for every column, including shapes where cols > rows.
 */
#define OP_mvmul_T(T,cols,rows,t) \
		case OP_MVMUL_##cols##rows##_##T: \
			{ \
				auto a = &OPA(t##vec##rows); \
				auto b = OPB(t##vec##cols); \
				pr_##t##vec##rows##_t c = a[0] * b[0]; \
				for (int i = 1; i < cols; i++) { \
					c += a[i] * b[i]; \
				} \
				OPC(t##vec##rows) = c; \
			} \
			break
// 0 0100
// spare
OP_mvmul_T (F,3,2,);
OP_mvmul_T_3(F,3,3,);
OP_mvmul_T (F,3,4,);
OP_mvmul_T_3(F,2,3,);
OP_mvmul_T (D,3,2,d);
OP_mvmul_T_3(D,3,3,d);
OP_mvmul_T (D,3,4,d);
OP_mvmul_T_3(D,2,3,d);
OP_mvmul_T (F,4,2,);
OP_mvmul_T_3(F,4,3,);
OP_mvmul_T (F,4,4,);
OP_mvmul_T (F,2,4,);
OP_mvmul_T (D,4,2,d);
OP_mvmul_T_3(D,4,3,d);
OP_mvmul_T (D,4,4,d);
OP_mvmul_T (D,2,4,d);
/* Emit the switch case for a vector * matrix multiply whose matrix columns
 * are 3-component vectors (the case label hard-codes 3 for the row count).
 *   T    opcode type suffix (F or D)
 *   cols number of matrix columns (and components of the result)
 *   rows components per column (only used to name the operand types)
 *   t    operand type prefix (empty for float, d for double)
 * Operand b is addressed as an array of columns; component i of the result
 * is DotProduct (a, b[i]), written straight into operand c (no temporary).
 */
#define OP_vmmul_T_3(T,cols,rows,t) \
case OP_VMMUL_##cols##3##_##T: \
{ \
auto a = OPA(t##vec##rows); \
auto b = &OPB(t##vec##rows); \
auto c = &OPC(t##vec##cols)[0]; \
for (int i = 0; i < cols; i++) { \
c[i] = DotProduct (a, b[i]); \
} \
} \
break
/* Emit the switch case for a vector * matrix multiply: c = a * B.
 *   T    opcode type suffix (F or D)
 *   cols number of matrix columns (and components of the result c)
 *   rows components per matrix column (and of the vector operand a)
 *   t    operand type prefix (empty for float, d for double)
 *   t2   suffix selecting the dot product helper (dot<rows><t2>)
 * Operand b is addressed as an array of `cols` column vectors; component i
 * of the result is the first element of the dot product of a with b[i].
 * The result has `cols` components, so it is addressed with the cols-wide
 * operand type (the same way OP_vmmul_T_3 addresses it).  The components
 * are gathered in a local array and copied out only once all of them are
 * computed, so operand c is not touched while the inputs are being read.
 */
#define OP_vmmul_T(T,cols,rows,t,t2) \
		case OP_VMMUL_##cols##rows##_##T: \
			{ \
				auto a = OPA(t##vec##rows); \
				auto b = &OPB(t##vec##rows); \
				auto c = &OPC(t##vec##cols)[0]; \
				__typeof__ (a[0]) r[cols]; \
				for (int i = 0; i < cols; i++) { \
					r[i] = dot##rows##t2(a, b[i])[0]; \
				} \
				for (int i = 0; i < cols; i++) { \
					c[i] = r[i]; \
				} \
			} \
			break
#define dot4f dotf
#define dot4d dotd
// 0 0101
// spare
OP_vmmul_T (F,3,2,,f);
OP_vmmul_T_3(F,3,3,);
OP_vmmul_T (F,3,4,,f);
OP_vmmul_T_3(F,2,3,);
OP_vmmul_T (D,3,2,d,d);
OP_vmmul_T_3(D,3,3,d);
OP_vmmul_T (D,3,4,d,d);
OP_vmmul_T_3(D,2,3,d);
OP_vmmul_T (F,4,2,,f);
OP_vmmul_T_3(F,4,3,);
OP_vmmul_T (F,4,4,,f);
OP_vmmul_T (F,2,4,,f);
OP_vmmul_T (D,4,2,d,d);
OP_vmmul_T_3(D,4,3,d);
OP_vmmul_T (D,4,4,d,d);
OP_vmmul_T (D,2,4,d,d);
#undef dot4f
#undef dot4d
/* Emit the switch case for a vector-vector outer product: C = a * b^T.
 *   T    opcode type suffix (F or D)
 *   cols number of columns of the result (and components of operand b)
 *   rows components per result column (and components of operand a)
 *   t    operand type prefix (empty for float, d for double)
 * Operand c is addressed as an array of `cols` column vectors, so c[i][j]
 * is row j of column i and receives a[j] * b[i].  Operand b is fetched with
 * the cols-wide type so both index ranges stay within their operands for
 * non-square shapes.
 */
#define OP_outer_T(T,cols,rows,t) \
		case OP_OUTER_##cols##rows##_##T: \
			{ \
				auto a = OPA(t##vec##rows); \
				auto b = OPB(t##vec##cols); \
				auto c = &OPC(t##vec##rows); \
				for (int i = 0; i < cols; i++) { \
					for (int j = 0; j < rows; j++) { \
						c[i][j] = a[j] * b[i]; \
					} \
				} \
			} \
			break
// 0 0110
// spare
OP_outer_T(F,3,2,);
OP_outer_T(F,3,3,);
OP_outer_T(F,3,4,);
OP_outer_T(F,2,3,);
OP_outer_T(D,3,2,d);
OP_outer_T(D,3,3,d);
OP_outer_T(D,3,4,d);
OP_outer_T(D,2,3,d);
OP_outer_T(F,4,2,);
OP_outer_T(F,4,3,);
OP_outer_T(F,4,4,);
OP_outer_T(F,2,4,);
OP_outer_T(D,4,2,d);
OP_outer_T(D,4,3,d);
OP_outer_T(D,4,4,d);
OP_outer_T(D,2,4,d);
// 0 0111
OP_mvmul_T(F,2,2,);
OP_vmmul_T(F,2,2,,f);
OP_outer_T(F,2,2,);
case OP_WEDGE_F_2:
{
auto a = OPA(vec2);
auto b = OPB(vec2);
OPC(float) = a[0] * b[1] - a[1] * b[0];
}
break;
OP_mvmul_T(D,2,2,d);
OP_vmmul_T(D,2,2,d,d);
OP_outer_T(D,2,2,d);
case OP_WEDGE_D_2:
{
auto a = OPA(dvec2);
auto b = OPB(dvec2);
OPC(double) = a[0] * b[1] - a[1] * b[0];
}
break;
case OP_SWIZZLE_F_2:
{
auto s2 = OPA(ivec2);
@ -2328,21 +2461,9 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
storevec3l (&OPC(long), s4);
}
break;
case OP_WEDGE_F_2:
{
auto a = OPA(vec2);
auto b = OPB(vec2);
OPC(float) = a[0] * b[1] - a[1] * b[0];
}
break;
// spare
case OP_WEDGE_D_2:
{
auto a = OPA(dvec2);
auto b = OPB(dvec2);
OPC(double) = a[0] * b[1] - a[1] * b[0];
}
break;
// spare
// spare
// spare
#define OP_cmp_1(OP, T, rt, cmp, ct) \