diff --git a/include/QF/progs/pr_comp.h b/include/QF/progs/pr_comp.h
index 5342a02fa..1bc807a61 100644
--- a/include/QF/progs/pr_comp.h
+++ b/include/QF/progs/pr_comp.h
@@ -482,6 +482,7 @@ typedef struct opcode_s {
 	const char *opname;
 	const char *mnemonic;
 	int widths[3]; ///< component count for each argument (1-4)
+	int columns[3]; ///< column count for each argument (1-4)
 	etype_t types[3]; ///< component type for each argument
 	const char *fmt;
 } opcode_t;
@@ -563,7 +564,7 @@ typedef struct pr_va_list_s {
 	|(((0x##c) & 0xfff) << 0) )
 #define PROG_ID_VERSION 6
 #define PROG_V6P_VERSION PROG_VERSION_ENCODE(0,fff,00a)
-#define PROG_VERSION PROG_VERSION_ENCODE(0,fff,011)
+#define PROG_VERSION PROG_VERSION_ENCODE(0,fff,012)
 
 typedef struct pr_chunk_s {
 	pr_uint_t offset;
diff --git a/libs/gamecode/opcodes.py b/libs/gamecode/opcodes.py
index 64d3570cb..8fcdbfd55 100644
--- a/libs/gamecode/opcodes.py
+++ b/libs/gamecode/opcodes.py
@@ -3,8 +3,12 @@ bitmap_txt = """
 0 0001 mmss store
 0 0010 mmss push
 0 0011 mmss pop
-0 0111 00ts swizzle2
-0 0111 01t0 wedge2
+0 0100 dtdd matvec
+0 0101 dtdd vecmat
+0 0110 dtdd outer
+0 0111 0too matmul
+0 0111 0t11 wedge2
+0 0111 10ts swizzle2
 0 1ccc ttss compare
 0 0000 00nn
 0 0000 0000 noop
@@ -58,6 +62,17 @@ address_widths = [
     [ "4, 0", "1, 1", "1, 0", "1, 1", ],
     [ "-1, 0", "1, 1", "1, 0", "1, 1", "-1, 0", "-1, 1"],
 ]
+address_columns = [
+    [ "1, 0", "1, 1", "1, 0", "1, 1", ],
+    [ "1, 0", "1, 1", "1, 0", "1, 1", ],
+    [ "1, 0", "1, 1", "1, 0", "1, 1", ],
+    [ "1, 0", "1, 1", "1, 0", "1, 1", ],
+    [ "-1, 0", "1, 1", "1, 0", "1, 1", "-1, 0", "-1, 1"],
+]
+move_widths = [ "-1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
+move_columns = [ "-1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
+memset_widths = [ "1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
+memset_columns = [ "1, 0, -1", "1, 1, 1", "1, 0, 1", None, ]
 #store, pop, lea
 store_fmt = [
     "%ga",
@@ -92,6 +107,7 @@ adjstk_formats = {
     "opname": "adjstk",
     "format": "%sa, %sb",
     "widths": "0, 0, 0",
+    "columns": "0, 0, 0",
     "types": "ev_short, ev_short, ev_invalid",
 }
 bitops_formats = {
@@ -100,6 +116,7 @@ bitops_formats = {
     "opname": "{op_bit[oo]}",
     "format": "{bit_fmt[oo]}",
     "widths": "{ss+1}, { oo < 3 and ss+1 or 0}, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{bit_types[t]}, {oo < 3 and bit_types[t] or 'ev_invalid'}, {bit_types[t]}",
     "args": {
         "op_bit": ["bitand", "bitor", "bitxor", "bitnot"],
@@ -119,6 +136,7 @@ branch_formats = {
     "opname": "{op_cond[c*4+cc]}",
     "format": "{cond_fmt[c*4+cc]}{branch_fmt[0]}",
     "widths": "0, 0, 1",
+    "columns": "0, 0, 1",
     "types": "ev_short, ev_invalid, ev_int",
     "args": {
         "op_mode": "ABCD",
@@ -134,6 +152,7 @@ call_formats = {
     "opname": "call",
     "format": "{call_fmt[mm]}",
     "widths": "{call_widths[mm]}, -1",
+    "columns": "{call_columns[mm]}, -1",
     "types": "{call_types[mm]}, ev_void",
     "args": {
         "op_mode": ".BCD",
@@ -149,7 +168,8 @@ call_formats = {
             "ev_ptr, ev_short",
             "ev_ptr, ev_int",
         ],
-        "call_widths": [ None, "1, 0", "1, 0", "1, 1" ]
+        "call_widths": [ None, "1, 0", "1, 0", "1, 1" ],
+        "call_columns": [ None, "1, 0", "1, 0", "1, 1" ],
     },
 }
 compare_formats = {
@@ -157,6 +177,7 @@ compare_formats = {
     "mnemonic": "{op_cmp[ccc]}.{cmp_type[tt]}",
     "opname": "{op_cmp[ccc]}",
     "widths": "{ss+1}, {ss+1}, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{cmp_types[tt]}, {cmp_types[tt]}, {res_types[tt & 2]}",
     "args": {
         "op_cmp": compare_ccc,
@@ -170,6 +191,7 @@ compare2_formats = {
     "mnemonic": "{op_cmp[ccc]}.{cmp_type[t]}",
     "opname": "{op_cmp[ccc]}",
     "widths": "{ss+1}, {ss+1}, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{cmp_types[t]}, {cmp_types[t]}, ev_int",
     "args": {
         "op_cmp": compare_ccc,
@@ -183,6 +205,7 @@ constant_formats = {
     "opname": "ldconst",
     "format": "%sa, %sb, %gc",
     "widths": "0, 0, -1",
+    "columns": "0, 0, -1",
     "types": "ev_short, ev_short, ev_void",
 }
 convert_formats = {
@@ -191,6 +214,7 @@ convert_formats = {
     "opname": "conv",
     "format": "%Ga %Cb %gc",
     "widths": "-1, 0, -1",
+    "columns": "1, 0, 1",
     "types": "ev_void, ev_short, ev_void",
 }
 fbitops_formats = {
@@ -199,6 +223,7 @@ fbitops_formats = {
     "opname": "{op_fbit[oo]}",
     "format": "{fbit_fmt[oo]}",
     "widths": "1, 1, 1",
+    "columns": "1, 1, 1",
     "types": "{fbit_types[0]}, {fbit_types[oo==3]}, {fbit_types[0]}",
     "args": {
         "op_fbit": ["bitand", "bitor", "bitxor", "bitnot"],
@@ -217,6 +242,7 @@ extend_formats = {
     "opname": "extend",
     "format": "%Ga%Xb, %gc",
     "widths": "-1, 0, -1",
+    "columns": "1, 0, 1",
     "types": "ev_void, ev_short, ev_void",
 }
 hops_formats = {
@@ -225,6 +251,7 @@ hops_formats = {
     "opname": "hops",
     "format": "%Hb %Ga, %gc",
     "widths": "-1, 0, 1",
+    "columns": "1, 0, 1",
     "types": "ev_void, ev_short, ev_void",
 }
 jump_formats = {
@@ -233,6 +260,7 @@ jump_formats = {
     "opname": "jump",
     "format": "{jump_fmt[mm]}",
     "widths": "{jump_widths[mm]}, 0",
+    "columns": "{jump_columns[mm]}, 0",
     "types": "{jump_types[mm]}",
     "args": {
         "op_mode": "ABCD",
@@ -243,7 +271,8 @@ jump_formats = {
             "ev_ptr, ev_short, ev_invalid",
             "ev_ptr, ev_int, ev_invalid",
         ],
-        "jump_widths": [ "0, 0", "1, 1", "1, 0", "1, 1" ]
+        "jump_widths": [ "0, 0", "1, 1", "1, 0", "1, 1" ],
+        "jump_columns": [ "0, 0", "1, 1", "1, 0", "1, 1" ],
     },
 }
 load64_formats = {
@@ -252,12 +281,14 @@ load64_formats = {
     "opname": "load64",
     "format": "{load_fmt[mm]}, %gc",
     "widths": "{load_widths[s+2][mm]}, {s+3}",
+    "columns": "{load_columns[s+2][mm]}, 1",
     "types": "{load_types[mm]}, ev_void",
     "args": {
         "op_mode": address_mode,
         "load_fmt": load_fmt,
         "load_types": address_types,
         "load_widths": address_widths,
+        "load_columns": address_columns,
     },
 }
 lea_formats = {
@@ -266,12 +297,14 @@ lea_formats = {
     "opname": "lea",
     "format": "{lea_fmt[mm]}, %gc",
     "widths": "{lea_widths[mm]}, 1",
+    "columns": "{lea_columns[mm]}, 1",
     "types": "{lea_types[mm]}, ev_ptr",
     "args": {
         "op_mode": address_mode,
         "lea_fmt": store_fmt,
         "lea_types": address_types,
         "lea_widths": address_widths[4],
+        "lea_columns": address_columns[4],
     },
 }
 lea2_formats = {
@@ -280,12 +313,14 @@ lea2_formats = {
     "opname": "lea",
     "format": "{lea_fmt[m+4]}, %gc",
     "widths": "{lea_widths[m+4]}, 1",
+    "columns": "{lea_columns[m+4]}, 1",
     "types": "{lea_types[m+4]}, ev_ptr",
     "args": {
         "op_mode": "EF",
         "lea_fmt": store_fmt,
         "lea_types": address_types,
         "lea_widths": address_widths[4],
+        "lea_columns": address_columns[4],
     },
 }
 load_formats = {
@@ -294,12 +329,14 @@ load_formats = {
     "opname": "load",
     "format": "{load_fmt[mm]}, %gc",
     "widths": "{load_widths[ss][mm]}, {ss+1}",
+    "columns": "{load_columns[ss][mm]}, 1",
     "types": "{load_types[mm]}, ev_void",
     "args": {
         "op_mode": address_mode,
         "load_fmt": load_fmt,
         "load_types": address_types,
         "load_widths": address_widths,
+        "load_columns": address_columns,
     },
 }
 mathops_formats = {
@@ -307,6 +344,7 @@ mathops_formats = {
     "mnemonic": "{op_math[ooo]}.{math_type[tt]}",
     "opname": "{op_math[ooo]}",
     "widths": "{ss+1}, {ss+1}, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{math_types[tt]}, {math_types[tt]}, {math_types[tt]}",
     "args": {
         "op_math": ["mul", "div", "rem", "mod", "add", "sub", None, None],
@@ -314,23 +352,50 @@ mathops_formats = {
         "math_types": etype_tt,
     },
 }
+matmul_formats = {
+    "opcode": "OP_{op_matmul[oo].upper()}_22_{mat_type[t]}",
+    "mnemonic": "{op_matmul[oo]}",
+    "opname": "{op_matmul[oo]}",
+    "widths": "2, 2, 2",
+    "columns": "{cols_matmul[oo]}",
+    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
+    "args": {
+        "op_matmul": ["mvmul", "vmmul", "outer"],
+        "cols_matmul": ["2, 1, 1", "1, 2, 1", "1, 1, 2", None],
+        "mat_type": ['F', 'D'],
+        "mat_types": float_t,
+        "mat_dim": ((2, 3, 4, 3, 2, 3, 4, 4),
+                    (3, 3, 3, 2, 4, 4, 4, 2)),
+    },
+}
+matvec_formats = {
+    "opcode": "OP_MVMUL_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
+    "mnemonic": "mvmul",
+    "opname": "mvmul",
+    "widths": "{mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}, {mat_dim[0][d][dd]}",
+    "columns": "{mat_dim[1][d][dd]}, 1, 1",
+    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
+    "args": {
+        "mat_type": ['F', 'D'],
+        "mat_types": float_t,
+        "mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
+                    ((3, 3, 3, 2), (4, 4, 4, 2))),
+    },
+}
 memset_formats = {
     "opcode": "OP_MEMSET_{op_memset[oo].upper()}",
     "mnemonic": "memset.{op_memset[oo]}",
     "opname": "memset{suff_memset[oo]}",
     "format": "{memset_fmt[oo]}",
     "widths": "{memset_widths[oo]}",
+    "columns": "{memset_columns[oo]}",
     "types": "{memset_types[oo]}",
     "args": {
         "op_memset": ["i", "p", "pi", None],
         "suff_memset": ["", "p", "p", None],
         "memset_fmt": ["%Ga, %sb, %gc", "%Ga, %Gb, %Gc", "%Ga, %sb, %Gc", None],
-        "memset_widths": [
-            "1, 0, -1",
-            "1, 1, 1",
-            "1, 0, 1",
-            None,
-        ],
+        "memset_widths": memset_widths,
+        "memset_columns": memset_columns,
         "memset_types": [
             "ev_int, ev_short, ev_void",
             "ev_int, ev_int, ev_ptr",
@@ -344,17 +409,14 @@ move_formats = {
     "opname": "move{suff_move[oo]}",
     "format": "{move_fmt[oo]}",
     "widths": "{move_widths[oo]}",
+    "columns": "{move_columns[oo]}",
     "types": "{move_types[oo]}",
     "args": {
         "op_move": ["i", "p", "pi", None],
         "suff_move": ["", "p", "p", None],
         "move_fmt": ["%Ga, %sb, %gc", "%Ga, %Gb, %Gc", "%Ga, %sb, %Gc", None],
-        "move_widths": [
-            "-1, 0, -1",
-            "1, 1, 1",
-            "1, 0, 1",
-            None,
-        ],
+        "move_widths": move_widths,
+        "move_columns": move_columns,
         "move_types": [
             "ev_void, ev_short, ev_void",
             "ev_ptr, ev_int, ev_ptr",
@@ -368,14 +430,30 @@ noop_formats = {
     "opname": "nop",
     "format": "there were plums...",
     "widths": "0, 0, 0",
+    "columns": "0, 0, 0",
     "types": "ev_invalid, ev_invalid, ev_invalid",
 }
+outer_formats = {
+    "opcode": "OP_OUTER_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
+    "mnemonic": "outer",
+    "opname": "outer",
+    "widths": "{mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}, {mat_dim[0][d][dd]}",
+    "columns": "1, 1, {mat_dim[1][d][dd]}",
+    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
+    "args": {
+        "mat_type": ['F', 'D'],
+        "mat_types": float_t,
+        "mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
+                    ((3, 3, 3, 2), (4, 4, 4, 2))),
+    },
+}
 push_formats = {
     "opcode": "OP_PUSH_{op_mode[mm]}_{ss+1}",
     "mnemonic": "push",
     "opname": "push",
     "format": "{push_fmt[mm]}",
     "widths": "{ss+1}, 0, 0",
+    "columns": "1, 0, 0",
     "types": "{push_types[mm]}, ev_invalid",
     "args": {
         "op_mode": address_mode,
@@ -389,6 +467,7 @@ pop_formats = {
     "opname": "pop",
     "format": "{pop_fmt[mm]}",
     "widths": "{ss+1}, 0, 0",
+    "columns": "1, 0, 0",
     "types": "{pop_types[mm]}, ev_invalid",
     "args": {
         "op_mode": address_mode,
@@ -401,6 +480,7 @@ scale_formats = {
     "mnemonic": "scale.{scale_type[t]}",
     "opname": "scale",
     "widths": "{ss+1}, 1, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{scale_types[t]}, {scale_types[t]}, {scale_types[t]}",
     "args": {
         "scale_type": ['F', 'D'],
@@ -412,6 +492,7 @@ shiftops_formats = {
     "mnemonic": "{mn_shift[u*2+r]}.{shift_type[u*2+t]}",
     "opname": "{op_shift[u*2+r]}",
     "widths": "{ss+1}, {ss+1}, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{shift_types[t][u]}, {shift_types[t][0]}, {shift_types[t][u]}",
     "args": {
         "mn_shift": ["shl", "asr", "shl", "shr"],
@@ -429,6 +510,7 @@ statef_formats = {
     "opname": "state",
     "format": "{state_fmt[c]}",
     "widths": "1, 1, {c}",
+    "columns": "1, 1, 1",
     "types": "ev_float, ev_func, {state_types[c]}",
     "args": {
         "state": ["ft", "ftt"],
@@ -442,6 +524,7 @@ stated_formats = {
     "opname": "state",
     "format": "{state_fmt[c]}",
     "widths": "1, 1, {c}",
+    "columns": "1, 1, 1",
     "types": "ev_int, ev_func, {state_types[c]}",
     "args": {
         "state": ["dt", "dtt"],
@@ -455,6 +538,7 @@ store_formats = {
     "opname": "{store_op[mm]}",
     "format": "%Gc, {store_fmt[mm]}",
     "widths": "{store_widths[ss][mm]}, {ss+1}",
+    "columns": "{store_columns[ss][mm]}, 1",
     "types": "{store_types[mm]}, ev_void",
     "args": {
         "op_mode": address_mode,
@@ -462,6 +546,7 @@ store_formats = {
         "store_op": ["assign", "store", "store", "store"],
         "store_types": address_types,
         "store_widths": address_widths,
+        "store_columns": address_columns,
     },
 }
 store64_formats = {
@@ -470,6 +555,7 @@ store64_formats = {
     "opname": "{store_op[mm]}64",
     "format": "%Gc, {store_fmt[mm]}",
     "widths": "{store_widths[s+2][mm]}, {s+3}",
+    "columns": "{store_columns[s+2][mm]}, 1",
     "types": "{store_types[mm]}, ev_void",
     "args": {
         "op_mode": address_mode,
@@ -477,6 +563,7 @@ store64_formats = {
         "store_op": ["assign", "store", "store", "store"],
         "store_types": address_types,
         "store_widths": address_widths,
+        "store_columns": address_columns,
     },
 }
 string_formats = {
@@ -485,6 +572,7 @@ string_formats = {
     "opname": "{op_str[o*4+oo]}",
     "format": "{str_fmt[o*4+oo]}",
     "widths": "1, {(o*4+oo)<7 and 1 or 0}, 1",
+    "columns": "1, {(o*4+oo)<7 and 1 or 0}, 1",
     "types": "{str_types[o*4+oo]}",
     "args": {
         "op_str": ["eq", "lt", "gt", "add", "cmp", "ge", "le", "not"],
@@ -516,6 +604,7 @@ swizzle_formats = {
     "opname": "swizzle",
     "format": "%Ga.%Sb %gc",
     "widths": "4, 0, 4",
+    "columns": "1, 0, 1",
     "types": "{swizzle_types[t]}, ev_short, {swizzle_types[t]}",
     "args": {
         "swiz_type": ['F', 'D'],
@@ -528,6 +617,7 @@ swizzle2_formats = {
     "opname": "swizzle",
     "format": "%Ga.%Sb %gc",
     "widths": "{s+2}, 0, {s+2}",
+    "columns": "1, 0, 1",
     "types": "{swizzle_types[t]}, ev_short, {swizzle_types[t]}",
     "args": {
         "swiz_type": ['F', 'D'],
@@ -540,6 +630,7 @@ wedge2_formats = {
     "opname": "wedge",
     "format": "%Ga, %Gb, %gc",
     "widths": "2, 2, 1",
+    "columns": "1, 1, 1",
     "types": "{wedge_types[t]}",
     "args": {
         "wedge_type": ['F', 'D'],
@@ -551,6 +642,7 @@ return_formats = {
     "mnemonic": "return",
     "opname": "return",
     "widths": "-1, -1, 0", # width specified by st->c
+    "columns": "-1, -1, 1",
     "format": "%Mc5",
     "types": "ev_void, ev_void, ev_void",
 }
@@ -559,6 +651,7 @@ udivops_formats = {
     "mnemonic": "{op_udiv[o]}.{udiv_type[t]}",
     "opname": "{op_udiv[o]}",
     "widths": "{ss+1}, {ss+1}, {ss+1}",
+    "columns": "1, 1, 1",
     "types": "{udiv_types[t]}, {udiv_types[t]}, {udiv_types[t]}",
     "args": {
         "op_udiv": ["div", "rem"],
@@ -566,11 +659,26 @@ udivops_formats = {
         "udiv_types": ["ev_uint", "ev_ulong"],
     },
 }
+vecmat_formats = {
+    "opcode": "OP_VMMUL_{mat_dim[1][d][dd]}{mat_dim[0][d][dd]}_{mat_type[t]}",
+    "mnemonic": "vmmul",
+    "opname": "vmmul",
+    "widths": "{mat_dim[0][d][dd]}, {mat_dim[0][d][dd]}, {mat_dim[1][d][dd]}",
+    "columns": "1, {mat_dim[1][d][dd]}, 1",
+    "types": "{mat_types[t]}, {mat_types[t]}, {mat_types[t]}",
+    "args": {
+        "mat_type": ['F', 'D'],
+        "mat_types": float_t,
+        "mat_dim": (((2, 3, 4, 3), (2, 3, 4, 4)),
+                    ((3, 3, 3, 2), (4, 4, 4, 2))),
+    },
+}
 vecops_formats = {
     "opcode": "OP_{op_vop[ooo].upper()}_{vop_type[t]}",
     "mnemonic": "{op_vop[ooo]}.{vop_type[t]}",
     "opname": "{op_vop[ooo]}",
     "widths": "{vec_widths[ooo]}",
+    "columns": "1, 1, 1",
     "types": "{vec_types[t]}, {vec_types[t]}, {vec_types[t]}",
     "args": {
         "op_vop": ["cross", "cdot", "vdot", "qdot",
@@ -594,6 +702,7 @@ vecops2_formats = {
     "mnemonic": "{op_vop[d]}.{vop_type[t]}",
     "opname": "{op_vop[d]}",
     "widths": "4, 4, 4",
+    "columns": "1, 1, 1",
     "types": "{vec_types[t]}, {vec_types[t]}, {vec_types[t]}",
     "args": {
         "op_vop": ["qv4mul", "v4qmul"],
@@ -607,6 +716,7 @@ with_formats = {
     "opname": "with",
     "format": "%sa, %sb, %sc",
     "widths": "0, -1, 0",
+    "columns": "0, -1, 0",
     "types": "ev_short, ev_void, ev_short",
 }
 
@@ -628,9 +738,12 @@ group_map = {
     "load": load_formats,
     "load64": load64_formats,
     "mathops": mathops_formats,
+    "matmul": matmul_formats,
+    "matvec": matvec_formats,
     "memset": memset_formats,
     "move": move_formats,
     "noop": noop_formats,
+    "outer": outer_formats,
     "push": push_formats,
     "pop": pop_formats,
     "scale": scale_formats,
@@ -644,6 +757,7 @@ group_map = {
     "swizzle2": swizzle2_formats,
     "return": return_formats,
     "udivops": udivops_formats,
+    "vecmat": vecmat_formats,
     "vecops": vecops_formats,
     "vecops2": vecops2_formats,
     "wedge2": wedge2_formats,
@@ -710,12 +824,9 @@ def process_opcode(opcode, group):
         fmt = eval(f'''f"{gm['format']}"''', params)
     else:
         fmt = None
-    if fmt is None:
-        fmt = "0"
-    else:
-        fmt = f'"{fmt}"'
-    inst["fmt"] = fmt
+    inst["fmt"] = "0" if fmt is None else f'"{fmt}"'
     inst["wd"] = "{%s}" % eval(f'''f"{gm['widths']}"''', params)
+    inst["cl"] = "{%s}" % eval(f'''f"{gm['columns']}"''', params)
     inst["ty"] = "{%s}" % eval(f'''f"{gm['types']}"''', params)
 
 import sys
@@ -759,6 +870,7 @@ elif sys.argv[1] == "table":
                       '\\t.opname = {on},\\n'
                       '\\t.mnemonic = {mn},\\n'
                       '\\t.widths = {wd},\\n'
+                      '\\t.columns = {cl},\\n'
                       '\\t.types = {ty},\\n'
                       '\\t.fmt = {fmt},\\n'
                       '}},"', group))
diff --git a/libs/gamecode/pr_exec.c b/libs/gamecode/pr_exec.c
index ebd47edab..6c868dce8 100644
--- a/libs/gamecode/pr_exec.c
+++ b/libs/gamecode/pr_exec.c
@@ -2291,13 +2291,155 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
 				stk = pr_stack_pop (pr);
 				MM(ivec4) = STK(ivec4);
 				break;
+
+// c = A * b: A is cols columns of rows components each, b has cols
+// components and c has rows. The _3 forms use the Vector* macros.
+#define OP_mvmul_T_3(T,cols,rows,t) \
+	case OP_MVMUL_##cols##3##_##T: \
+		{ \
+			auto a = &OPA(t##vec##rows); \
+			auto b = OPB(t##vec##rows); \
+			auto c = OPC(t##vec##rows); \
+			VectorScale (a[0], b[0], c); \
+			for (int i = 1; i < cols; i++) { \
+				VectorMultAdd(c, b[i], a[i], c); \
+			} \
+		} \
+		break
+#define OP_mvmul_T(T,cols,rows,t) \
+	case OP_MVMUL_##cols##rows##_##T: \
+		{ \
+			auto a = &OPA(t##vec##rows); \
+			auto b = OPB(t##vec##cols); \
+			pr_##t##vec##rows##_t c = a[0] * b[0]; \
+			for (int i = 1; i < cols; i++) { \
+				c += a[i] * b[i]; \
+			} \
+			OPC(t##vec##rows) = c; \
+		} \
+		break
 			// 0 0100
-			// spare
+			OP_mvmul_T (F,3,2,);
+			OP_mvmul_T_3(F,3,3,);
+			OP_mvmul_T (F,3,4,);
+			OP_mvmul_T_3(F,2,3,);
+			OP_mvmul_T (D,3,2,d);
+			OP_mvmul_T_3(D,3,3,d);
+			OP_mvmul_T (D,3,4,d);
+			OP_mvmul_T_3(D,2,3,d);
+			OP_mvmul_T (F,4,2,);
+			OP_mvmul_T_3(F,4,3,);
+			OP_mvmul_T (F,4,4,);
+			OP_mvmul_T (F,2,4,);
+			OP_mvmul_T (D,4,2,d);
+			OP_mvmul_T_3(D,4,3,d);
+			OP_mvmul_T (D,4,4,d);
+			OP_mvmul_T (D,2,4,d);
+
+// c = a * B: a has rows components, B is cols columns of rows components
+// each, and c has cols components (one dot product per column).
+#define OP_vmmul_T_3(T,cols,rows,t) \
+	case OP_VMMUL_##cols##3##_##T: \
+		{ \
+			auto a = OPA(t##vec##rows); \
+			auto b = &OPB(t##vec##rows); \
+			auto c = &OPC(t##vec##cols)[0]; \
+			for (int i = 0; i < cols; i++) { \
+				c[i] = DotProduct (a, b[i]); \
+			} \
+		} \
+		break
+#define OP_vmmul_T(T,cols,rows,t,t2) \
+	case OP_VMMUL_##cols##rows##_##T: \
+		{ \
+			auto a = OPA(t##vec##rows); \
+			auto b = &OPB(t##vec##rows); \
+			pr_##t##vec##cols##_t c; \
+			for (int i = 0; i < cols; i++) { \
+				c[i] = dot##rows##t2(a, b[i])[0]; \
+			} \
+			auto dst = &OPC(t##vec##cols)[0]; \
+			for (int i = 0; i < cols; i++) { \
+				dst[i] = c[i]; \
+			} \
+		} \
+		break
+#define dot4f dotf
+#define dot4d dotd
 			// 0 0101
-			// spare
+			OP_vmmul_T (F,3,2,,f);
+			OP_vmmul_T_3(F,3,3,);
+			OP_vmmul_T (F,3,4,,f);
+			OP_vmmul_T_3(F,2,3,);
+			OP_vmmul_T (D,3,2,d,d);
+			OP_vmmul_T_3(D,3,3,d);
+			OP_vmmul_T (D,3,4,d,d);
+			OP_vmmul_T_3(D,2,3,d);
+			OP_vmmul_T (F,4,2,,f);
+			OP_vmmul_T_3(F,4,3,);
+			OP_vmmul_T (F,4,4,,f);
+			OP_vmmul_T (F,2,4,,f);
+			OP_vmmul_T (D,4,2,d,d);
+			OP_vmmul_T_3(D,4,3,d);
+			OP_vmmul_T (D,4,4,d,d);
+			OP_vmmul_T (D,2,4,d,d);
+#undef dot4f
+#undef dot4d
+
+// C = a * b^T: a has rows components, b has cols components; column i of
+// C (cols columns of rows components each) is a scaled by b[i].
+#define OP_outer_T(T,cols,rows,t) \
+	case OP_OUTER_##cols##rows##_##T: \
+		{ \
+			auto a = OPA(t##vec##rows); \
+			auto b = OPB(t##vec##cols); \
+			auto c = &OPC(t##vec##rows); \
+			for (int i = 0; i < cols; i++) { \
+				for (int j = 0; j < rows; j++) { \
+					c[i][j] = a[j] * b[i]; \
+				} \
+			} \
+		} \
+		break
 			// 0 0110
-			// spare
+			OP_outer_T(F,3,2,);
+			OP_outer_T(F,3,3,);
+			OP_outer_T(F,3,4,);
+			OP_outer_T(F,2,3,);
+			OP_outer_T(D,3,2,d);
+			OP_outer_T(D,3,3,d);
+			OP_outer_T(D,3,4,d);
+			OP_outer_T(D,2,3,d);
+			OP_outer_T(F,4,2,);
+			OP_outer_T(F,4,3,);
+			OP_outer_T(F,4,4,);
+			OP_outer_T(F,2,4,);
+			OP_outer_T(D,4,2,d);
+			OP_outer_T(D,4,3,d);
+			OP_outer_T(D,4,4,d);
+			OP_outer_T(D,2,4,d);
+
 			// 0 0111
+			OP_mvmul_T(F,2,2,);
+			OP_vmmul_T(F,2,2,,f);
+			OP_outer_T(F,2,2,);
+			case OP_WEDGE_F_2:
+				{
+					auto a = OPA(vec2);
+					auto b = OPB(vec2);
+					OPC(float) = a[0] * b[1] - a[1] * b[0];
+				}
+				break;
+			OP_mvmul_T(D,2,2,d);
+			OP_vmmul_T(D,2,2,d,d);
+			OP_outer_T(D,2,2,d);
+			case OP_WEDGE_D_2:
+				{
+					auto a = OPA(dvec2);
+					auto b = OPB(dvec2);
+					OPC(double) = a[0] * b[1] - a[1] * b[0];
+				}
+				break;
 			case OP_SWIZZLE_F_2:
 				{
 					auto s2 = OPA(ivec2);
@@ -2328,21 +2470,9 @@ pr_exec_ruamoko (progs_t *pr, int exitdepth)
 					storevec3l (&OPC(long), s4);
 				}
 				break;
-			case OP_WEDGE_F_2:
-				{
-					auto a = OPA(vec2);
-					auto b = OPB(vec2);
-					OPC(float) = a[0] * b[1] - a[1] * b[0];
-				}
-				break;
 			// spare
-			case OP_WEDGE_D_2:
-				{
-					auto a = OPA(dvec2);
-					auto b = OPB(dvec2);
-					OPC(double) = a[0] * b[1] - a[1] * b[0];
-				}
-				break;
+			// spare
+			// spare
 			// spare
 
 #define OP_cmp_1(OP, T, rt, cmp, ct) \