2001-02-19 21:15:25 +00:00
|
|
|
/*
|
|
|
|
pr_exec.c
|
|
|
|
|
|
|
|
(description)
|
|
|
|
|
|
|
|
Copyright (C) 1996-1997 Id Software, Inc.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU General Public License
|
|
|
|
as published by the Free Software Foundation; either version 2
|
|
|
|
of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to:
|
|
|
|
|
|
|
|
Free Software Foundation, Inc.
|
|
|
|
59 Temple Place - Suite 330
|
|
|
|
Boston, MA 02111-1307, USA
|
|
|
|
|
|
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
# include "config.h"
|
|
|
|
#endif
|
2003-01-15 15:31:36 +00:00
|
|
|
|
2001-02-19 21:15:25 +00:00
|
|
|
#ifdef HAVE_STRING_H
|
|
|
|
# include <string.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_STRINGS_H
|
|
|
|
# include <strings.h>
|
|
|
|
#endif
|
2002-11-13 19:26:44 +00:00
|
|
|
#include <signal.h>
|
2001-02-19 21:15:25 +00:00
|
|
|
#include <stdarg.h>
|
|
|
|
|
2001-03-27 20:33:07 +00:00
|
|
|
#include "QF/cvar.h"
|
2002-09-14 07:51:53 +00:00
|
|
|
#include "QF/dstring.h"
|
2001-09-11 05:18:15 +00:00
|
|
|
#include "QF/mathlib.h"
|
2001-03-27 20:33:07 +00:00
|
|
|
#include "QF/progs.h"
|
|
|
|
#include "QF/sys.h"
|
2001-06-03 17:36:49 +00:00
|
|
|
#include "QF/zone.h"
|
|
|
|
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
#include "QF/simd/vec2d.h"
|
|
|
|
#include "QF/simd/vec2f.h"
|
|
|
|
#include "QF/simd/vec2i.h"
|
|
|
|
#include "QF/simd/vec4d.h"
|
|
|
|
#include "QF/simd/vec4f.h"
|
|
|
|
#include "QF/simd/vec4i.h"
|
2001-06-03 17:36:49 +00:00
|
|
|
#include "compat.h"
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2022-01-03 05:39:32 +00:00
|
|
|
const char *prdebug_names[] = {
|
|
|
|
[prd_none] = "none",
|
|
|
|
[prd_trace] = "trace",
|
|
|
|
[prd_breakpoint] = "breakpoint",
|
|
|
|
[prd_watchpoint] = "watchpoint",
|
|
|
|
[prd_subenter] = "subenter",
|
|
|
|
[prd_subexit] = "subexit",
|
|
|
|
[prd_begin] = "begin",
|
|
|
|
[prd_terminate] = "terminate",
|
|
|
|
[prd_runerror] = "runerror",
|
|
|
|
[prd_error] = "error",
|
|
|
|
};
|
2001-08-13 20:29:33 +00:00
|
|
|
|
2001-02-19 21:15:25 +00:00
|
|
|
/*
|
|
|
|
PR_RunError
|
|
|
|
|
|
|
|
Aborts the currently executing function
|
|
|
|
*/
|
2007-03-10 12:00:59 +00:00
|
|
|
VISIBLE void
|
2001-07-15 07:04:17 +00:00
|
|
|
PR_RunError (progs_t * pr, const char *error, ...)
|
2001-02-19 21:15:25 +00:00
|
|
|
{
|
2020-03-26 02:44:02 +00:00
|
|
|
dstring_t *string = dstring_new ();//FIXME leaks when debugging
|
2002-10-23 20:42:02 +00:00
|
|
|
va_list argptr;
|
2001-02-19 21:15:25 +00:00
|
|
|
|
|
|
|
va_start (argptr, error);
|
2002-10-23 20:42:02 +00:00
|
|
|
dvsprintf (string, error, argptr);
|
2001-02-19 21:15:25 +00:00
|
|
|
va_end (argptr);
|
|
|
|
|
2020-03-24 06:35:42 +00:00
|
|
|
if (pr->debug_handler) {
|
2020-03-26 02:44:02 +00:00
|
|
|
pr->debug_handler (prd_runerror, string->str, pr->debug_data);
|
2020-03-24 06:35:42 +00:00
|
|
|
// not expected to return, but if so, behave as if there was no handler
|
|
|
|
}
|
|
|
|
|
2002-10-23 20:42:02 +00:00
|
|
|
Sys_Printf ("%s\n", string->str);
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2012-12-16 03:27:15 +00:00
|
|
|
PR_DumpState (pr);
|
|
|
|
|
2001-10-08 03:46:44 +00:00
|
|
|
// dump the stack so PR_Error can shutdown functions
|
2012-05-21 23:23:22 +00:00
|
|
|
pr->pr_depth = 0;
|
2020-02-25 06:18:15 +00:00
|
|
|
pr->localstack_used = 0;
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2002-10-23 20:42:02 +00:00
|
|
|
PR_Error (pr, "Program error: %s", string->str);
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
|
|
|
|
2020-02-25 05:30:26 +00:00
|
|
|
/*
	_PR_SaveParams

	Stash the current call parameters in params so that PR_RestoreParams can
	put them back later.

	The first two parameter pointers are recorded in params->param_ptrs and
	then redirected to their "real" parameter slots. The contents of the
	first pr_argc real parameter slots are copied into params->params, and
	for the first two parameters the value that the old pointer referred to
	is copied into the corresponding real slot so the redirected pointer
	still sees the same value.

	pr		the VM whose parameters are being stashed
	params	caller-provided storage for the stash

	Returns params.
*/
VISIBLE pr_stashed_params_t *
_PR_SaveParams (progs_t *pr, pr_stashed_params_t *params)
{
	int         i;
	int         size = pr->pr_param_size * sizeof (pr_type_t);

	params->param_ptrs[0] = pr->pr_params[0];
	params->param_ptrs[1] = pr->pr_params[1];
	pr->pr_params[0] = pr->pr_real_params[0];
	pr->pr_params[1] = pr->pr_real_params[1];
	for (i = 0; i < pr->pr_argc; i++) {
		// stash the slot's current contents before it is overwritten below
		memcpy (params->params + i * pr->pr_param_size,
				pr->pr_real_params[i], size);
		if (i < 2) {
			// Each of the first two parameters must be refreshed from its
			// OWN saved pointer (the original always used param_ptrs[0],
			// clobbering parameter 1 with parameter 0's value). memmove is
			// used because the saved pointer may already be the real slot.
			memmove (pr->pr_real_params[i], params->param_ptrs[i], size);
		}
	}
	params->argc = pr->pr_argc;
	return params;
}
|
|
|
|
|
|
|
|
VISIBLE void
|
2020-02-25 05:30:26 +00:00
|
|
|
PR_RestoreParams (progs_t *pr, pr_stashed_params_t *params)
|
2007-06-09 13:44:06 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int size = pr->pr_param_size * sizeof (pr_type_t);
|
|
|
|
|
2020-02-25 05:30:26 +00:00
|
|
|
pr->pr_params[0] = params->param_ptrs[0];
|
|
|
|
pr->pr_params[1] = params->param_ptrs[1];
|
|
|
|
pr->pr_argc = params->argc;
|
|
|
|
for (i = 0; i < pr->pr_argc; i++) {
|
2007-06-09 13:44:06 +00:00
|
|
|
memcpy (pr->pr_real_params[i],
|
2020-02-25 05:30:26 +00:00
|
|
|
params->params + i * pr->pr_param_size, size);
|
|
|
|
}
|
2007-06-09 13:44:06 +00:00
|
|
|
}
|
|
|
|
|
2007-03-10 12:00:59 +00:00
|
|
|
VISIBLE inline void
|
2004-01-05 07:10:32 +00:00
|
|
|
PR_PushFrame (progs_t *pr)
|
|
|
|
{
|
|
|
|
prstack_t *frame;
|
|
|
|
|
2022-01-22 01:57:26 +00:00
|
|
|
if (pr->pr_depth == PR_MAX_STACK_DEPTH)
|
2004-01-05 07:10:32 +00:00
|
|
|
PR_RunError (pr, "stack overflow");
|
|
|
|
|
|
|
|
frame = pr->pr_stack + pr->pr_depth++;
|
|
|
|
|
2020-04-02 06:00:01 +00:00
|
|
|
frame->staddr = pr->pr_xstatement;
|
2022-01-17 05:45:14 +00:00
|
|
|
if (pr->globals.stack) {
|
|
|
|
frame->stack_ptr = *pr->globals.stack;
|
|
|
|
}
|
2022-01-17 06:08:58 +00:00
|
|
|
frame->bases = pr->pr_bases;
|
2020-04-02 06:00:01 +00:00
|
|
|
frame->func = pr->pr_xfunction;
|
|
|
|
frame->tstr = pr->pr_xtstr;
|
2022-01-17 10:12:28 +00:00
|
|
|
frame->return_ptr = pr->pr_return;
|
2004-01-05 07:10:32 +00:00
|
|
|
|
2020-03-09 14:36:09 +00:00
|
|
|
pr->pr_xtstr = pr->pr_pushtstr;
|
|
|
|
pr->pr_pushtstr = 0;
|
2004-01-05 07:10:32 +00:00
|
|
|
pr->pr_xfunction = 0;
|
|
|
|
}
|
|
|
|
|
2007-03-10 12:00:59 +00:00
|
|
|
VISIBLE inline void
|
2004-01-05 07:10:32 +00:00
|
|
|
PR_PopFrame (progs_t *pr)
|
|
|
|
{
|
|
|
|
prstack_t *frame;
|
|
|
|
|
|
|
|
if (pr->pr_depth <= 0)
|
|
|
|
PR_Error (pr, "prog stack underflow");
|
|
|
|
|
|
|
|
if (pr->pr_xtstr)
|
|
|
|
PR_FreeTempStrings (pr);
|
2020-03-09 14:36:09 +00:00
|
|
|
// normally, this won't happen, but if a builtin pushed a temp string
|
|
|
|
// when calling a function and the callee was another builtin that
|
|
|
|
// did not call a progs function, then the push strings will still be
|
|
|
|
// valid because PR_EnterFunction was never called
|
2020-03-09 18:24:31 +00:00
|
|
|
// however, not if a temp string survived: better to hold on to the push
|
|
|
|
// strings a little longer than lose one erroneously
|
|
|
|
if (!pr->pr_xtstr && pr->pr_pushtstr) {
|
2020-03-09 14:36:09 +00:00
|
|
|
pr->pr_xtstr = pr->pr_pushtstr;
|
|
|
|
pr->pr_pushtstr = 0;
|
|
|
|
PR_FreeTempStrings (pr);
|
|
|
|
}
|
2004-01-05 07:10:32 +00:00
|
|
|
|
|
|
|
// up stack
|
|
|
|
frame = pr->pr_stack + --pr->pr_depth;
|
|
|
|
|
2022-01-17 10:12:28 +00:00
|
|
|
pr->pr_return = frame->return_ptr;
|
2020-04-02 06:00:01 +00:00
|
|
|
pr->pr_xfunction = frame->func;
|
|
|
|
pr->pr_xstatement = frame->staddr;
|
2004-01-05 07:10:32 +00:00
|
|
|
pr->pr_xtstr = frame->tstr;
|
2022-01-17 06:08:58 +00:00
|
|
|
pr->pr_bases = frame->bases;
|
2022-01-17 05:45:14 +00:00
|
|
|
// restore data stack (discard any locals)
|
|
|
|
if (pr->globals.stack) {
|
|
|
|
*pr->globals.stack = frame->stack_ptr;
|
|
|
|
}
|
2004-01-05 07:10:32 +00:00
|
|
|
}
|
|
|
|
|
2020-02-16 08:13:45 +00:00
|
|
|
static __attribute__((pure)) long
|
2022-01-27 01:55:06 +00:00
|
|
|
align_offset (long offset, dparmsize_t paramsize)
|
2020-02-16 08:13:45 +00:00
|
|
|
{
|
2022-01-27 01:55:06 +00:00
|
|
|
int mask = (1 << paramsize.alignment) - 1;
|
2020-02-16 08:13:45 +00:00
|
|
|
return (offset + mask) & ~mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
copy_param (pr_type_t *dst, pr_type_t *src, size_t size)
|
|
|
|
{
|
|
|
|
while (size--) {
|
|
|
|
memcpy (dst++, src++, sizeof (pr_type_t));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-11-07 03:00:00 +00:00
|
|
|
/** Setup the stackframe prior to calling a progs function. Saves all local
|
|
|
|
data the called function will trample on and copies the parameters used
|
|
|
|
by the function into the function's local data space.
|
|
|
|
\param pr pointer to progs_t VM struct
|
|
|
|
\param f pointer to the descriptor for the called function
|
|
|
|
\note Passing a descriptor for a builtin function will result in
|
|
|
|
undefined behavior.
|
2001-02-19 21:15:25 +00:00
|
|
|
*/
|
2004-11-07 03:00:00 +00:00
|
|
|
static void
|
2007-04-07 01:41:23 +00:00
|
|
|
PR_EnterFunction (progs_t *pr, bfunction_t *f)
|
2001-02-19 21:15:25 +00:00
|
|
|
{
|
2020-02-16 08:13:45 +00:00
|
|
|
pr_int_t i;
|
2022-01-23 05:17:25 +00:00
|
|
|
pr_type_t *dstParams[PR_MAX_PARAMS];
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t paramofs = 0;
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2020-03-31 15:09:34 +00:00
|
|
|
if (pr->pr_trace && !pr->debug_handler) {
|
2020-03-24 04:24:55 +00:00
|
|
|
Sys_Printf ("Entering function %s\n",
|
2021-12-31 06:02:31 +00:00
|
|
|
PR_GetString (pr, f->descriptor->name));
|
2020-03-24 04:24:55 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2004-01-05 07:10:32 +00:00
|
|
|
PR_PushFrame (pr);
|
|
|
|
|
2022-01-22 12:41:35 +00:00
|
|
|
//Sys_Printf("%s:\n", PR_GetString(pr,f->name));
|
|
|
|
pr->pr_xfunction = f;
|
|
|
|
pr->pr_xstatement = f->first_statement - 1; // offset the st++
|
|
|
|
|
2022-01-27 02:24:00 +00:00
|
|
|
if (pr->progs->version == PROG_VERSION) {
|
2022-01-22 12:41:35 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-01-27 01:55:06 +00:00
|
|
|
if (f->numparams > 0) {
|
|
|
|
paramofs = f->params_start;
|
|
|
|
for (i = 0; i < f->numparams; i++) {
|
|
|
|
paramofs = align_offset (paramofs, f->param_size[i]);
|
2020-02-16 08:13:45 +00:00
|
|
|
dstParams[i] = pr->pr_globals + paramofs;
|
2022-01-27 01:55:06 +00:00
|
|
|
paramofs += f->param_size[i].size;
|
2020-02-16 08:13:45 +00:00
|
|
|
if (pr->pr_params[i] != pr->pr_real_params[i]) {
|
|
|
|
copy_param (pr->pr_real_params[i], pr->pr_params[i],
|
2022-01-27 01:55:06 +00:00
|
|
|
f->param_size[i].size);
|
2020-03-11 10:38:50 +00:00
|
|
|
pr->pr_params[i] = pr->pr_real_params[i];
|
2020-02-16 08:13:45 +00:00
|
|
|
}
|
2005-06-14 13:40:34 +00:00
|
|
|
}
|
2022-01-27 01:55:06 +00:00
|
|
|
} else if (f->numparams < 0) {
|
|
|
|
paramofs = f->params_start + 2; // argc and argv
|
|
|
|
for (i = 0; i < -f->numparams - 1; i++) {
|
|
|
|
paramofs = align_offset (paramofs, f->param_size[i]);
|
2020-02-16 08:13:45 +00:00
|
|
|
dstParams[i] = pr->pr_globals + paramofs;
|
2022-01-27 01:55:06 +00:00
|
|
|
paramofs += f->param_size[i].size;
|
2020-02-16 08:13:45 +00:00
|
|
|
if (pr->pr_params[i] != pr->pr_real_params[i]) {
|
|
|
|
copy_param (pr->pr_real_params[i], pr->pr_params[i],
|
2022-01-27 01:55:06 +00:00
|
|
|
f->param_size[i].size);
|
2020-03-11 10:38:50 +00:00
|
|
|
pr->pr_params[i] = pr->pr_real_params[i];
|
2020-02-16 08:13:45 +00:00
|
|
|
}
|
2005-06-14 13:40:34 +00:00
|
|
|
}
|
2022-01-27 01:55:06 +00:00
|
|
|
dparmsize_t paramsize = { pr->pr_param_size, pr->pr_param_alignment };
|
|
|
|
paramofs = align_offset (paramofs, paramsize );
|
2022-01-23 05:17:25 +00:00
|
|
|
if (i < PR_MAX_PARAMS) {
|
2020-02-16 08:13:45 +00:00
|
|
|
dstParams[i] = pr->pr_globals + paramofs;
|
2005-06-14 13:40:34 +00:00
|
|
|
}
|
2020-02-16 09:00:29 +00:00
|
|
|
for (; i < pr->pr_argc; i++) {
|
2020-02-16 08:13:45 +00:00
|
|
|
if (pr->pr_params[i] != pr->pr_real_params[i]) {
|
|
|
|
copy_param (pr->pr_real_params[i], pr->pr_params[i],
|
2022-01-27 01:55:06 +00:00
|
|
|
paramsize.size);
|
2020-03-11 10:38:50 +00:00
|
|
|
pr->pr_params[i] = pr->pr_real_params[i];
|
2020-02-16 08:13:45 +00:00
|
|
|
}
|
2005-06-14 13:40:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-06-05 23:57:51 +00:00
|
|
|
// save off any locals that the new function steps on
|
2022-01-22 01:57:26 +00:00
|
|
|
if (pr->localstack_used + f->locals > PR_LOCAL_STACK_SIZE)
|
2002-05-21 21:14:32 +00:00
|
|
|
PR_RunError (pr, "PR_EnterFunction: locals stack overflow");
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2001-06-05 23:57:51 +00:00
|
|
|
memcpy (&pr->localstack[pr->localstack_used],
|
2022-01-27 01:55:06 +00:00
|
|
|
&pr->pr_globals[f->params_start],
|
2020-02-16 08:13:45 +00:00
|
|
|
sizeof (pr_type_t) * f->locals);
|
|
|
|
pr->localstack_used += f->locals;
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2022-01-23 15:13:44 +00:00
|
|
|
if (pr_deadbeef_locals->int_val) {
|
2022-01-27 01:55:06 +00:00
|
|
|
for (pr_uint_t i = f->params_start;
|
|
|
|
i < f->params_start + f->locals; i++) {
|
2022-01-18 04:21:06 +00:00
|
|
|
pr->pr_globals[i].int_var = 0xdeadbeef;
|
2022-01-23 15:13:44 +00:00
|
|
|
}
|
|
|
|
}
|
2001-08-13 20:29:33 +00:00
|
|
|
|
2001-09-10 12:56:23 +00:00
|
|
|
// copy parameters
|
2022-01-27 01:55:06 +00:00
|
|
|
if (f->numparams >= 0) {
|
|
|
|
for (i = 0; i < f->numparams; i++) {
|
|
|
|
copy_param (dstParams[i], pr->pr_params[i], f->param_size[i].size);
|
2002-05-22 20:43:29 +00:00
|
|
|
}
|
|
|
|
} else {
|
2020-02-16 09:00:29 +00:00
|
|
|
int copy_args;
|
2022-01-27 01:55:06 +00:00
|
|
|
pr_type_t *argc = &pr->pr_globals[f->params_start + 0];
|
|
|
|
pr_type_t *argv = &pr->pr_globals[f->params_start + 1];
|
|
|
|
for (i = 0; i < -f->numparams - 1; i++) {
|
|
|
|
copy_param (dstParams[i], pr->pr_params[i], f->param_size[i].size);
|
2002-05-22 20:43:29 +00:00
|
|
|
}
|
2020-02-16 09:00:29 +00:00
|
|
|
copy_args = pr->pr_argc - i;
|
2022-01-18 04:21:06 +00:00
|
|
|
argc->int_var = copy_args;
|
|
|
|
argv->int_var = dstParams[i] - pr->pr_globals;
|
2022-01-23 05:17:25 +00:00
|
|
|
if (i < PR_MAX_PARAMS) {
|
2020-02-16 09:00:29 +00:00
|
|
|
memcpy (dstParams[i], pr->pr_params[i],
|
|
|
|
(copy_args * pr->pr_param_size) * sizeof (pr_type_t));
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-01-06 18:28:13 +00:00
|
|
|
static void
|
2020-03-24 04:24:55 +00:00
|
|
|
PR_LeaveFunction (progs_t *pr, int to_engine)
|
2001-02-19 21:15:25 +00:00
|
|
|
{
|
2007-04-07 01:41:23 +00:00
|
|
|
bfunction_t *f = pr->pr_xfunction;
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2004-01-05 07:10:32 +00:00
|
|
|
PR_PopFrame (pr);
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2020-03-31 15:09:34 +00:00
|
|
|
if (pr->pr_trace && !pr->debug_handler) {
|
2020-03-24 04:24:55 +00:00
|
|
|
Sys_Printf ("Leaving function %s\n",
|
2021-12-31 06:02:31 +00:00
|
|
|
PR_GetString (pr, f->descriptor->name));
|
2020-03-24 04:24:55 +00:00
|
|
|
if (to_engine) {
|
|
|
|
Sys_Printf ("Returning to engine\n");
|
|
|
|
} else {
|
|
|
|
bfunction_t *rf = pr->pr_xfunction;
|
2020-03-24 14:16:25 +00:00
|
|
|
if (rf) {
|
|
|
|
Sys_Printf ("Returning to function %s\n",
|
2021-12-31 06:02:31 +00:00
|
|
|
PR_GetString (pr, rf->descriptor->name));
|
2020-03-24 14:16:25 +00:00
|
|
|
}
|
2020-03-24 04:24:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-27 02:24:00 +00:00
|
|
|
if (pr->progs->version == PROG_VERSION) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2001-09-10 12:56:23 +00:00
|
|
|
// restore locals from the stack
|
2020-02-16 08:13:45 +00:00
|
|
|
pr->localstack_used -= f->locals;
|
2001-02-19 21:15:25 +00:00
|
|
|
if (pr->localstack_used < 0)
|
2002-05-21 21:14:32 +00:00
|
|
|
PR_RunError (pr, "PR_LeaveFunction: locals stack underflow");
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2022-01-27 01:55:06 +00:00
|
|
|
memcpy (&pr->pr_globals[f->params_start],
|
2020-02-16 08:13:45 +00:00
|
|
|
&pr->localstack[pr->localstack_used],
|
|
|
|
sizeof (pr_type_t) * f->locals);
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
|
|
|
|
2010-01-13 06:36:16 +00:00
|
|
|
VISIBLE void
|
2022-01-18 03:11:14 +00:00
|
|
|
PR_BoundsCheckSize (progs_t *pr, pr_ptr_t addr, unsigned size)
|
2010-01-13 06:19:50 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
if (addr < (pr_ptr_t) (pr->pr_return - pr->pr_globals))
|
2012-12-22 05:24:11 +00:00
|
|
|
PR_RunError (pr, "null pointer access");
|
2012-12-21 12:53:13 +00:00
|
|
|
if (addr >= pr->globals_size
|
2010-01-13 06:19:50 +00:00
|
|
|
|| size > (unsigned) (pr->globals_size - addr))
|
|
|
|
PR_RunError (pr, "invalid memory access: %d (0 to %d-%d)", addr,
|
|
|
|
pr->globals_size, size);
|
2012-11-19 11:03:21 +00:00
|
|
|
if (pr_boundscheck->int_val >= 2
|
|
|
|
&& PR_GetPointer (pr, addr + size) > (pr_type_t *) pr->zone) {
|
|
|
|
void *mem = (void *) PR_GetPointer (pr, addr);
|
|
|
|
Z_CheckPointer (pr->zone, mem, size * sizeof (pr_type_t));
|
|
|
|
}
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
|
|
|
|
2010-01-13 06:36:16 +00:00
|
|
|
VISIBLE void
|
2010-01-13 06:19:50 +00:00
|
|
|
PR_BoundsCheck (progs_t *pr, int addr, etype_t type)
|
|
|
|
{
|
|
|
|
PR_BoundsCheckSize (pr, addr, pr_type_size[type]);
|
|
|
|
}
|
|
|
|
|
2022-01-02 11:46:32 +00:00
|
|
|
// Typed operand access: OPA(type) reinterprets the memory the local
// pointer op_a refers to as a pr_<type>_t lvalue (e.g. OPA(float) is a
// pr_float_t). OPB and OPC do the same for op_b and op_c. The pointers
// must be in scope wherever the macros are used.
#define OPA(type) (*((pr_##type##_t *) (op_a)))
|
|
|
|
#define OPB(type) (*((pr_##type##_t *) (op_b)))
|
|
|
|
#define OPC(type) (*((pr_##type##_t *) (op_c)))
|
2020-02-14 07:38:37 +00:00
|
|
|
|
2001-11-19 17:51:31 +00:00
|
|
|
/*
|
|
|
|
This gets around the problem of needing to test for -0.0 but denormals
|
|
|
|
causing exceptions (or wrong results for what we need) on the alpha.
|
|
|
|
*/
|
2022-01-02 11:46:32 +00:00
|
|
|
// Mask off bit 31 of x, leaving the low 31 bits unchanged.
#define FNZ(x) ((x) & ~0x80000000u)
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2002-11-13 19:26:44 +00:00
|
|
|
static int
|
|
|
|
signal_hook (int sig, void *data)
|
|
|
|
{
|
|
|
|
progs_t *pr = (progs_t *) data;
|
2012-05-21 23:23:22 +00:00
|
|
|
|
2002-11-13 19:26:44 +00:00
|
|
|
if (sig == SIGFPE && pr_faultchecks->int_val) {
|
|
|
|
dstatement_t *st;
|
|
|
|
pr_type_t *op_a, *op_b, *op_c;
|
|
|
|
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
op_a = pr->pr_globals + st->a;
|
|
|
|
op_b = pr->pr_globals + st->b;
|
|
|
|
op_c = pr->pr_globals + st->c;
|
|
|
|
|
|
|
|
switch (st->op) {
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_DIV_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if ((OPA(int) & 0x80000000)
|
|
|
|
^ (OPB(int) & 0x80000000))
|
|
|
|
OPC(int) = 0xff7fffff;
|
2002-11-13 19:26:44 +00:00
|
|
|
else
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = 0x7f7fffff;
|
2002-11-13 19:26:44 +00:00
|
|
|
return 1;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_DIV_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (OPA(int) & 0x80000000)
|
|
|
|
OPC(int) = -0x80000000;
|
2002-11-13 19:26:44 +00:00
|
|
|
else
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = 0x7fffffff;
|
2002-11-13 19:26:44 +00:00
|
|
|
return 1;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOD_I_v6p:
|
|
|
|
case OP_MOD_F_v6p:
|
|
|
|
case OP_REM_I_v6p:
|
|
|
|
case OP_REM_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = 0x00000000;
|
2004-02-20 00:25:08 +00:00
|
|
|
return 1;
|
2002-11-13 19:26:44 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PR_DumpState (pr);
|
2011-03-20 04:34:28 +00:00
|
|
|
fflush (stdout);
|
2002-11-13 19:26:44 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-03 14:04:00 +00:00
|
|
|
static void
|
|
|
|
error_handler (void *data)
|
|
|
|
{
|
|
|
|
progs_t *pr = (progs_t *) data;
|
|
|
|
PR_DumpState (pr);
|
|
|
|
fflush (stdout);
|
|
|
|
}
|
|
|
|
|
2007-03-10 12:00:59 +00:00
|
|
|
VISIBLE int
|
2022-01-18 06:32:43 +00:00
|
|
|
PR_CallFunction (progs_t *pr, pr_func_t fnum, pr_type_t *return_ptr)
|
2004-11-07 03:00:00 +00:00
|
|
|
{
|
2007-04-07 01:41:23 +00:00
|
|
|
bfunction_t *f;
|
2004-11-07 03:00:00 +00:00
|
|
|
|
|
|
|
if (!fnum)
|
|
|
|
PR_RunError (pr, "NULL function");
|
2022-01-21 01:09:02 +00:00
|
|
|
if (!return_ptr || return_ptr == pr->pr_globals) {
|
|
|
|
return_ptr = pr->pr_return_buffer;
|
|
|
|
}
|
2007-04-07 01:41:23 +00:00
|
|
|
f = pr->function_table + fnum;
|
2004-11-07 03:00:00 +00:00
|
|
|
if (f->first_statement < 0) {
|
|
|
|
// negative statements are built in functions
|
2022-01-24 07:46:49 +00:00
|
|
|
if (pr->progs->version == PROG_VERSION) {
|
|
|
|
PR_SetupParams (pr, 0, 0);
|
|
|
|
}
|
2020-03-31 15:09:34 +00:00
|
|
|
if (pr->pr_trace && !pr->debug_handler) {
|
2016-01-03 07:13:59 +00:00
|
|
|
Sys_Printf ("Calling builtin %s @ %p\n",
|
2021-12-31 06:02:31 +00:00
|
|
|
PR_GetString (pr, f->descriptor->name), f->func);
|
2016-01-03 07:13:59 +00:00
|
|
|
}
|
2022-01-17 10:12:28 +00:00
|
|
|
pr_type_t *saved_return = pr->pr_return;
|
|
|
|
pr->pr_return = return_ptr;
|
2007-04-07 01:41:23 +00:00
|
|
|
f->func (pr);
|
2022-01-17 10:12:28 +00:00
|
|
|
pr->pr_return = saved_return;
|
2004-11-07 03:00:00 +00:00
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
PR_EnterFunction (pr, f);
|
2022-01-17 10:12:28 +00:00
|
|
|
pr->pr_return = return_ptr;
|
2004-11-07 03:00:00 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-11 04:24:03 +00:00
|
|
|
static void
|
2022-01-18 03:11:14 +00:00
|
|
|
check_stack_pointer (progs_t *pr, pr_ptr_t stack, int size)
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-24 03:50:15 +00:00
|
|
|
if (stack & 3) {
|
|
|
|
PR_RunError (pr, "Progs stack not aligned");
|
|
|
|
}
|
2018-10-11 04:24:03 +00:00
|
|
|
if (stack < pr->stack_bottom) {
|
|
|
|
PR_RunError (pr, "Progs stack overflow");
|
|
|
|
}
|
|
|
|
if (stack > pr->globals_size - size) {
|
|
|
|
PR_RunError (pr, "Progs stack underflow");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-24 03:50:15 +00:00
|
|
|
/*
	PR_SetupParams

	Prepare the parameter pointers for a call taking num_params parameters.

	For progs older than PROG_VERSION the first two parameter pointers are
	reset to the real parameter slots and the first real slot is returned;
	asking for more than PR_MAX_PARAMS parameters is an error.

	Otherwise room for num_params 4-word slots is reserved on the progs data
	stack, the stack pointer is aligned down to min_alignment words (at
	least 4; the mask arithmetic assumes a power of two), and all
	PR_MAX_PARAMS parameter pointers are set to consecutive 4-word slots
	starting at the new stack pointer.

	Returns a pointer to the first parameter.
*/
VISIBLE pr_type_t *
PR_SetupParams (progs_t *pr, int num_params, int min_alignment)
{
	if (pr->progs->version < PROG_VERSION) {
		if (num_params > PR_MAX_PARAMS) {
			PR_Error (pr, "attempt to settup more than %d params",
					  PR_MAX_PARAMS);
		}
		pr->pr_params[0] = pr->pr_real_params[0];
		pr->pr_params[1] = pr->pr_real_params[1];
		return pr->pr_real_params[0];
	}

	int         offset = num_params * 4;
	if (min_alignment < 4) {
		min_alignment = 4;
	}
	pr_ptr_t    mask = ~(min_alignment - 1);
	pr_ptr_t    stack = (*pr->globals.stack - offset) & mask;
	if (pr_boundscheck->int_val) {
		check_stack_pointer (pr, stack, 0);
	}
	*pr->globals.stack = stack;
	pr->pr_params[0] = pr->pr_globals + stack;
	// Always fill every pointer slot, but never more: the original used
	// max (num_params, PR_MAX_PARAMS) as the bound, which indexes past
	// PR_MAX_PARAMS when more parameters are requested.
	// NOTE(review): assumes pr_params holds PR_MAX_PARAMS entries - confirm
	// against the progs_t declaration.
	for (int i = 1; i < PR_MAX_PARAMS; i++) {
		pr->pr_params[i] = pr->pr_params[0] + i * 4;
	}
	return pr->pr_params[0];
}
|
|
|
|
|
2020-03-11 13:48:55 +00:00
|
|
|
static inline void
|
2022-01-18 06:50:32 +00:00
|
|
|
pr_memset (pr_type_t *dst, int val, pr_uint_t count)
|
2020-03-11 13:48:55 +00:00
|
|
|
{
|
|
|
|
while (count-- > 0) {
|
2022-01-18 04:21:06 +00:00
|
|
|
(*dst++).int_var = val;
|
2020-03-11 13:48:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-02 11:16:45 +00:00
|
|
|
static void
|
|
|
|
pr_exec_quakec (progs_t *pr, int exitdepth)
|
2001-02-19 21:15:25 +00:00
|
|
|
{
|
2022-01-02 11:16:45 +00:00
|
|
|
int profile, startprofile;
|
2013-01-17 05:11:54 +00:00
|
|
|
int fldofs;
|
2007-04-06 00:47:41 +00:00
|
|
|
pr_uint_t pointer;
|
|
|
|
dstatement_t *st;
|
|
|
|
pr_type_t *ptr;
|
2020-03-24 10:45:39 +00:00
|
|
|
pr_type_t old_val = {0};
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2001-09-10 12:56:23 +00:00
|
|
|
// make a stack frame
|
2001-02-19 21:15:25 +00:00
|
|
|
startprofile = profile = 0;
|
|
|
|
|
2004-11-07 03:00:00 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
|
2007-05-08 02:04:47 +00:00
|
|
|
if (pr->watch) {
|
2020-03-24 14:16:25 +00:00
|
|
|
old_val = *pr->watch;
|
2007-05-08 02:04:47 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
while (1) {
|
2002-06-07 19:41:13 +00:00
|
|
|
pr_type_t *op_a, *op_b, *op_c;
|
|
|
|
|
2001-02-19 21:15:25 +00:00
|
|
|
st++;
|
2002-10-22 15:07:54 +00:00
|
|
|
++pr->pr_xstatement;
|
|
|
|
if (pr->pr_xstatement != st - pr->pr_statements)
|
|
|
|
PR_RunError (pr, "internal error");
|
2001-12-14 08:15:04 +00:00
|
|
|
if (++profile > 1000000 && !pr->no_exec_limit) {
|
2001-02-19 21:15:25 +00:00
|
|
|
PR_RunError (pr, "runaway loop error");
|
|
|
|
}
|
|
|
|
|
2002-06-07 19:41:13 +00:00
|
|
|
op_a = pr->pr_globals + st->a;
|
|
|
|
op_b = pr->pr_globals + st->b;
|
|
|
|
op_c = pr->pr_globals + st->c;
|
|
|
|
|
2020-03-24 06:35:42 +00:00
|
|
|
if (pr->pr_trace) {
|
|
|
|
if (pr->debug_handler) {
|
2020-03-26 02:44:02 +00:00
|
|
|
pr->debug_handler (prd_trace, 0, pr->debug_data);
|
2020-03-24 06:35:42 +00:00
|
|
|
} else {
|
|
|
|
PR_PrintStatement (pr, st, 1);
|
|
|
|
}
|
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2020-02-26 04:40:26 +00:00
|
|
|
if (st->op & OP_BREAK) {
|
2020-03-24 06:35:42 +00:00
|
|
|
if (pr->debug_handler) {
|
2020-03-26 02:44:02 +00:00
|
|
|
pr->debug_handler (prd_breakpoint, 0, pr->debug_data);
|
2020-02-26 04:40:26 +00:00
|
|
|
} else {
|
|
|
|
PR_RunError (pr, "breakpoint hit");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
pr_opcode_v6p_e op = st->op & ~OP_BREAK;
|
2020-02-26 04:40:26 +00:00
|
|
|
switch (op) {
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADD_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = OPA(double) + OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADD_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) + OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADD_V_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorAdd (&OPA(float), &OPB(float), &OPC(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADD_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatAdd (&OPA(float), &OPB(float), &OPC(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADD_S_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(string) = PR_CatStrings (pr,
|
2004-11-02 04:59:00 +00:00
|
|
|
PR_GetString (pr,
|
2022-01-02 11:46:32 +00:00
|
|
|
OPA(string)),
|
2004-11-02 04:59:00 +00:00
|
|
|
PR_GetString (pr,
|
2022-01-02 11:46:32 +00:00
|
|
|
OPB(string)));
|
2001-06-03 17:36:49 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SUB_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = OPA(double) - OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SUB_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) - OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SUB_V_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorSubtract (&OPA(float), &OPB(float),
|
|
|
|
&OPC(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SUB_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatSubtract (&OPA(float), &OPB(float), &OPC(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = OPA(double) * OPB(double);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) * OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_V_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = DotProduct (&OPA(float), &OPB(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_DV_v6p:
|
2020-02-14 07:38:37 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x.x * x;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
double scale = OPA(double);
|
|
|
|
VectorScale (&OPB(float), scale, &OPC(float));
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_VD_v6p:
|
2020-02-14 07:38:37 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x * x.x;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
double scale = OPB(double);
|
|
|
|
VectorScale (&OPA(float), scale, &OPC(float));
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_FV_v6p:
|
2013-01-17 01:23:02 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x.x * x;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
float scale = OPA(float);
|
|
|
|
VectorScale (&OPB(float), scale, &OPC(float));
|
2013-01-17 01:23:02 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_VF_v6p:
|
2013-01-17 01:23:02 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x * x.x;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
float scale = OPB(float);
|
|
|
|
VectorScale (&OPA(float), scale, &OPC(float));
|
2013-01-17 01:23:02 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatMult (&OPA(float), &OPB(float), &OPC(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_QV_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatMultVec (&OPA(float), &OPB(float), &OPC(float));
|
2012-04-26 01:29:21 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_DQ_v6p:
|
2020-02-14 07:38:37 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x.s * x;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
double scale = OPA(double);
|
|
|
|
QuatScale (&OPB(float), scale, &OPC(float));
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_QD_v6p:
|
2020-02-14 07:38:37 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x * x.s;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
double scale = OPB(double);
|
|
|
|
QuatScale (&OPA(float), scale, &OPC(float));
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
2012-04-26 01:29:21 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_FQ_v6p:
|
2013-01-17 01:23:02 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x.s * x;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
float scale = OPA(float);
|
|
|
|
QuatScale (&OPB(float), scale, &OPC(float));
|
2013-01-17 01:23:02 +00:00
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_QF_v6p:
|
2013-01-17 01:23:02 +00:00
|
|
|
{
|
|
|
|
// avoid issues with the likes of x = x * x.s;
|
|
|
|
// makes for faster code, too
|
2022-01-02 11:46:32 +00:00
|
|
|
float scale = OPB(float);
|
|
|
|
QuatScale (&OPA(float), scale, &OPC(float));
|
2013-01-17 01:23:02 +00:00
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONJ_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatConj (&OPA(float), &OPC(float));
|
2004-04-08 04:57:17 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_DIV_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = OPA(double) / OPB(double);
|
2004-04-08 04:57:17 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_DIV_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) / OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITAND_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = (int) OPA(float) & (int) OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITOR_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = (int) OPA(float) | (int) OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITXOR_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = (int) OPA(float) ^ (int) OPB(float);
|
2001-08-09 16:34:46 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITNOT_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = ~ (int) OPA(float);
|
2001-08-09 16:34:46 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SHL_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = (int) OPA(float) << (int) OPB(float);
|
2001-08-10 16:17:00 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SHR_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = (int) OPA(float) >> (int) OPB(float);
|
2001-08-10 16:17:00 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SHL_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) << OPB(int);
|
2001-08-10 16:17:00 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SHR_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) >> OPB(int);
|
2001-08-10 16:17:00 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SHR_U_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(uint) = OPA(uint) >> OPB(int);
|
2003-08-01 21:20:04 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GE_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) >= OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LE_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) <= OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GT_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) > OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LT_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(float) < OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_AND_v6p: // OPA and OPB have to be float for -0.0
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = FNZ (OPA(uint)) && FNZ (OPB(uint));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_OR_v6p: // OPA and OPB have to be float for -0.0
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = FNZ (OPA(uint)) || FNZ (OPB(uint));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = !FNZ (OPA(uint));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_V_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = VectorIsZero (&OPA(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = QuatIsZero (&OPA(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_S_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = !OPA(string) || !*PR_GetString (pr, OPA(string));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_FN_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = !OPA(func);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_ENT_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = !OPA(entity);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(float) == OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = VectorCompare (&OPA(float), &OPB(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = QuatCompare (&OPA(float), &OPB(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_E_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = OPA(field) == OPB(field);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_FN_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = OPA(func) == OPB(func);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_F_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(float) != OPB(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = !VectorCompare (&OPA(float), &OPB(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = !QuatCompare (&OPA(float), &OPB(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LE_S_v6p:
|
|
|
|
case OP_GE_S_v6p:
|
|
|
|
case OP_LT_S_v6p:
|
|
|
|
case OP_GT_S_v6p:
|
|
|
|
case OP_NE_S_v6p:
|
|
|
|
case OP_EQ_S_v6p:
|
2001-06-04 03:36:35 +00:00
|
|
|
{
|
2022-01-02 11:46:32 +00:00
|
|
|
int cmp = strcmp (PR_GetString (pr, OPA(string)),
|
|
|
|
PR_GetString (pr, OPB(string)));
|
2001-06-04 03:36:35 +00:00
|
|
|
switch (st->op) {
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LE_S_v6p: cmp = (cmp <= 0); break;
|
|
|
|
case OP_GE_S_v6p: cmp = (cmp >= 0); break;
|
|
|
|
case OP_LT_S_v6p: cmp = (cmp < 0); break;
|
|
|
|
case OP_GT_S_v6p: cmp = (cmp > 0); break;
|
|
|
|
case OP_NE_S_v6p: break;
|
|
|
|
case OP_EQ_S_v6p: cmp = !cmp; break;
|
2002-06-09 16:31:08 +00:00
|
|
|
default: break;
|
2001-06-04 03:36:35 +00:00
|
|
|
}
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = cmp;
|
2001-06-04 03:36:35 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_E_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = OPA(entity) != OPB(entity);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_FN_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
OPC(int) = OPA(func) != OPB(func);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
|
|
|
|
2001-02-27 08:21:40 +00:00
|
|
|
// ==================
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STORE_F_v6p:
|
|
|
|
case OP_STORE_ENT_v6p:
|
|
|
|
case OP_STORE_FLD_v6p: // integers
|
|
|
|
case OP_STORE_S_v6p:
|
|
|
|
case OP_STORE_FN_v6p: // pointers
|
|
|
|
case OP_STORE_I_v6p:
|
|
|
|
case OP_STORE_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPB(int) = OPA(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STORE_V_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorCopy (&OPA(float), &OPB(float));
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STORE_Q_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatCopy (&OPA(float), &OPB(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STORE_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPB(double) = OPA(double);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2001-02-19 21:15:25 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREP_F_v6p:
|
|
|
|
case OP_STOREP_ENT_v6p:
|
|
|
|
case OP_STOREP_FLD_v6p: // integers
|
|
|
|
case OP_STOREP_S_v6p:
|
|
|
|
case OP_STOREP_FN_v6p: // pointers
|
|
|
|
case OP_STOREP_I_v6p:
|
|
|
|
case OP_STOREP_P_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-18 04:21:06 +00:00
|
|
|
ptr->int_var = OPA(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREP_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_vector);
|
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorCopy (&OPA(float), &ptr->vector_var);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREP_Q_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatCopy (&OPA(float), &ptr->quat_var);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREP_D_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr);
|
2020-02-14 07:38:37 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_double);
|
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
*(double *) ptr = OPA(double);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2001-11-02 22:41:11 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADDRESS_v6p:
|
2002-11-08 02:43:04 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(entity) >= pr->pr_edict_area_size)
|
2002-11-08 02:43:04 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to address an out "
|
|
|
|
"of bounds edict");
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(entity) == 0 && pr->null_bad)
|
2002-11-08 02:43:04 +00:00
|
|
|
PR_RunError (pr, "assignment to world entity");
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPB(field) >= pr->progs->entityfields)
|
2002-11-08 02:43:04 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to address an "
|
|
|
|
"invalid field in an edict");
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
fldofs = OPA(entity) + OPB(field);
|
|
|
|
OPC(ptr) = &pr->pr_edict_area[fldofs] - pr->pr_globals;
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADDRESS_VOID_v6p:
|
|
|
|
case OP_ADDRESS_F_v6p:
|
|
|
|
case OP_ADDRESS_V_v6p:
|
|
|
|
case OP_ADDRESS_Q_v6p:
|
|
|
|
case OP_ADDRESS_S_v6p:
|
|
|
|
case OP_ADDRESS_ENT_v6p:
|
|
|
|
case OP_ADDRESS_FLD_v6p:
|
|
|
|
case OP_ADDRESS_FN_v6p:
|
|
|
|
case OP_ADDRESS_I_v6p:
|
|
|
|
case OP_ADDRESS_P_v6p:
|
|
|
|
case OP_ADDRESS_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = st->a;
|
2001-11-02 22:41:11 +00:00
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOAD_F_v6p:
|
|
|
|
case OP_LOAD_FLD_v6p:
|
|
|
|
case OP_LOAD_ENT_v6p:
|
|
|
|
case OP_LOAD_S_v6p:
|
|
|
|
case OP_LOAD_FN_v6p:
|
|
|
|
case OP_LOAD_I_v6p:
|
|
|
|
case OP_LOAD_P_v6p:
|
2002-11-08 02:43:04 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(entity) >= pr->pr_edict_area_size)
|
2002-11-08 02:43:04 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an out of "
|
|
|
|
"bounds edict number");
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPB(field) >= pr->progs->entityfields)
|
2002-11-08 02:43:04 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an invalid "
|
|
|
|
"field in an edict");
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
fldofs = OPA(entity) + OPB(field);
|
2022-01-18 04:21:06 +00:00
|
|
|
OPC(int) = pr->pr_edict_area[fldofs].int_var;
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOAD_V_v6p:
|
2002-11-08 02:43:04 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(entity) >= pr->pr_edict_area_size)
|
2004-04-08 00:56:30 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an out of "
|
|
|
|
"bounds edict number");
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPB(field) + 2 >= pr->progs->entityfields)
|
2004-04-08 00:56:30 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an invalid "
|
|
|
|
"field in an edict");
|
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
fldofs = OPA(entity) + OPB(field);
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (op_c, &pr->pr_edict_area[fldofs], 3 * sizeof (*op_c));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOAD_Q_v6p:
|
2004-04-08 00:56:30 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(entity) >= pr->pr_edict_area_size)
|
2004-04-08 00:56:30 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an out of "
|
|
|
|
"bounds edict number");
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPB(field) + 3 >= pr->progs->entityfields)
|
2004-04-08 00:56:30 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an invalid "
|
|
|
|
"field in an edict");
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
fldofs = OPA(entity) + OPB(field);
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (op_c, &pr->pr_edict_area[fldofs], 4 * sizeof (*op_c));
|
2001-02-27 08:21:40 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOAD_D_v6p:
|
2020-02-14 07:38:37 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(entity) >= pr->pr_edict_area_size)
|
2020-02-14 07:38:37 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an out of "
|
|
|
|
"bounds edict number");
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPB(field) + 1 >= pr->progs->entityfields)
|
2020-02-14 07:38:37 +00:00
|
|
|
PR_RunError (pr, "Progs attempted to read an invalid "
|
|
|
|
"field in an edict");
|
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
fldofs = OPA(entity) + OPB(field);
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (op_c, &pr->pr_edict_area[fldofs], sizeof (double));
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2001-11-02 22:41:11 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADB_F_v6p:
|
|
|
|
case OP_LOADB_S_v6p:
|
|
|
|
case OP_LOADB_ENT_v6p:
|
|
|
|
case OP_LOADB_FLD_v6p:
|
|
|
|
case OP_LOADB_FN_v6p:
|
|
|
|
case OP_LOADB_I_v6p:
|
|
|
|
case OP_LOADB_P_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(entity) + OPB(field);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2001-11-02 22:41:11 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-18 04:21:06 +00:00
|
|
|
OPC(int) = ptr->int_var;
|
2001-11-02 22:41:11 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADB_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(entity) + OPB(field);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_vector);
|
|
|
|
}
|
2001-11-02 22:41:11 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorCopy (&ptr->vector_var, &OPC(float));
|
2001-11-02 22:41:11 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADB_Q_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(entity) + OPB(field);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatCopy (&ptr->quat_var, &OPC(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADB_D_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(entity) + OPB(field);
|
2020-02-14 07:38:37 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_double);
|
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = *(double *) ptr;
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2001-11-02 22:41:11 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADBI_F_v6p:
|
|
|
|
case OP_LOADBI_S_v6p:
|
|
|
|
case OP_LOADBI_ENT_v6p:
|
|
|
|
case OP_LOADBI_FLD_v6p:
|
|
|
|
case OP_LOADBI_FN_v6p:
|
|
|
|
case OP_LOADBI_I_v6p:
|
|
|
|
case OP_LOADBI_P_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + (short) st->b;
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2001-12-07 20:07:38 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-18 04:21:06 +00:00
|
|
|
OPC(int) = ptr->int_var;
|
2001-12-07 20:07:38 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADBI_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + (short) st->b;
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_vector);
|
|
|
|
}
|
2001-12-07 20:07:38 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorCopy (&ptr->vector_var, &OPC(float));
|
2001-12-07 20:07:38 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADBI_Q_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + (short) st->b;
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatCopy (&ptr->quat_var, &OPC(float));
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LOADBI_D_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + (short) st->b;
|
2020-02-14 07:38:37 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = *(double *) ptr;
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2001-12-07 20:07:38 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LEA_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
|
|
|
OPC(ptr) = pointer;
|
2001-11-02 22:41:11 +00:00
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LEAI_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + (short) st->b;
|
|
|
|
OPC(ptr) = pointer;
|
2001-12-08 08:19:48 +00:00
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREB_F_v6p:
|
|
|
|
case OP_STOREB_S_v6p:
|
|
|
|
case OP_STOREB_ENT_v6p:
|
|
|
|
case OP_STOREB_FLD_v6p:
|
|
|
|
case OP_STOREB_FN_v6p:
|
|
|
|
case OP_STOREB_I_v6p:
|
|
|
|
case OP_STOREB_P_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + OPC(int);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2001-11-02 22:41:11 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-18 04:21:06 +00:00
|
|
|
ptr->int_var = OPA(int);
|
2001-11-02 22:41:11 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREB_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + OPC(int);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_vector);
|
|
|
|
}
|
2001-11-02 22:41:11 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorCopy (&OPA(float), &ptr->vector_var);
|
2001-11-02 22:41:11 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREB_Q_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + OPC(int);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatCopy (&OPA(float), &ptr->quat_var);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREB_D_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + OPC(int);
|
2020-02-14 07:38:37 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
*(double *) ptr = OPA(double);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2001-11-02 22:41:11 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREBI_F_v6p:
|
|
|
|
case OP_STOREBI_S_v6p:
|
|
|
|
case OP_STOREBI_ENT_v6p:
|
|
|
|
case OP_STOREBI_FLD_v6p:
|
|
|
|
case OP_STOREBI_FN_v6p:
|
|
|
|
case OP_STOREBI_I_v6p:
|
|
|
|
case OP_STOREBI_P_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + (short) st->c;
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2001-12-07 20:07:38 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-18 04:21:06 +00:00
|
|
|
ptr->int_var = OPA(int);
|
2001-12-07 20:07:38 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREBI_V_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + (short) st->c;
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
PR_BoundsCheck (pr, pointer, ev_vector);
|
|
|
|
}
|
2001-12-07 20:07:38 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
VectorCopy (&OPA(float), &ptr->vector_var);
|
2001-12-07 20:07:38 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREBI_Q_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + (short) st->c;
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
QuatCopy (&OPA(float), &ptr->quat_var);
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STOREBI_D_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPB(ptr) + (short) st->c;
|
2020-02-14 07:38:37 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-02 11:46:32 +00:00
|
|
|
*(double *) ptr = OPA(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2001-12-07 20:07:38 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSH_F_v6p:
|
|
|
|
case OP_PUSH_FLD_v6p:
|
|
|
|
case OP_PUSH_ENT_v6p:
|
|
|
|
case OP_PUSH_S_v6p:
|
|
|
|
case OP_PUSH_FN_v6p:
|
|
|
|
case OP_PUSH_I_v6p:
|
|
|
|
case OP_PUSH_P_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 1;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 1);
|
|
|
|
}
|
2022-01-18 04:21:06 +00:00
|
|
|
stk->int_var = OPA(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSH_V_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 3;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 3);
|
|
|
|
}
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (stk, op_a, 3 * sizeof (*op_c));
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSH_Q_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 4;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
|
|
|
}
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (stk, op_a, 4 * sizeof (*op_c));
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSHB_F_v6p:
|
|
|
|
case OP_PUSHB_S_v6p:
|
|
|
|
case OP_PUSHB_ENT_v6p:
|
|
|
|
case OP_PUSHB_FLD_v6p:
|
|
|
|
case OP_PUSHB_FN_v6p:
|
|
|
|
case OP_PUSHB_I_v6p:
|
|
|
|
case OP_PUSHB_P_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 1;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 1);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2022-01-18 04:21:06 +00:00
|
|
|
stk->int_var = ptr->int_var;
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSHB_V_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 3;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 3);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2020-12-20 17:12:51 +00:00
|
|
|
VectorCopy (&ptr->vector_var, &stk->vector_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSHB_Q_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 4;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2020-12-20 17:12:51 +00:00
|
|
|
QuatCopy (&ptr->quat_var, &stk->quat_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSHBI_F_v6p:
|
|
|
|
case OP_PUSHBI_S_v6p:
|
|
|
|
case OP_PUSHBI_ENT_v6p:
|
|
|
|
case OP_PUSHBI_FLD_v6p:
|
|
|
|
case OP_PUSHBI_FN_v6p:
|
|
|
|
case OP_PUSHBI_I_v6p:
|
|
|
|
case OP_PUSHBI_P_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 1;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + st->b;
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 1);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2022-01-18 04:21:06 +00:00
|
|
|
stk->int_var = ptr->int_var;
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSHBI_V_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 3;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + st->b;
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 3);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2020-12-20 17:12:51 +00:00
|
|
|
VectorCopy (&ptr->vector_var, &stk->vector_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_PUSHBI_Q_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 4;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + st->b;
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2020-12-20 17:12:51 +00:00
|
|
|
QuatCopy (&ptr->quat_var, &stk->quat_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POP_F_v6p:
|
|
|
|
case OP_POP_FLD_v6p:
|
|
|
|
case OP_POP_ENT_v6p:
|
|
|
|
case OP_POP_S_v6p:
|
|
|
|
case OP_POP_FN_v6p:
|
|
|
|
case OP_POP_I_v6p:
|
|
|
|
case OP_POP_P_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 1);
|
|
|
|
}
|
2022-01-18 04:21:06 +00:00
|
|
|
OPA(int) = stk->int_var;
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 1;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POP_V_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 3);
|
|
|
|
}
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (op_a, stk, 3 * sizeof (*op_c));
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 3;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POP_Q_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
|
|
|
}
|
2022-01-02 11:46:32 +00:00
|
|
|
memcpy (op_a, stk, 4 * sizeof (*op_c));
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 4;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POPB_F_v6p:
|
|
|
|
case OP_POPB_S_v6p:
|
|
|
|
case OP_POPB_ENT_v6p:
|
|
|
|
case OP_POPB_FLD_v6p:
|
|
|
|
case OP_POPB_FN_v6p:
|
|
|
|
case OP_POPB_I_v6p:
|
|
|
|
case OP_POPB_P_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 1);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2022-01-18 04:21:06 +00:00
|
|
|
ptr->int_var = stk->int_var;
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 1;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POPB_V_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 3);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2021-07-19 13:31:22 +00:00
|
|
|
VectorCopy (&stk->vector_var, &ptr->vector_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 3;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POPB_Q_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + OPB(int);
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2021-07-19 13:31:22 +00:00
|
|
|
QuatCopy (&stk->quat_var, &ptr->quat_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 4;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POPBI_F_v6p:
|
|
|
|
case OP_POPBI_S_v6p:
|
|
|
|
case OP_POPBI_ENT_v6p:
|
|
|
|
case OP_POPBI_FLD_v6p:
|
|
|
|
case OP_POPBI_FN_v6p:
|
|
|
|
case OP_POPBI_I_v6p:
|
|
|
|
case OP_POPBI_P_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + st->b;
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 1);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2022-01-18 04:21:06 +00:00
|
|
|
ptr->int_var = stk->int_var;
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 1;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POPBI_V_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + st->b;
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 3);
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2021-07-19 13:31:22 +00:00
|
|
|
VectorCopy (&stk->vector_var, &ptr->vector_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 3;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_POPBI_Q_v6p:
|
2018-10-11 04:24:03 +00:00
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2018-10-11 04:24:03 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
|
2022-01-18 06:50:32 +00:00
|
|
|
pointer = OPA(ptr) + st->b;
|
2018-10-11 04:24:03 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
|
|
|
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
2022-01-18 06:48:43 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_quaternion);
|
2018-10-11 04:24:03 +00:00
|
|
|
}
|
|
|
|
|
2021-07-19 13:31:22 +00:00
|
|
|
QuatCopy (&stk->quat_var, &ptr->quat_var);
|
2018-10-11 04:24:03 +00:00
|
|
|
*pr->globals.stack = stack + 4;
|
|
|
|
}
|
2004-04-08 00:56:30 +00:00
|
|
|
break;
|
2001-12-07 20:07:38 +00:00
|
|
|
|
2001-02-27 08:21:40 +00:00
|
|
|
// ==================
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_IFNOT_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (!OPA(int)) {
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->b - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_IF_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (OPA(int)) {
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->b - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_IFBE_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (OPA(int) <= 0) {
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->b - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_IFB_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (OPA(int) < 0) {
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->b - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_IFAE_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (OPA(int) >= 0) {
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->b - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_IFA_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
if (OPA(int) > 0) {
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->b - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GOTO_v6p:
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement += (short)st->a - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_JUMP_v6p:
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
if (pr_boundscheck->int_val
|
2022-01-26 10:30:25 +00:00
|
|
|
&& (OPA(uint) >= pr->progs->statements.count)) {
|
2002-05-18 00:49:16 +00:00
|
|
|
PR_RunError (pr, "Invalid jump destination");
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
}
|
2022-01-02 11:46:32 +00:00
|
|
|
pr->pr_xstatement = OPA(uint) - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_JUMPB_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
pointer = st->a + OPB(int);
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 04:21:06 +00:00
|
|
|
PR_BoundsCheck (pr, pointer, ev_int);
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2001-11-13 08:58:54 +00:00
|
|
|
ptr = pr->pr_globals + pointer;
|
2022-01-18 04:21:06 +00:00
|
|
|
pointer = ptr->int_var;
|
2001-11-13 08:58:54 +00:00
|
|
|
if (pr_boundscheck->int_val
|
2022-01-26 10:30:25 +00:00
|
|
|
&& (pointer >= pr->progs->statements.count)) {
|
2002-05-18 00:49:16 +00:00
|
|
|
PR_RunError (pr, "Invalid jump destination");
|
2001-11-13 08:58:54 +00:00
|
|
|
}
|
2003-08-24 07:23:12 +00:00
|
|
|
pr->pr_xstatement = pointer - 1; // offset the st++
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
2001-11-13 08:58:54 +00:00
|
|
|
break;
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_RCALL2_v6p:
|
|
|
|
case OP_RCALL3_v6p:
|
|
|
|
case OP_RCALL4_v6p:
|
|
|
|
case OP_RCALL5_v6p:
|
|
|
|
case OP_RCALL6_v6p:
|
|
|
|
case OP_RCALL7_v6p:
|
|
|
|
case OP_RCALL8_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
pr->pr_params[1] = op_c;
|
2005-06-12 09:54:01 +00:00
|
|
|
goto op_rcall;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_RCALL1_v6p:
|
2005-06-12 09:54:01 +00:00
|
|
|
pr->pr_params[1] = pr->pr_real_params[1];
|
|
|
|
op_rcall:
|
2022-01-02 11:46:32 +00:00
|
|
|
pr->pr_params[0] = op_b;
|
2022-01-02 12:06:14 +00:00
|
|
|
pr->pr_argc = st->op - OP_RCALL1_v6p + 1;
|
2005-06-12 09:54:01 +00:00
|
|
|
goto op_call;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CALL0_v6p:
|
|
|
|
case OP_CALL1_v6p:
|
|
|
|
case OP_CALL2_v6p:
|
|
|
|
case OP_CALL3_v6p:
|
|
|
|
case OP_CALL4_v6p:
|
|
|
|
case OP_CALL5_v6p:
|
|
|
|
case OP_CALL6_v6p:
|
|
|
|
case OP_CALL7_v6p:
|
|
|
|
case OP_CALL8_v6p:
|
2005-06-12 09:54:01 +00:00
|
|
|
PR_RESET_PARAMS (pr);
|
2022-01-02 12:06:14 +00:00
|
|
|
pr->pr_argc = st->op - OP_CALL0_v6p;
|
2005-06-12 09:54:01 +00:00
|
|
|
op_call:
|
2001-02-19 21:15:25 +00:00
|
|
|
pr->pr_xfunction->profile += profile - startprofile;
|
|
|
|
startprofile = profile;
|
2022-01-18 06:50:32 +00:00
|
|
|
PR_CallFunction (pr, OPA(func), pr->pr_return);
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_DONE_v6p:
|
|
|
|
case OP_RETURN_v6p:
|
2003-08-13 17:27:34 +00:00
|
|
|
if (!st->a)
|
|
|
|
memset (&R_INT (pr), 0,
|
2022-01-02 11:46:32 +00:00
|
|
|
pr->pr_param_size * sizeof (*op_a));
|
|
|
|
else if (&R_INT (pr) != &OPA(int))
|
|
|
|
memcpy (&R_INT (pr), op_a,
|
|
|
|
pr->pr_param_size * sizeof (*op_a));
|
2011-01-12 15:29:56 +00:00
|
|
|
// fallthrough
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_RETURN_V_v6p:
|
2003-09-15 21:13:13 +00:00
|
|
|
pr->pr_xfunction->profile += profile - startprofile;
|
|
|
|
startprofile = profile;
|
2020-03-24 04:24:55 +00:00
|
|
|
PR_LeaveFunction (pr, pr->pr_depth == exitdepth);
|
2002-10-22 15:07:54 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
2002-08-20 23:04:57 +00:00
|
|
|
if (pr->pr_depth == exitdepth) {
|
2004-11-07 03:00:00 +00:00
|
|
|
if (pr->pr_trace && pr->pr_depth <= pr->pr_trace_depth)
|
|
|
|
pr->pr_trace = false;
|
2016-01-03 14:04:00 +00:00
|
|
|
// all done
|
|
|
|
goto exit_program;
|
2002-08-20 23:04:57 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STATE_v6p:
|
2013-01-17 05:11:54 +00:00
|
|
|
{
|
|
|
|
int self = *pr->globals.self;
|
2021-07-15 07:55:02 +00:00
|
|
|
int nextthink = pr->fields.nextthink + self;
|
|
|
|
int frame = pr->fields.frame + self;
|
|
|
|
int think = pr->fields.think + self;
|
2022-01-16 10:32:47 +00:00
|
|
|
float time = *pr->globals.ftime + 0.1;
|
2021-07-15 07:55:02 +00:00
|
|
|
pr->pr_edict_area[nextthink].float_var = time;
|
2022-01-02 11:46:32 +00:00
|
|
|
pr->pr_edict_area[frame].float_var = OPA(float);
|
2022-01-18 06:50:32 +00:00
|
|
|
pr->pr_edict_area[think].func_var = OPB(func);
|
2013-01-17 05:11:54 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_STATE_F_v6p:
|
2013-01-17 05:11:54 +00:00
|
|
|
{
|
|
|
|
int self = *pr->globals.self;
|
2021-07-15 07:55:02 +00:00
|
|
|
int nextthink = pr->fields.nextthink + self;
|
|
|
|
int frame = pr->fields.frame + self;
|
|
|
|
int think = pr->fields.think + self;
|
2022-01-16 10:32:47 +00:00
|
|
|
float time = *pr->globals.ftime + OPC(float);
|
2021-07-15 07:55:02 +00:00
|
|
|
pr->pr_edict_area[nextthink].float_var = time;
|
2022-01-02 11:46:32 +00:00
|
|
|
pr->pr_edict_area[frame].float_var = OPA(float);
|
2022-01-18 06:50:32 +00:00
|
|
|
pr->pr_edict_area[think].func_var = OPB(func);
|
2013-01-17 05:11:54 +00:00
|
|
|
}
|
2004-02-11 01:43:33 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_ADD_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) + OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_SUB_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) - OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MUL_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) * OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_DIV_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) / OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOD_I_v6p:
|
2020-02-16 02:53:56 +00:00
|
|
|
{
|
|
|
|
// implement true modulo for integers:
|
|
|
|
// 5 mod 3 = 2
|
|
|
|
// -5 mod 3 = 1
|
|
|
|
// 5 mod -3 = -1
|
|
|
|
// -5 mod -3 = -2
|
2022-01-02 11:46:32 +00:00
|
|
|
int a = OPA(int);
|
|
|
|
int b = OPB(int);
|
2020-02-16 02:53:56 +00:00
|
|
|
int c = a % b;
|
|
|
|
// % is really remainder and so has the same sign rules
|
|
|
|
// as division: -5 % 3 = -2, so need to add b (3 here)
|
|
|
|
// if c's sign is incorrect, but only if c is non-zero
|
|
|
|
int mask = (a ^ b) >> 31;
|
2020-02-16 03:08:08 +00:00
|
|
|
mask &= ~(!!c + 0) + 1; // +0 to convert bool to int (gcc)
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = c + (mask & b);
|
2020-02-16 02:53:56 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_REM_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) % OPB(int);
|
2001-08-10 16:17:00 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOD_D_v6p:
|
2020-02-16 02:53:56 +00:00
|
|
|
{
|
2022-01-02 11:46:32 +00:00
|
|
|
double a = OPA(double);
|
|
|
|
double b = OPB(double);
|
2020-02-16 02:53:56 +00:00
|
|
|
// floating point modulo is so much easier :P
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = a - b * floor (a / b);
|
2020-02-16 02:53:56 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_REM_D_v6p:
|
2020-02-14 07:38:37 +00:00
|
|
|
{
|
2022-01-02 11:46:32 +00:00
|
|
|
double a = OPA(double);
|
|
|
|
double b = OPB(double);
|
|
|
|
OPC(double) = a - b * trunc (a / b);
|
2020-02-14 07:38:37 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOD_F_v6p:
|
2020-02-16 02:53:56 +00:00
|
|
|
{
|
2022-01-02 11:46:32 +00:00
|
|
|
float a = OPA(float);
|
|
|
|
float b = OPB(float);
|
|
|
|
OPC(float) = a - b * floorf (a / b);
|
2020-02-16 02:53:56 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_REM_F_v6p:
|
2020-02-14 04:54:26 +00:00
|
|
|
{
|
2022-01-02 11:46:32 +00:00
|
|
|
float a = OPA(float);
|
|
|
|
float b = OPB(float);
|
|
|
|
OPC(float) = a - b * truncf (a / b);
|
2020-02-14 04:54:26 +00:00
|
|
|
}
|
2001-08-10 16:17:00 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONV_IF_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONV_FI_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(float);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITAND_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) & OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITOR_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) | OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITXOR_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) ^ OPB(int);
|
2001-08-09 16:34:46 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BITNOT_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = ~OPA(int);
|
2001-08-09 16:34:46 +00:00
|
|
|
break;
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GE_I_v6p:
|
|
|
|
case OP_GE_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) >= OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GE_U_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(uint) >= OPB(uint);
|
2003-08-24 05:53:15 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LE_I_v6p:
|
|
|
|
case OP_LE_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) <= OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LE_U_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(uint) <= OPB(uint);
|
2003-08-24 05:53:15 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GT_I_v6p:
|
|
|
|
case OP_GT_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) > OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GT_U_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(uint) > OPB(uint);
|
2003-08-24 05:53:15 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LT_I_v6p:
|
|
|
|
case OP_LT_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) < OPB(int);
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LT_U_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(uint) < OPB(uint);
|
2003-08-24 05:53:15 +00:00
|
|
|
break;
|
pr_comp.h:
o add ev_uinteger to the types enum
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
progs.h:
o add uinteger accessors
pr_exec.c:
o implement ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
pr_opcode.c:
o add opcodes for ifbe, ifb, ifae, ifa, jump, lt.ui, gt.ui, le.ui, ge.ui
expr.h:
o prototype inc_users
qfcc.h:
o add externs for op_ifbe, op_ifb, op_ifae and op_ifa
emit.c:
o don't bother emitting an assignment to a temp def that's only used once
(i.e., it's never read, only written to)
o support the new if* instructions
expr.c:
o support the new if* instructions
o detect expression loops in append_expr
o support unsigned integers
o re-work temp def usage counting
pr_def.c
o debugging for temp def usage counts
pr_opcode.c:
o support the new if* instructions
qc-parse.y:
o provide defines for IFBE IFB IFAE IFA
switch.c:
o do binary searches for strings, floats and ints if there are more than
8 cases in a switch. Strings need more testing.
2001-11-09 00:58:16 +00:00
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_AND_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) && OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_OR_I_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) || OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_I_v6p:
|
|
|
|
case OP_NOT_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = !OPA(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_I_v6p:
|
|
|
|
case OP_EQ_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) == OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_I_v6p:
|
|
|
|
case OP_NE_P_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(int) != OPB(int);
|
2001-02-19 21:15:25 +00:00
|
|
|
break;
|
|
|
|
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOVEI_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
memmove (op_c, op_a, st->b * 4);
|
2002-10-16 06:44:41 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOVEP_v6p:
|
2010-01-13 06:19:50 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
PR_BoundsCheckSize (pr, OPC(ptr), OPB(uint));
|
|
|
|
PR_BoundsCheckSize (pr, OPA(ptr), OPB(uint));
|
2010-01-13 06:19:50 +00:00
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
memmove (pr->pr_globals + OPC(ptr),
|
|
|
|
pr->pr_globals + OPA(ptr),
|
2022-01-02 11:46:32 +00:00
|
|
|
OPB(uint) * 4);
|
2002-10-16 06:44:41 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MOVEPI_v6p:
|
2011-03-09 01:29:24 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
PR_BoundsCheckSize (pr, OPC(ptr), st->b);
|
|
|
|
PR_BoundsCheckSize (pr, OPA(ptr), st->b);
|
2011-03-09 01:29:24 +00:00
|
|
|
}
|
2022-01-18 06:50:32 +00:00
|
|
|
memmove (pr->pr_globals + OPC(ptr),
|
|
|
|
pr->pr_globals + OPA(ptr),
|
2011-03-09 01:29:24 +00:00
|
|
|
st->b * 4);
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MEMSETI_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
pr_memset (op_c, OPA(ptr), st->b);
|
2020-03-13 08:50:57 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MEMSETP_v6p:
|
2020-03-11 13:48:55 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 06:50:32 +00:00
|
|
|
PR_BoundsCheckSize (pr, OPC(ptr), OPB(uint));
|
2020-03-11 13:48:55 +00:00
|
|
|
}
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_memset (pr->pr_globals + OPC(ptr), OPA(int),
|
2022-01-18 06:50:32 +00:00
|
|
|
OPB(uint));
|
2020-03-11 13:48:55 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_MEMSETPI_v6p:
|
2020-03-11 13:48:55 +00:00
|
|
|
if (pr_boundscheck->int_val) {
|
2022-01-18 03:11:14 +00:00
|
|
|
PR_BoundsCheckSize (pr, OPC(ptr), st->b);
|
2020-03-11 13:48:55 +00:00
|
|
|
}
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_memset (pr->pr_globals + OPC(ptr), OPA(int),
|
2020-03-11 13:48:55 +00:00
|
|
|
st->b);
|
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GE_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(double) >= OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LE_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(double) <= OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_GT_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(double) > OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_LT_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(double) < OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NOT_D_v6p:
|
2022-01-18 04:21:06 +00:00
|
|
|
OPC(int) = (op_a[0].int_var
|
|
|
|
|| (op_a[1].int_var & ~0x80000000u));
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_EQ_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(double) == OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_NE_D_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(double) != OPB(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONV_ID_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = OPA(int);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONV_DI_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(int) = OPA(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONV_FD_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(double) = OPA(float);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_CONV_DF_v6p:
|
2022-01-02 11:46:32 +00:00
|
|
|
OPC(float) = OPA(double);
|
2020-02-14 07:38:37 +00:00
|
|
|
break;
|
2002-10-16 06:44:41 +00:00
|
|
|
|
2001-07-23 01:31:22 +00:00
|
|
|
// LordHavoc: to be enabled when Progs version 7 (or whatever it will be numbered) is finalized
|
|
|
|
/*
|
2022-01-02 12:06:14 +00:00
|
|
|
case OP_BOUNDCHECK_v6p:
|
2022-01-18 06:50:32 +00:00
|
|
|
if (OPA(ptr) >= st->b) {
|
2001-09-10 12:56:23 +00:00
|
|
|
PR_RunError (pr, "Progs boundcheck failed at line number "
|
2002-05-18 00:49:16 +00:00
|
|
|
"%d, value is < 0 or >= %d", st->b, st->c);
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
*/
|
|
|
|
default:
|
2022-01-03 08:54:54 +00:00
|
|
|
PR_RunError (pr, "Bad opcode %i", st->op & ~OP_BREAK);
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
2022-01-18 04:21:06 +00:00
|
|
|
if (pr->watch && pr->watch->int_var != old_val.int_var) {
|
2020-03-24 04:26:35 +00:00
|
|
|
if (!pr->wp_conditional
|
2022-01-18 04:21:06 +00:00
|
|
|
|| pr->watch->int_var == pr->wp_val.int_var) {
|
2020-03-24 06:35:42 +00:00
|
|
|
if (pr->debug_handler) {
|
2020-03-26 02:44:02 +00:00
|
|
|
pr->debug_handler (prd_watchpoint, 0, pr->debug_data);
|
2020-03-24 06:35:42 +00:00
|
|
|
} else {
|
|
|
|
PR_RunError (pr, "watchpoint hit: %d -> %d",
|
2022-01-18 04:21:06 +00:00
|
|
|
old_val.int_var, pr->watch->int_var);
|
2020-03-24 06:35:42 +00:00
|
|
|
}
|
2020-03-24 04:26:35 +00:00
|
|
|
}
|
2022-01-18 04:21:06 +00:00
|
|
|
old_val.int_var = pr->watch->int_var;
|
2020-03-24 04:26:35 +00:00
|
|
|
}
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
2022-01-02 11:16:45 +00:00
|
|
|
exit_program:
|
|
|
|
}
|
|
|
|
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// Access the object that `mm` points at as a pr_<type>_t lvalue. Relies on a
// pointer variable named `mm` being in scope where the macro is expanded.
#define MM(type) (*((pr_##type##_t *) (mm)))
|
|
|
|
// Access the object that `stk` points at as a pr_<type>_t lvalue. Relies on a
// pointer variable named `stk` being in scope where the macro is expanded.
#define STK(type) (*((pr_##type##_t *) (stk)))
|
|
|
|
|
|
|
|
static pr_type_t *
|
2022-01-15 07:27:46 +00:00
|
|
|
pr_address_mode (progs_t *pr, const dstatement_t *st, int mm_ind)
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
{
|
|
|
|
pr_type_t *op_a = pr->pr_globals + st->a + PR_BASE (pr, st, A);
|
2022-01-04 10:01:05 +00:00
|
|
|
pr_type_t *op_b = pr->pr_globals + st->b + PR_BASE (pr, st, B);
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t mm_offs = 0;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
|
|
|
|
switch (mm_ind) {
|
|
|
|
case 0:
|
|
|
|
// regular global access
|
|
|
|
mm_offs = op_a - pr->pr_globals;
|
|
|
|
break;
|
|
|
|
case 1:
|
2022-01-20 05:55:29 +00:00
|
|
|
// entity.field (equivalent to OP_LOAD_t_v6p)
|
|
|
|
pr_ptr_t edict_area = pr->pr_edict_area - pr->pr_globals;
|
|
|
|
mm_offs = edict_area + OPA(entity) + OPB(field);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case 2:
|
2022-01-03 05:41:29 +00:00
|
|
|
// constant indexed pointer: *a + b (supports -ve offset)
|
2022-01-18 06:50:32 +00:00
|
|
|
mm_offs = OPA(ptr) + (short) st->b;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case 3:
|
2022-01-03 05:41:29 +00:00
|
|
|
// variable indexed pointer: *a + *b (supports -ve offset)
|
2022-01-18 06:50:32 +00:00
|
|
|
mm_offs = OPA(ptr) + OPB(int);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return pr->pr_globals + mm_offs;
|
|
|
|
}
|
|
|
|
|
2022-01-10 02:38:21 +00:00
|
|
|
static pr_type_t *
|
|
|
|
pr_call_mode (progs_t *pr, const dstatement_t *st, int mm_ind)
|
|
|
|
{
|
|
|
|
pr_type_t *op_a = pr->pr_globals + st->a + PR_BASE (pr, st, A);
|
|
|
|
pr_type_t *op_b = pr->pr_globals + st->b + PR_BASE (pr, st, B);
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t mm_offs = 0;
|
2022-01-10 02:38:21 +00:00
|
|
|
|
|
|
|
switch (mm_ind) {
|
|
|
|
case 1:
|
|
|
|
// regular global access
|
|
|
|
mm_offs = op_a - pr->pr_globals;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
// constant indexed pointer: *a + b (supports -ve offset)
|
2022-01-18 06:50:32 +00:00
|
|
|
mm_offs = OPA(ptr) + (short) st->b;
|
2022-01-10 02:38:21 +00:00
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
// variable indexed pointer: *a + *b (supports -ve offset)
|
2022-01-18 06:50:32 +00:00
|
|
|
mm_offs = OPA(ptr) + OPB(int);
|
2022-01-10 02:38:21 +00:00
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
// entity.field (equivalent to OP_LOAD_t_v6p)
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t edict_area = pr->pr_edict_area - pr->pr_globals;
|
2022-01-18 06:50:32 +00:00
|
|
|
mm_offs = edict_area + OPA(entity) + OPB(field);
|
2022-01-10 02:38:21 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return pr->pr_globals + mm_offs;
|
|
|
|
}
|
|
|
|
|
2022-01-18 03:11:14 +00:00
|
|
|
static pr_ptr_t __attribute__((pure))
|
2022-01-16 05:22:04 +00:00
|
|
|
pr_jump_mode (progs_t *pr, const dstatement_t *st, int jump_ind)
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
{
|
|
|
|
pr_type_t *op_a = pr->pr_globals + st->a + PR_BASE (pr, st, A);
|
2022-01-04 10:01:05 +00:00
|
|
|
pr_type_t *op_b = pr->pr_globals + st->b + PR_BASE (pr, st, B);
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t jump_offs = pr->pr_xstatement;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
|
|
|
|
switch (jump_ind) {
|
|
|
|
case 0:
|
|
|
|
// instruction relative offset
|
2022-01-04 05:30:20 +00:00
|
|
|
jump_offs = jump_offs + (short) st->a;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
// simple pointer dereference: *a
|
2022-01-18 06:50:32 +00:00
|
|
|
jump_offs = OPA(ptr);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case 2:
|
2022-01-03 05:41:29 +00:00
|
|
|
// constant indexed pointer: *a + b (supports -ve offset)
|
2022-01-18 06:50:32 +00:00
|
|
|
jump_offs = OPA(ptr) + (short) st->b;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case 3:
|
2022-01-03 05:41:29 +00:00
|
|
|
// variable indexed pointer: *a + *b (supports -ve offset)
|
2022-01-18 06:50:32 +00:00
|
|
|
jump_offs = OPA(ptr) + OPB(int);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), along with instructions to set a register and to push/pop
the four registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
}
|
2022-01-26 10:30:25 +00:00
|
|
|
if (pr_boundscheck->int_val && jump_offs >= pr->progs->statements.count) {
|
2022-01-04 08:53:10 +00:00
|
|
|
PR_RunError (pr, "out of bounds: %x", jump_offs);
|
|
|
|
}
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), along with instructions to set a register and to push/pop
the four registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
return jump_offs - 1; // for st++
|
|
|
|
}
|
|
|
|
|
2022-01-15 07:51:59 +00:00
|
|
|
static pr_type_t *
|
|
|
|
pr_stack_push (progs_t *pr)
|
|
|
|
{
|
|
|
|
// keep the stack 16-byte aligned
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack - 4;
|
2022-01-15 07:51:59 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
|
|
|
}
|
|
|
|
*pr->globals.stack = stack;
|
|
|
|
return stk;
|
|
|
|
}
|
|
|
|
|
|
|
|
static pr_type_t *
|
|
|
|
pr_stack_pop (progs_t *pr)
|
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
2022-01-15 07:51:59 +00:00
|
|
|
pr_type_t *stk = pr->pr_globals + stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack, 4);
|
|
|
|
}
|
|
|
|
// keep the stack 16-byte aligned
|
|
|
|
*pr->globals.stack = stack + 4;
|
|
|
|
return stk;
|
|
|
|
}
|
|
|
|
|
2022-01-21 11:33:15 +00:00
|
|
|
static void
|
|
|
|
pr_stack_adjust (progs_t *pr, int mode, int offset)
|
|
|
|
{
|
|
|
|
// keep the stack 16-byte aligned
|
|
|
|
if (mode || (offset & 3)) {
|
|
|
|
PR_RunError (pr, "invalid stack adjustment: %d, %d", mode, offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_ptr_t stack = *pr->globals.stack;
|
|
|
|
if (pr_boundscheck->int_val) {
|
|
|
|
check_stack_pointer (pr, stack + offset, 0);
|
|
|
|
}
|
|
|
|
*pr->globals.stack = stack + offset;
|
|
|
|
}
|
|
|
|
|
2022-01-15 07:51:59 +00:00
|
|
|
static void
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
pr_with (progs_t *pr, const dstatement_t *st)
|
|
|
|
{
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t edict_area = pr->pr_edict_area - pr->pr_globals;
|
2022-01-04 08:55:20 +00:00
|
|
|
pr_type_t *op_b = pr->pr_globals + PR_BASE (pr, st, B) + st->b;
|
2022-01-15 07:51:59 +00:00
|
|
|
pr_type_t *stk;
|
2022-01-15 09:44:11 +00:00
|
|
|
pr_uint_t *base = &pr->pr_bases[st->c & 3];
|
2022-01-04 08:55:20 +00:00
|
|
|
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
switch (st->a) {
|
2022-01-04 08:55:20 +00:00
|
|
|
// fixed offset
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case 0:
|
|
|
|
// hard-0 base
|
2022-01-15 09:44:11 +00:00
|
|
|
*base = st->b;
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case 1:
|
2022-01-15 09:44:11 +00:00
|
|
|
// relative to current base (-ve offset)
|
|
|
|
*base = PR_BASE (pr, st, B) + (pr_short_t) st->b;
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case 2:
|
|
|
|
// relative to stack (-ve offset)
|
2022-01-15 09:44:11 +00:00
|
|
|
*base = *pr->globals.stack + (pr_short_t) st->b;
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case 3:
|
2022-01-15 09:44:11 +00:00
|
|
|
// relative to edict_area (only +ve)
|
|
|
|
*base = edict_area + st->b;
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
2022-01-04 08:55:20 +00:00
|
|
|
|
|
|
|
case 4:
|
|
|
|
// hard-0 base
|
2022-01-15 09:44:11 +00:00
|
|
|
*base = pr->pr_globals[st->b].pointer_var;
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
2022-01-04 08:55:20 +00:00
|
|
|
case 5:
|
2022-01-18 03:11:14 +00:00
|
|
|
*base = OPB(ptr);
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
2022-01-04 08:55:20 +00:00
|
|
|
case 6:
|
|
|
|
// relative to stack (-ve offset)
|
2022-01-15 09:44:11 +00:00
|
|
|
*base = *pr->globals.stack + OPB(int);
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
2022-01-04 08:55:20 +00:00
|
|
|
case 7:
|
2022-01-15 09:44:11 +00:00
|
|
|
// relative to edict_area (only +ve)
|
2022-01-18 06:50:32 +00:00
|
|
|
*base = edict_area + OPB(field);
|
2022-01-15 07:51:59 +00:00
|
|
|
return;
|
2022-01-15 09:44:11 +00:00
|
|
|
|
2022-01-15 07:51:59 +00:00
|
|
|
case 8:
|
2022-01-15 09:44:11 +00:00
|
|
|
// pushregs
|
2022-01-15 07:51:59 +00:00
|
|
|
stk = pr_stack_push (pr);
|
|
|
|
STK(uivec4) = pr->pr_bases;
|
|
|
|
return;
|
|
|
|
case 9:
|
2022-01-15 09:44:11 +00:00
|
|
|
// popregs
|
2022-01-15 07:51:59 +00:00
|
|
|
stk = pr_stack_pop (pr);
|
|
|
|
pr->pr_bases = STK(uivec4);
|
|
|
|
return;
|
2022-01-15 09:44:11 +00:00
|
|
|
case 10:
|
|
|
|
// reset
|
|
|
|
pr->pr_bases = (pr_uivec4_t) {};
|
|
|
|
return;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
|
|
|
PR_RunError (pr, "Invalid with index: %u", st->a);
|
|
|
|
}
|
|
|
|
|
2022-01-03 10:30:32 +00:00
|
|
|
static pr_ivec4_t
|
|
|
|
pr_swizzle_f (pr_ivec4_t vec, pr_ushort_t swiz)
|
|
|
|
{
|
|
|
|
goto do_swizzle;
|
|
|
|
#define swizzle __builtin_shuffle
|
|
|
|
swizzle_xxxx: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_yxxx: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_zxxx: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_wxxx: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_xyxx: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_yyxx: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_zyxx: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_wyxx: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_xzxx: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_yzxx: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_zzxx: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_wzxx: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_xwxx: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_ywxx: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_zwxx: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_wwxx: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_xxyx: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_yxyx: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_zxyx: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_wxyx: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_xyyx: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_yyyx: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_zyyx: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_wyyx: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_xzyx: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_yzyx: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_zzyx: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_wzyx: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_xwyx: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_ywyx: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_zwyx: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_wwyx: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_xxzx: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_yxzx: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_zxzx: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_wxzx: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_xyzx: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_yyzx: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_zyzx: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_wyzx: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_xzzx: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_yzzx: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_zzzx: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_wzzx: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_xwzx: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_ywzx: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_zwzx: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_wwzx: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_xxwx: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_yxwx: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_zxwx: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_wxwx: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_xywx: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_yywx: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_zywx: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_wywx: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_xzwx: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_yzwx: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_zzwx: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_wzwx: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_xwwx: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_ywwx: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_zwwx: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_wwwx: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_xxxy: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_yxxy: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_zxxy: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_wxxy: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_xyxy: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_yyxy: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_zyxy: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_wyxy: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_xzxy: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_yzxy: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_zzxy: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_wzxy: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_xwxy: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_ywxy: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_zwxy: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_wwxy: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_xxyy: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_yxyy: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_zxyy: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_wxyy: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_xyyy: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_yyyy: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_zyyy: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_wyyy: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_xzyy: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_yzyy: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_zzyy: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_wzyy: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_xwyy: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_ywyy: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_zwyy: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_wwyy: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_xxzy: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_yxzy: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_zxzy: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_wxzy: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_xyzy: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_yyzy: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_zyzy: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_wyzy: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_xzzy: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_yzzy: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_zzzy: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_wzzy: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_xwzy: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_ywzy: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_zwzy: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_wwzy: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_xxwy: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_yxwy: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_zxwy: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_wxwy: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_xywy: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_yywy: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_zywy: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_wywy: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_xzwy: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_yzwy: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_zzwy: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_wzwy: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_xwwy: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_ywwy: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_zwwy: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_wwwy: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_xxxz: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_yxxz: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_zxxz: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_wxxz: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_xyxz: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_yyxz: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_zyxz: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_wyxz: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_xzxz: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_yzxz: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_zzxz: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_wzxz: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_xwxz: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_ywxz: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_zwxz: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_wwxz: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_xxyz: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_yxyz: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_zxyz: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_wxyz: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_xyyz: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_yyyz: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_zyyz: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_wyyz: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_xzyz: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_yzyz: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_zzyz: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_wzyz: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_xwyz: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_ywyz: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_zwyz: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_wwyz: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_xxzz: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_yxzz: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_zxzz: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_wxzz: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_xyzz: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_yyzz: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_zyzz: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_wyzz: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_xzzz: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_yzzz: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_zzzz: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_wzzz: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_xwzz: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_ywzz: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_zwzz: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_wwzz: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_xxwz: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_yxwz: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_zxwz: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_wxwz: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_xywz: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_yywz: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_zywz: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_wywz: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_xzwz: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_yzwz: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_zzwz: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_wzwz: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_xwwz: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_ywwz: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_zwwz: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_wwwz: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_xxxw: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_yxxw: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_zxxw: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_wxxw: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_xyxw: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_yyxw: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_zyxw: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_wyxw: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_xzxw: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_yzxw: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_zzxw: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_wzxw: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_xwxw: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_ywxw: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_zwxw: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_wwxw: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_xxyw: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_yxyw: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_zxyw: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_wxyw: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_xyyw: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_yyyw: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_zyyw: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_wyyw: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_xzyw: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_yzyw: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_zzyw: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_wzyw: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_xwyw: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_ywyw: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_zwyw: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_wwyw: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_xxzw: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_yxzw: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_zxzw: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_wxzw: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_xyzw: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_yyzw: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_zyzw: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_wyzw: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_xzzw: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_yzzw: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_zzzw: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_wzzw: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_xwzw: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_ywzw: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_zwzw: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_wwzw: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_xxww: vec = swizzle (vec, (pr_ivec4_t) { 0, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_yxww: vec = swizzle (vec, (pr_ivec4_t) { 1, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_zxww: vec = swizzle (vec, (pr_ivec4_t) { 2, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_wxww: vec = swizzle (vec, (pr_ivec4_t) { 3, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_xyww: vec = swizzle (vec, (pr_ivec4_t) { 0, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_yyww: vec = swizzle (vec, (pr_ivec4_t) { 1, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_zyww: vec = swizzle (vec, (pr_ivec4_t) { 2, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_wyww: vec = swizzle (vec, (pr_ivec4_t) { 3, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_xzww: vec = swizzle (vec, (pr_ivec4_t) { 0, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_yzww: vec = swizzle (vec, (pr_ivec4_t) { 1, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_zzww: vec = swizzle (vec, (pr_ivec4_t) { 2, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_wzww: vec = swizzle (vec, (pr_ivec4_t) { 3, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_xwww: vec = swizzle (vec, (pr_ivec4_t) { 0, 3, 3, 3 }); goto negate;
|
|
|
|
swizzle_ywww: vec = swizzle (vec, (pr_ivec4_t) { 1, 3, 3, 3 }); goto negate;
|
|
|
|
swizzle_zwww: vec = swizzle (vec, (pr_ivec4_t) { 2, 3, 3, 3 }); goto negate;
|
|
|
|
swizzle_wwww: vec = swizzle (vec, (pr_ivec4_t) { 3, 3, 3, 3 }); goto negate;
|
|
|
|
static void *swizzle_table[256] = {
|
|
|
|
&&swizzle_xxxx, &&swizzle_yxxx, &&swizzle_zxxx, &&swizzle_wxxx,
|
|
|
|
&&swizzle_xyxx, &&swizzle_yyxx, &&swizzle_zyxx, &&swizzle_wyxx,
|
|
|
|
&&swizzle_xzxx, &&swizzle_yzxx, &&swizzle_zzxx, &&swizzle_wzxx,
|
|
|
|
&&swizzle_xwxx, &&swizzle_ywxx, &&swizzle_zwxx, &&swizzle_wwxx,
|
|
|
|
&&swizzle_xxyx, &&swizzle_yxyx, &&swizzle_zxyx, &&swizzle_wxyx,
|
|
|
|
&&swizzle_xyyx, &&swizzle_yyyx, &&swizzle_zyyx, &&swizzle_wyyx,
|
|
|
|
&&swizzle_xzyx, &&swizzle_yzyx, &&swizzle_zzyx, &&swizzle_wzyx,
|
|
|
|
&&swizzle_xwyx, &&swizzle_ywyx, &&swizzle_zwyx, &&swizzle_wwyx,
|
|
|
|
&&swizzle_xxzx, &&swizzle_yxzx, &&swizzle_zxzx, &&swizzle_wxzx,
|
|
|
|
&&swizzle_xyzx, &&swizzle_yyzx, &&swizzle_zyzx, &&swizzle_wyzx,
|
|
|
|
&&swizzle_xzzx, &&swizzle_yzzx, &&swizzle_zzzx, &&swizzle_wzzx,
|
|
|
|
&&swizzle_xwzx, &&swizzle_ywzx, &&swizzle_zwzx, &&swizzle_wwzx,
|
|
|
|
&&swizzle_xxwx, &&swizzle_yxwx, &&swizzle_zxwx, &&swizzle_wxwx,
|
|
|
|
&&swizzle_xywx, &&swizzle_yywx, &&swizzle_zywx, &&swizzle_wywx,
|
|
|
|
&&swizzle_xzwx, &&swizzle_yzwx, &&swizzle_zzwx, &&swizzle_wzwx,
|
|
|
|
&&swizzle_xwwx, &&swizzle_ywwx, &&swizzle_zwwx, &&swizzle_wwwx,
|
|
|
|
&&swizzle_xxxy, &&swizzle_yxxy, &&swizzle_zxxy, &&swizzle_wxxy,
|
|
|
|
&&swizzle_xyxy, &&swizzle_yyxy, &&swizzle_zyxy, &&swizzle_wyxy,
|
|
|
|
&&swizzle_xzxy, &&swizzle_yzxy, &&swizzle_zzxy, &&swizzle_wzxy,
|
|
|
|
&&swizzle_xwxy, &&swizzle_ywxy, &&swizzle_zwxy, &&swizzle_wwxy,
|
|
|
|
&&swizzle_xxyy, &&swizzle_yxyy, &&swizzle_zxyy, &&swizzle_wxyy,
|
|
|
|
&&swizzle_xyyy, &&swizzle_yyyy, &&swizzle_zyyy, &&swizzle_wyyy,
|
|
|
|
&&swizzle_xzyy, &&swizzle_yzyy, &&swizzle_zzyy, &&swizzle_wzyy,
|
|
|
|
&&swizzle_xwyy, &&swizzle_ywyy, &&swizzle_zwyy, &&swizzle_wwyy,
|
|
|
|
&&swizzle_xxzy, &&swizzle_yxzy, &&swizzle_zxzy, &&swizzle_wxzy,
|
|
|
|
&&swizzle_xyzy, &&swizzle_yyzy, &&swizzle_zyzy, &&swizzle_wyzy,
|
|
|
|
&&swizzle_xzzy, &&swizzle_yzzy, &&swizzle_zzzy, &&swizzle_wzzy,
|
|
|
|
&&swizzle_xwzy, &&swizzle_ywzy, &&swizzle_zwzy, &&swizzle_wwzy,
|
|
|
|
&&swizzle_xxwy, &&swizzle_yxwy, &&swizzle_zxwy, &&swizzle_wxwy,
|
|
|
|
&&swizzle_xywy, &&swizzle_yywy, &&swizzle_zywy, &&swizzle_wywy,
|
|
|
|
&&swizzle_xzwy, &&swizzle_yzwy, &&swizzle_zzwy, &&swizzle_wzwy,
|
|
|
|
&&swizzle_xwwy, &&swizzle_ywwy, &&swizzle_zwwy, &&swizzle_wwwy,
|
|
|
|
&&swizzle_xxxz, &&swizzle_yxxz, &&swizzle_zxxz, &&swizzle_wxxz,
|
|
|
|
&&swizzle_xyxz, &&swizzle_yyxz, &&swizzle_zyxz, &&swizzle_wyxz,
|
|
|
|
&&swizzle_xzxz, &&swizzle_yzxz, &&swizzle_zzxz, &&swizzle_wzxz,
|
|
|
|
&&swizzle_xwxz, &&swizzle_ywxz, &&swizzle_zwxz, &&swizzle_wwxz,
|
|
|
|
&&swizzle_xxyz, &&swizzle_yxyz, &&swizzle_zxyz, &&swizzle_wxyz,
|
|
|
|
&&swizzle_xyyz, &&swizzle_yyyz, &&swizzle_zyyz, &&swizzle_wyyz,
|
|
|
|
&&swizzle_xzyz, &&swizzle_yzyz, &&swizzle_zzyz, &&swizzle_wzyz,
|
|
|
|
&&swizzle_xwyz, &&swizzle_ywyz, &&swizzle_zwyz, &&swizzle_wwyz,
|
|
|
|
&&swizzle_xxzz, &&swizzle_yxzz, &&swizzle_zxzz, &&swizzle_wxzz,
|
|
|
|
&&swizzle_xyzz, &&swizzle_yyzz, &&swizzle_zyzz, &&swizzle_wyzz,
|
|
|
|
&&swizzle_xzzz, &&swizzle_yzzz, &&swizzle_zzzz, &&swizzle_wzzz,
|
|
|
|
&&swizzle_xwzz, &&swizzle_ywzz, &&swizzle_zwzz, &&swizzle_wwzz,
|
|
|
|
&&swizzle_xxwz, &&swizzle_yxwz, &&swizzle_zxwz, &&swizzle_wxwz,
|
|
|
|
&&swizzle_xywz, &&swizzle_yywz, &&swizzle_zywz, &&swizzle_wywz,
|
|
|
|
&&swizzle_xzwz, &&swizzle_yzwz, &&swizzle_zzwz, &&swizzle_wzwz,
|
|
|
|
&&swizzle_xwwz, &&swizzle_ywwz, &&swizzle_zwwz, &&swizzle_wwwz,
|
|
|
|
&&swizzle_xxxw, &&swizzle_yxxw, &&swizzle_zxxw, &&swizzle_wxxw,
|
|
|
|
&&swizzle_xyxw, &&swizzle_yyxw, &&swizzle_zyxw, &&swizzle_wyxw,
|
|
|
|
&&swizzle_xzxw, &&swizzle_yzxw, &&swizzle_zzxw, &&swizzle_wzxw,
|
|
|
|
&&swizzle_xwxw, &&swizzle_ywxw, &&swizzle_zwxw, &&swizzle_wwxw,
|
|
|
|
&&swizzle_xxyw, &&swizzle_yxyw, &&swizzle_zxyw, &&swizzle_wxyw,
|
|
|
|
&&swizzle_xyyw, &&swizzle_yyyw, &&swizzle_zyyw, &&swizzle_wyyw,
|
|
|
|
&&swizzle_xzyw, &&swizzle_yzyw, &&swizzle_zzyw, &&swizzle_wzyw,
|
|
|
|
&&swizzle_xwyw, &&swizzle_ywyw, &&swizzle_zwyw, &&swizzle_wwyw,
|
|
|
|
&&swizzle_xxzw, &&swizzle_yxzw, &&swizzle_zxzw, &&swizzle_wxzw,
|
|
|
|
&&swizzle_xyzw, &&swizzle_yyzw, &&swizzle_zyzw, &&swizzle_wyzw,
|
|
|
|
&&swizzle_xzzw, &&swizzle_yzzw, &&swizzle_zzzw, &&swizzle_wzzw,
|
|
|
|
&&swizzle_xwzw, &&swizzle_ywzw, &&swizzle_zwzw, &&swizzle_wwzw,
|
|
|
|
&&swizzle_xxww, &&swizzle_yxww, &&swizzle_zxww, &&swizzle_wxww,
|
|
|
|
&&swizzle_xyww, &&swizzle_yyww, &&swizzle_zyww, &&swizzle_wyww,
|
|
|
|
&&swizzle_xzww, &&swizzle_yzww, &&swizzle_zzww, &&swizzle_wzww,
|
|
|
|
&&swizzle_xwww, &&swizzle_ywww, &&swizzle_zwww, &&swizzle_wwww,
|
|
|
|
};
|
|
|
|
#undef swizzle
|
|
|
|
// Per-lane XOR masks for the negate step (`vec ^= neg[(swiz >> 8) & 0xf]`).
// Bit i of the 4-bit index selects lane i: a selected lane holds -2^31
// (only the top bit of a 32-bit int set), an unselected lane holds 0 and is
// left unchanged by the XOR.
// NOTE(review): presumably the lanes are 32-bit floats viewed as ints, so
// toggling the top bit negates them -- confirm against pr_ivec4_t's users.
//
// The mask is spelled -0x7fffffff - 1 rather than 1<<31: shifting 1 into
// the sign bit of a 32-bit int is undefined behavior in ISO C (C11 6.5.7p4),
// while this form yields the same value without any overflow.
#define NEG_SIGN (-0x7fffffff - 1)
static const pr_ivec4_t neg[16] = {
	{        0,        0,        0,        0 },	// 0x0
	{ NEG_SIGN,        0,        0,        0 },	// 0x1
	{        0, NEG_SIGN,        0,        0 },	// 0x2
	{ NEG_SIGN, NEG_SIGN,        0,        0 },	// 0x3
	{        0,        0, NEG_SIGN,        0 },	// 0x4
	{ NEG_SIGN,        0, NEG_SIGN,        0 },	// 0x5
	{        0, NEG_SIGN, NEG_SIGN,        0 },	// 0x6
	{ NEG_SIGN, NEG_SIGN, NEG_SIGN,        0 },	// 0x7
	{        0,        0,        0, NEG_SIGN },	// 0x8
	{ NEG_SIGN,        0,        0, NEG_SIGN },	// 0x9
	{        0, NEG_SIGN,        0, NEG_SIGN },	// 0xa
	{ NEG_SIGN, NEG_SIGN,        0, NEG_SIGN },	// 0xb
	{        0,        0, NEG_SIGN, NEG_SIGN },	// 0xc
	{ NEG_SIGN,        0, NEG_SIGN, NEG_SIGN },	// 0xd
	{        0, NEG_SIGN, NEG_SIGN, NEG_SIGN },	// 0xe
	{ NEG_SIGN, NEG_SIGN, NEG_SIGN, NEG_SIGN },	// 0xf
};
#undef NEG_SIGN
|
|
|
|
// Per-lane AND masks for the final step (`vec &= zero[(swiz >> 12) & 0xf]`).
// Bit i of the 4-bit index set means lane i is cleared (mask 0); a clear bit
// leaves the lane untouched (mask of all ones). Index 0 keeps every lane,
// index 0xf clears the whole vector.
#define LANE_KEEP (~0)
#define LANE_ZERO 0
static const pr_ivec4_t zero[16] = {
	{ LANE_KEEP, LANE_KEEP, LANE_KEEP, LANE_KEEP },	// 0x0
	{ LANE_ZERO, LANE_KEEP, LANE_KEEP, LANE_KEEP },	// 0x1
	{ LANE_KEEP, LANE_ZERO, LANE_KEEP, LANE_KEEP },	// 0x2
	{ LANE_ZERO, LANE_ZERO, LANE_KEEP, LANE_KEEP },	// 0x3
	{ LANE_KEEP, LANE_KEEP, LANE_ZERO, LANE_KEEP },	// 0x4
	{ LANE_ZERO, LANE_KEEP, LANE_ZERO, LANE_KEEP },	// 0x5
	{ LANE_KEEP, LANE_ZERO, LANE_ZERO, LANE_KEEP },	// 0x6
	{ LANE_ZERO, LANE_ZERO, LANE_ZERO, LANE_KEEP },	// 0x7
	{ LANE_KEEP, LANE_KEEP, LANE_KEEP, LANE_ZERO },	// 0x8
	{ LANE_ZERO, LANE_KEEP, LANE_KEEP, LANE_ZERO },	// 0x9
	{ LANE_KEEP, LANE_ZERO, LANE_KEEP, LANE_ZERO },	// 0xa
	{ LANE_ZERO, LANE_ZERO, LANE_KEEP, LANE_ZERO },	// 0xb
	{ LANE_KEEP, LANE_KEEP, LANE_ZERO, LANE_ZERO },	// 0xc
	{ LANE_ZERO, LANE_KEEP, LANE_ZERO, LANE_ZERO },	// 0xd
	{ LANE_KEEP, LANE_ZERO, LANE_ZERO, LANE_ZERO },	// 0xe
	{ LANE_ZERO, LANE_ZERO, LANE_ZERO, LANE_ZERO },	// 0xf
};
#undef LANE_ZERO
#undef LANE_KEEP
|
|
|
|
|
|
|
|
do_swizzle:
|
|
|
|
goto *swizzle_table[swiz & 0xff];
|
|
|
|
negate:
|
|
|
|
vec ^= neg[(swiz >> 8) & 0xf];
|
|
|
|
vec &= zero[(swiz >> 12) & 0xf];
|
|
|
|
return vec;
|
|
|
|
}
|
|
|
|
|
2022-01-03 10:55:27 +00:00
|
|
|
static pr_lvec4_t
|
2022-01-06 13:21:24 +00:00
|
|
|
#ifdef _WIN64
|
|
|
|
//force gcc to use registers for the parameters to avoid alignment issues
|
|
|
|
//on the stack (gcc bug as of 11.2)
|
|
|
|
__attribute__((sysv_abi))
|
|
|
|
#endif
|
2022-01-03 10:55:27 +00:00
|
|
|
pr_swizzle_d (pr_lvec4_t vec, pr_ushort_t swiz)
|
|
|
|
{
|
|
|
|
goto do_swizzle;
|
|
|
|
#define swizzle __builtin_shuffle
|
|
|
|
swizzle_xxxx: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_yxxx: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_zxxx: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_wxxx: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 0, 0 }); goto negate;
|
|
|
|
swizzle_xyxx: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_yyxx: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_zyxx: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_wyxx: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 0, 0 }); goto negate;
|
|
|
|
swizzle_xzxx: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_yzxx: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_zzxx: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_wzxx: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 0, 0 }); goto negate;
|
|
|
|
swizzle_xwxx: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_ywxx: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_zwxx: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_wwxx: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 0, 0 }); goto negate;
|
|
|
|
swizzle_xxyx: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_yxyx: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_zxyx: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_wxyx: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 1, 0 }); goto negate;
|
|
|
|
swizzle_xyyx: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_yyyx: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_zyyx: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_wyyx: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 1, 0 }); goto negate;
|
|
|
|
swizzle_xzyx: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_yzyx: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_zzyx: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_wzyx: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 1, 0 }); goto negate;
|
|
|
|
swizzle_xwyx: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_ywyx: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_zwyx: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_wwyx: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 1, 0 }); goto negate;
|
|
|
|
swizzle_xxzx: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_yxzx: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_zxzx: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_wxzx: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 2, 0 }); goto negate;
|
|
|
|
swizzle_xyzx: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_yyzx: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_zyzx: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_wyzx: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 2, 0 }); goto negate;
|
|
|
|
swizzle_xzzx: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_yzzx: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_zzzx: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_wzzx: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 2, 0 }); goto negate;
|
|
|
|
swizzle_xwzx: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_ywzx: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_zwzx: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_wwzx: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 2, 0 }); goto negate;
|
|
|
|
swizzle_xxwx: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_yxwx: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_zxwx: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_wxwx: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 3, 0 }); goto negate;
|
|
|
|
swizzle_xywx: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_yywx: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_zywx: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_wywx: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 3, 0 }); goto negate;
|
|
|
|
swizzle_xzwx: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_yzwx: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_zzwx: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_wzwx: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 3, 0 }); goto negate;
|
|
|
|
swizzle_xwwx: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_ywwx: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_zwwx: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_wwwx: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 3, 0 }); goto negate;
|
|
|
|
swizzle_xxxy: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_yxxy: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_zxxy: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_wxxy: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 0, 1 }); goto negate;
|
|
|
|
swizzle_xyxy: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_yyxy: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_zyxy: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_wyxy: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 0, 1 }); goto negate;
|
|
|
|
swizzle_xzxy: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_yzxy: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_zzxy: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_wzxy: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 0, 1 }); goto negate;
|
|
|
|
swizzle_xwxy: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_ywxy: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_zwxy: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_wwxy: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 0, 1 }); goto negate;
|
|
|
|
swizzle_xxyy: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_yxyy: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_zxyy: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_wxyy: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 1, 1 }); goto negate;
|
|
|
|
swizzle_xyyy: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_yyyy: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_zyyy: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_wyyy: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 1, 1 }); goto negate;
|
|
|
|
swizzle_xzyy: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_yzyy: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_zzyy: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_wzyy: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 1, 1 }); goto negate;
|
|
|
|
swizzle_xwyy: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_ywyy: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_zwyy: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_wwyy: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 1, 1 }); goto negate;
|
|
|
|
swizzle_xxzy: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_yxzy: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_zxzy: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_wxzy: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 2, 1 }); goto negate;
|
|
|
|
swizzle_xyzy: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_yyzy: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_zyzy: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_wyzy: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 2, 1 }); goto negate;
|
|
|
|
swizzle_xzzy: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_yzzy: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_zzzy: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_wzzy: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 2, 1 }); goto negate;
|
|
|
|
swizzle_xwzy: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_ywzy: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_zwzy: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_wwzy: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 2, 1 }); goto negate;
|
|
|
|
swizzle_xxwy: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_yxwy: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_zxwy: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_wxwy: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 3, 1 }); goto negate;
|
|
|
|
swizzle_xywy: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_yywy: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_zywy: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_wywy: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 3, 1 }); goto negate;
|
|
|
|
swizzle_xzwy: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_yzwy: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_zzwy: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_wzwy: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 3, 1 }); goto negate;
|
|
|
|
swizzle_xwwy: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_ywwy: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_zwwy: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_wwwy: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 3, 1 }); goto negate;
|
|
|
|
swizzle_xxxz: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_yxxz: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_zxxz: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_wxxz: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 0, 2 }); goto negate;
|
|
|
|
swizzle_xyxz: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_yyxz: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_zyxz: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_wyxz: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 0, 2 }); goto negate;
|
|
|
|
swizzle_xzxz: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_yzxz: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_zzxz: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_wzxz: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 0, 2 }); goto negate;
|
|
|
|
swizzle_xwxz: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_ywxz: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_zwxz: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_wwxz: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 0, 2 }); goto negate;
|
|
|
|
swizzle_xxyz: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_yxyz: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_zxyz: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_wxyz: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 1, 2 }); goto negate;
|
|
|
|
swizzle_xyyz: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_yyyz: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_zyyz: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_wyyz: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 1, 2 }); goto negate;
|
|
|
|
swizzle_xzyz: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_yzyz: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_zzyz: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_wzyz: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 1, 2 }); goto negate;
|
|
|
|
swizzle_xwyz: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_ywyz: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_zwyz: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_wwyz: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 1, 2 }); goto negate;
|
|
|
|
swizzle_xxzz: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_yxzz: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_zxzz: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_wxzz: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 2, 2 }); goto negate;
|
|
|
|
swizzle_xyzz: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_yyzz: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_zyzz: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_wyzz: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 2, 2 }); goto negate;
|
|
|
|
swizzle_xzzz: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_yzzz: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_zzzz: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_wzzz: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 2, 2 }); goto negate;
|
|
|
|
swizzle_xwzz: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_ywzz: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_zwzz: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_wwzz: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 2, 2 }); goto negate;
|
|
|
|
swizzle_xxwz: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_yxwz: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_zxwz: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_wxwz: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 3, 2 }); goto negate;
|
|
|
|
swizzle_xywz: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_yywz: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_zywz: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_wywz: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 3, 2 }); goto negate;
|
|
|
|
swizzle_xzwz: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_yzwz: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_zzwz: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_wzwz: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 3, 2 }); goto negate;
|
|
|
|
swizzle_xwwz: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_ywwz: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_zwwz: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_wwwz: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 3, 2 }); goto negate;
|
|
|
|
swizzle_xxxw: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_yxxw: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_zxxw: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_wxxw: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 0, 3 }); goto negate;
|
|
|
|
swizzle_xyxw: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_yyxw: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_zyxw: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_wyxw: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 0, 3 }); goto negate;
|
|
|
|
swizzle_xzxw: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_yzxw: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_zzxw: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_wzxw: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 0, 3 }); goto negate;
|
|
|
|
swizzle_xwxw: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_ywxw: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_zwxw: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_wwxw: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 0, 3 }); goto negate;
|
|
|
|
swizzle_xxyw: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_yxyw: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_zxyw: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_wxyw: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 1, 3 }); goto negate;
|
|
|
|
swizzle_xyyw: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_yyyw: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_zyyw: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_wyyw: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 1, 3 }); goto negate;
|
|
|
|
swizzle_xzyw: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_yzyw: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_zzyw: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_wzyw: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 1, 3 }); goto negate;
|
|
|
|
swizzle_xwyw: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_ywyw: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_zwyw: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_wwyw: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 1, 3 }); goto negate;
|
|
|
|
swizzle_xxzw: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_yxzw: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_zxzw: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_wxzw: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 2, 3 }); goto negate;
|
|
|
|
swizzle_xyzw: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_yyzw: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_zyzw: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_wyzw: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 2, 3 }); goto negate;
|
|
|
|
swizzle_xzzw: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_yzzw: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_zzzw: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_wzzw: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 2, 3 }); goto negate;
|
|
|
|
swizzle_xwzw: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_ywzw: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_zwzw: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_wwzw: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 2, 3 }); goto negate;
|
|
|
|
swizzle_xxww: vec = swizzle (vec, (pr_lvec4_t) { 0, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_yxww: vec = swizzle (vec, (pr_lvec4_t) { 1, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_zxww: vec = swizzle (vec, (pr_lvec4_t) { 2, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_wxww: vec = swizzle (vec, (pr_lvec4_t) { 3, 0, 3, 3 }); goto negate;
|
|
|
|
swizzle_xyww: vec = swizzle (vec, (pr_lvec4_t) { 0, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_yyww: vec = swizzle (vec, (pr_lvec4_t) { 1, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_zyww: vec = swizzle (vec, (pr_lvec4_t) { 2, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_wyww: vec = swizzle (vec, (pr_lvec4_t) { 3, 1, 3, 3 }); goto negate;
|
|
|
|
swizzle_xzww: vec = swizzle (vec, (pr_lvec4_t) { 0, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_yzww: vec = swizzle (vec, (pr_lvec4_t) { 1, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_zzww: vec = swizzle (vec, (pr_lvec4_t) { 2, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_wzww: vec = swizzle (vec, (pr_lvec4_t) { 3, 2, 3, 3 }); goto negate;
|
|
|
|
swizzle_xwww: vec = swizzle (vec, (pr_lvec4_t) { 0, 3, 3, 3 }); goto negate;
|
|
|
|
swizzle_ywww: vec = swizzle (vec, (pr_lvec4_t) { 1, 3, 3, 3 }); goto negate;
|
|
|
|
swizzle_zwww: vec = swizzle (vec, (pr_lvec4_t) { 2, 3, 3, 3 }); goto negate;
|
|
|
|
swizzle_wwww: vec = swizzle (vec, (pr_lvec4_t) { 3, 3, 3, 3 }); goto negate;
|
|
|
|
static void *swizzle_table[256] = {
|
|
|
|
&&swizzle_xxxx, &&swizzle_yxxx, &&swizzle_zxxx, &&swizzle_wxxx,
|
|
|
|
&&swizzle_xyxx, &&swizzle_yyxx, &&swizzle_zyxx, &&swizzle_wyxx,
|
|
|
|
&&swizzle_xzxx, &&swizzle_yzxx, &&swizzle_zzxx, &&swizzle_wzxx,
|
|
|
|
&&swizzle_xwxx, &&swizzle_ywxx, &&swizzle_zwxx, &&swizzle_wwxx,
|
|
|
|
&&swizzle_xxyx, &&swizzle_yxyx, &&swizzle_zxyx, &&swizzle_wxyx,
|
|
|
|
&&swizzle_xyyx, &&swizzle_yyyx, &&swizzle_zyyx, &&swizzle_wyyx,
|
|
|
|
&&swizzle_xzyx, &&swizzle_yzyx, &&swizzle_zzyx, &&swizzle_wzyx,
|
|
|
|
&&swizzle_xwyx, &&swizzle_ywyx, &&swizzle_zwyx, &&swizzle_wwyx,
|
|
|
|
&&swizzle_xxzx, &&swizzle_yxzx, &&swizzle_zxzx, &&swizzle_wxzx,
|
|
|
|
&&swizzle_xyzx, &&swizzle_yyzx, &&swizzle_zyzx, &&swizzle_wyzx,
|
|
|
|
&&swizzle_xzzx, &&swizzle_yzzx, &&swizzle_zzzx, &&swizzle_wzzx,
|
|
|
|
&&swizzle_xwzx, &&swizzle_ywzx, &&swizzle_zwzx, &&swizzle_wwzx,
|
|
|
|
&&swizzle_xxwx, &&swizzle_yxwx, &&swizzle_zxwx, &&swizzle_wxwx,
|
|
|
|
&&swizzle_xywx, &&swizzle_yywx, &&swizzle_zywx, &&swizzle_wywx,
|
|
|
|
&&swizzle_xzwx, &&swizzle_yzwx, &&swizzle_zzwx, &&swizzle_wzwx,
|
|
|
|
&&swizzle_xwwx, &&swizzle_ywwx, &&swizzle_zwwx, &&swizzle_wwwx,
|
|
|
|
&&swizzle_xxxy, &&swizzle_yxxy, &&swizzle_zxxy, &&swizzle_wxxy,
|
|
|
|
&&swizzle_xyxy, &&swizzle_yyxy, &&swizzle_zyxy, &&swizzle_wyxy,
|
|
|
|
&&swizzle_xzxy, &&swizzle_yzxy, &&swizzle_zzxy, &&swizzle_wzxy,
|
|
|
|
&&swizzle_xwxy, &&swizzle_ywxy, &&swizzle_zwxy, &&swizzle_wwxy,
|
|
|
|
&&swizzle_xxyy, &&swizzle_yxyy, &&swizzle_zxyy, &&swizzle_wxyy,
|
|
|
|
&&swizzle_xyyy, &&swizzle_yyyy, &&swizzle_zyyy, &&swizzle_wyyy,
|
|
|
|
&&swizzle_xzyy, &&swizzle_yzyy, &&swizzle_zzyy, &&swizzle_wzyy,
|
|
|
|
&&swizzle_xwyy, &&swizzle_ywyy, &&swizzle_zwyy, &&swizzle_wwyy,
|
|
|
|
&&swizzle_xxzy, &&swizzle_yxzy, &&swizzle_zxzy, &&swizzle_wxzy,
|
|
|
|
&&swizzle_xyzy, &&swizzle_yyzy, &&swizzle_zyzy, &&swizzle_wyzy,
|
|
|
|
&&swizzle_xzzy, &&swizzle_yzzy, &&swizzle_zzzy, &&swizzle_wzzy,
|
|
|
|
&&swizzle_xwzy, &&swizzle_ywzy, &&swizzle_zwzy, &&swizzle_wwzy,
|
|
|
|
&&swizzle_xxwy, &&swizzle_yxwy, &&swizzle_zxwy, &&swizzle_wxwy,
|
|
|
|
&&swizzle_xywy, &&swizzle_yywy, &&swizzle_zywy, &&swizzle_wywy,
|
|
|
|
&&swizzle_xzwy, &&swizzle_yzwy, &&swizzle_zzwy, &&swizzle_wzwy,
|
|
|
|
&&swizzle_xwwy, &&swizzle_ywwy, &&swizzle_zwwy, &&swizzle_wwwy,
|
|
|
|
&&swizzle_xxxz, &&swizzle_yxxz, &&swizzle_zxxz, &&swizzle_wxxz,
|
|
|
|
&&swizzle_xyxz, &&swizzle_yyxz, &&swizzle_zyxz, &&swizzle_wyxz,
|
|
|
|
&&swizzle_xzxz, &&swizzle_yzxz, &&swizzle_zzxz, &&swizzle_wzxz,
|
|
|
|
&&swizzle_xwxz, &&swizzle_ywxz, &&swizzle_zwxz, &&swizzle_wwxz,
|
|
|
|
&&swizzle_xxyz, &&swizzle_yxyz, &&swizzle_zxyz, &&swizzle_wxyz,
|
|
|
|
&&swizzle_xyyz, &&swizzle_yyyz, &&swizzle_zyyz, &&swizzle_wyyz,
|
|
|
|
&&swizzle_xzyz, &&swizzle_yzyz, &&swizzle_zzyz, &&swizzle_wzyz,
|
|
|
|
&&swizzle_xwyz, &&swizzle_ywyz, &&swizzle_zwyz, &&swizzle_wwyz,
|
|
|
|
&&swizzle_xxzz, &&swizzle_yxzz, &&swizzle_zxzz, &&swizzle_wxzz,
|
|
|
|
&&swizzle_xyzz, &&swizzle_yyzz, &&swizzle_zyzz, &&swizzle_wyzz,
|
|
|
|
&&swizzle_xzzz, &&swizzle_yzzz, &&swizzle_zzzz, &&swizzle_wzzz,
|
|
|
|
&&swizzle_xwzz, &&swizzle_ywzz, &&swizzle_zwzz, &&swizzle_wwzz,
|
|
|
|
&&swizzle_xxwz, &&swizzle_yxwz, &&swizzle_zxwz, &&swizzle_wxwz,
|
|
|
|
&&swizzle_xywz, &&swizzle_yywz, &&swizzle_zywz, &&swizzle_wywz,
|
|
|
|
&&swizzle_xzwz, &&swizzle_yzwz, &&swizzle_zzwz, &&swizzle_wzwz,
|
|
|
|
&&swizzle_xwwz, &&swizzle_ywwz, &&swizzle_zwwz, &&swizzle_wwwz,
|
|
|
|
&&swizzle_xxxw, &&swizzle_yxxw, &&swizzle_zxxw, &&swizzle_wxxw,
|
|
|
|
&&swizzle_xyxw, &&swizzle_yyxw, &&swizzle_zyxw, &&swizzle_wyxw,
|
|
|
|
&&swizzle_xzxw, &&swizzle_yzxw, &&swizzle_zzxw, &&swizzle_wzxw,
|
|
|
|
&&swizzle_xwxw, &&swizzle_ywxw, &&swizzle_zwxw, &&swizzle_wwxw,
|
|
|
|
&&swizzle_xxyw, &&swizzle_yxyw, &&swizzle_zxyw, &&swizzle_wxyw,
|
|
|
|
&&swizzle_xyyw, &&swizzle_yyyw, &&swizzle_zyyw, &&swizzle_wyyw,
|
|
|
|
&&swizzle_xzyw, &&swizzle_yzyw, &&swizzle_zzyw, &&swizzle_wzyw,
|
|
|
|
&&swizzle_xwyw, &&swizzle_ywyw, &&swizzle_zwyw, &&swizzle_wwyw,
|
|
|
|
&&swizzle_xxzw, &&swizzle_yxzw, &&swizzle_zxzw, &&swizzle_wxzw,
|
|
|
|
&&swizzle_xyzw, &&swizzle_yyzw, &&swizzle_zyzw, &&swizzle_wyzw,
|
|
|
|
&&swizzle_xzzw, &&swizzle_yzzw, &&swizzle_zzzw, &&swizzle_wzzw,
|
|
|
|
&&swizzle_xwzw, &&swizzle_ywzw, &&swizzle_zwzw, &&swizzle_wwzw,
|
|
|
|
&&swizzle_xxww, &&swizzle_yxww, &&swizzle_zxww, &&swizzle_wxww,
|
|
|
|
&&swizzle_xyww, &&swizzle_yyww, &&swizzle_zyww, &&swizzle_wyww,
|
|
|
|
&&swizzle_xzww, &&swizzle_yzww, &&swizzle_zzww, &&swizzle_wzww,
|
|
|
|
&&swizzle_xwww, &&swizzle_ywww, &&swizzle_zwww, &&swizzle_wwww,
|
|
|
|
};
|
|
|
|
#undef swizzle
|
2022-01-06 13:20:16 +00:00
|
|
|
#define L(x) UINT64_C(x)
|
2022-01-03 10:55:27 +00:00
|
|
|
/* Per-lane sign-bit masks for the swizzle "negate" step.
 *
 * Bit i of the table index selects lane i; a selected lane holds only
 * bit 63 and every other lane is zero.  The table is XORed into the
 * swizzled vector at the `negate` label, indexed by bits 8-11 of the
 * swizzle code, so each selected lane has its top bit flipped.
 * NOTE(review): presumably the lanes carry 64-bit values whose sign lives
 * in bit 63 (e.g. doubles) -- confirm against the callers.
 *
 * INT64_MIN is used for the mask instead of INT64_C(1)<<63: shifting a
 * signed 1 into the sign bit is undefined behavior (C11 6.5.7p4), while
 * INT64_MIN is a well-defined constant with exactly that bit pattern.
 */
static const pr_lvec4_t neg[16] = {
	{ INT64_C(0), INT64_C(0), INT64_C(0), INT64_C(0) },
	{ INT64_MIN,  INT64_C(0), INT64_C(0), INT64_C(0) },
	{ INT64_C(0), INT64_MIN,  INT64_C(0), INT64_C(0) },
	{ INT64_MIN,  INT64_MIN,  INT64_C(0), INT64_C(0) },
	{ INT64_C(0), INT64_C(0), INT64_MIN,  INT64_C(0) },
	{ INT64_MIN,  INT64_C(0), INT64_MIN,  INT64_C(0) },
	{ INT64_C(0), INT64_MIN,  INT64_MIN,  INT64_C(0) },
	{ INT64_MIN,  INT64_MIN,  INT64_MIN,  INT64_C(0) },
	{ INT64_C(0), INT64_C(0), INT64_C(0), INT64_MIN  },
	{ INT64_MIN,  INT64_C(0), INT64_C(0), INT64_MIN  },
	{ INT64_C(0), INT64_MIN,  INT64_C(0), INT64_MIN  },
	{ INT64_MIN,  INT64_MIN,  INT64_C(0), INT64_MIN  },
	{ INT64_C(0), INT64_C(0), INT64_MIN,  INT64_MIN  },
	{ INT64_MIN,  INT64_C(0), INT64_MIN,  INT64_MIN  },
	{ INT64_C(0), INT64_MIN,  INT64_MIN,  INT64_MIN  },
	{ INT64_MIN,  INT64_MIN,  INT64_MIN,  INT64_MIN  },
};
|
|
|
|
static const pr_lvec4_t zero[16] = {
|
2022-01-06 13:20:16 +00:00
|
|
|
{ ~INT64_C(0), ~INT64_C(0), ~INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ INT64_C(0), ~INT64_C(0), ~INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), INT64_C(0), ~INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ INT64_C(0), INT64_C(0), ~INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), ~INT64_C(0), INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ INT64_C(0), ~INT64_C(0), INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), INT64_C(0), INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ INT64_C(0), INT64_C(0), INT64_C(0), ~INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), ~INT64_C(0), ~INT64_C(0), INT64_C(0) },
|
|
|
|
{ INT64_C(0), ~INT64_C(0), ~INT64_C(0), INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), INT64_C(0), ~INT64_C(0), INT64_C(0) },
|
|
|
|
{ INT64_C(0), INT64_C(0), ~INT64_C(0), INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), ~INT64_C(0), INT64_C(0), INT64_C(0) },
|
|
|
|
{ INT64_C(0), ~INT64_C(0), INT64_C(0), INT64_C(0) },
|
|
|
|
{ ~INT64_C(0), INT64_C(0), INT64_C(0), INT64_C(0) },
|
|
|
|
{ INT64_C(0), INT64_C(0), INT64_C(0), INT64_C(0) },
|
2022-01-03 10:55:27 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
do_swizzle:
|
|
|
|
goto *swizzle_table[swiz & 0xff];
|
|
|
|
negate:
|
|
|
|
vec ^= neg[(swiz >> 8) & 0xf];
|
|
|
|
vec &= zero[(swiz >> 12) & 0xf];
|
|
|
|
return vec;
|
|
|
|
}
|
|
|
|
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and to push/pop the
four registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
static void
|
|
|
|
pr_exec_ruamoko (progs_t *pr, int exitdepth)
|
|
|
|
{
|
|
|
|
int profile, startprofile;
|
|
|
|
dstatement_t *st;
|
|
|
|
pr_type_t old_val = {0};
|
|
|
|
|
|
|
|
// make a stack frame
|
|
|
|
startprofile = profile = 0;
|
|
|
|
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
|
|
|
|
if (pr->watch) {
|
|
|
|
old_val = *pr->watch;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
st++;
|
|
|
|
++pr->pr_xstatement;
|
|
|
|
if (pr->pr_xstatement != st - pr->pr_statements)
|
|
|
|
PR_RunError (pr, "internal error");
|
|
|
|
if (++profile > 1000000 && !pr->no_exec_limit) {
|
|
|
|
PR_RunError (pr, "runaway loop error");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pr->pr_trace) {
|
|
|
|
if (pr->debug_handler) {
|
|
|
|
pr->debug_handler (prd_trace, 0, pr->debug_data);
|
|
|
|
} else {
|
|
|
|
PR_PrintStatement (pr, st, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (st->op & OP_BREAK) {
|
|
|
|
if (pr->debug_handler) {
|
|
|
|
pr->debug_handler (prd_breakpoint, 0, pr->debug_data);
|
|
|
|
} else {
|
|
|
|
PR_RunError (pr, "breakpoint hit");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-18 03:11:14 +00:00
|
|
|
pr_ptr_t st_a = st->a + PR_BASE (pr, st, A);
|
|
|
|
pr_ptr_t st_b = st->b + PR_BASE (pr, st, B);
|
|
|
|
pr_ptr_t st_c = st->c + PR_BASE (pr, st, C);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and to push/pop the
four registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
pr_type_t *op_a = pr->pr_globals + st_a;
|
|
|
|
pr_type_t *op_b = pr->pr_globals + st_b;
|
|
|
|
pr_type_t *op_c = pr->pr_globals + st_c;
|
|
|
|
|
|
|
|
pr_type_t *stk;
|
|
|
|
pr_type_t *mm;
|
2022-01-18 06:32:43 +00:00
|
|
|
pr_func_t function;
|
2022-01-03 08:54:54 +00:00
|
|
|
pr_opcode_e st_op = st->op & OP_MASK;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and to push/pop the
four registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
switch (st_op) {
|
|
|
|
// 0 0000
|
2022-01-21 09:44:51 +00:00
|
|
|
case OP_NOP:
|
|
|
|
break;
|
|
|
|
case OP_ADJSTK:
|
2022-01-21 11:33:15 +00:00
|
|
|
pr_stack_adjust (pr, st->a, (short) st->b);
|
2022-01-21 09:44:51 +00:00
|
|
|
break;
|
|
|
|
case OP_LDCONST:
|
|
|
|
PR_RunError (pr, "OP_LDCONST not implemented");
|
|
|
|
break;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case OP_LOAD_B_1:
|
|
|
|
case OP_LOAD_C_1:
|
|
|
|
case OP_LOAD_D_1:
|
2022-01-20 05:55:29 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_LOAD_B_1 + 4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OPC(int) = MM(int);
|
|
|
|
break;
|
|
|
|
case OP_LOAD_B_2:
|
|
|
|
case OP_LOAD_C_2:
|
|
|
|
case OP_LOAD_D_2:
|
2022-01-20 05:55:29 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_LOAD_B_2 + 4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OPC(ivec2) = MM(ivec2);
|
|
|
|
break;
|
|
|
|
case OP_LOAD_B_3:
|
|
|
|
case OP_LOAD_C_3:
|
|
|
|
case OP_LOAD_D_3:
|
2022-01-20 05:55:29 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_LOAD_B_3 + 4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
VectorCopy (&MM(int), &OPC(int));
|
|
|
|
break;
|
|
|
|
case OP_LOAD_B_4:
|
|
|
|
case OP_LOAD_C_4:
|
|
|
|
case OP_LOAD_D_4:
|
2022-01-20 05:55:29 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_LOAD_B_4 + 4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OPC(ivec4) = MM(ivec4);
|
|
|
|
break;
|
|
|
|
// 0 0001
|
|
|
|
case OP_STORE_A_1:
|
|
|
|
case OP_STORE_B_1:
|
|
|
|
case OP_STORE_C_1:
|
|
|
|
case OP_STORE_D_1:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_STORE_A_1) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
MM(int) = OPC(int);
|
|
|
|
break;
|
|
|
|
case OP_STORE_A_2:
|
|
|
|
case OP_STORE_B_2:
|
|
|
|
case OP_STORE_C_2:
|
|
|
|
case OP_STORE_D_2:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_STORE_A_2) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
MM(ivec2) = OPC(ivec2);
|
|
|
|
break;
|
|
|
|
case OP_STORE_A_3:
|
|
|
|
case OP_STORE_B_3:
|
|
|
|
case OP_STORE_C_3:
|
|
|
|
case OP_STORE_D_3:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_STORE_A_3) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
VectorCopy (&OPC(int), &MM(int));
|
|
|
|
break;
|
|
|
|
case OP_STORE_A_4:
|
|
|
|
case OP_STORE_B_4:
|
|
|
|
case OP_STORE_C_4:
|
|
|
|
case OP_STORE_D_4:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_STORE_A_4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
MM(ivec4) = OPC(ivec4);
|
|
|
|
break;
|
|
|
|
// 0 0010
|
|
|
|
case OP_PUSH_A_1:
|
|
|
|
case OP_PUSH_B_1:
|
|
|
|
case OP_PUSH_C_1:
|
|
|
|
case OP_PUSH_D_1:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_PUSH_A_1) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_push (pr);
|
|
|
|
STK(int) = MM(int);
|
|
|
|
break;
|
|
|
|
case OP_PUSH_A_2:
|
|
|
|
case OP_PUSH_B_2:
|
|
|
|
case OP_PUSH_C_2:
|
|
|
|
case OP_PUSH_D_2:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_PUSH_A_2) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_push (pr);
|
|
|
|
STK(ivec2) = MM(ivec2);
|
|
|
|
break;
|
|
|
|
case OP_PUSH_A_3:
|
|
|
|
case OP_PUSH_B_3:
|
|
|
|
case OP_PUSH_C_3:
|
|
|
|
case OP_PUSH_D_3:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_PUSH_A_3) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_push (pr);
|
|
|
|
VectorCopy (&MM(int), &STK(int));
|
|
|
|
break;
|
|
|
|
case OP_PUSH_A_4:
|
|
|
|
case OP_PUSH_B_4:
|
|
|
|
case OP_PUSH_C_4:
|
|
|
|
case OP_PUSH_D_4:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_PUSH_A_4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions)).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_push (pr);
|
|
|
|
STK(ivec4) = MM(ivec4);
|
|
|
|
break;
|
|
|
|
// 0 0011
|
|
|
|
case OP_POP_A_1:
|
|
|
|
case OP_POP_B_1:
|
|
|
|
case OP_POP_C_1:
|
|
|
|
case OP_POP_D_1:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_POP_A_1) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_pop (pr);
|
|
|
|
MM(int) = STK(int);
|
|
|
|
break;
|
|
|
|
case OP_POP_A_2:
|
|
|
|
case OP_POP_B_2:
|
|
|
|
case OP_POP_C_2:
|
|
|
|
case OP_POP_D_2:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_POP_A_2) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_pop (pr);
|
|
|
|
MM(ivec2) = STK(ivec2);
|
|
|
|
break;
|
|
|
|
case OP_POP_A_3:
|
|
|
|
case OP_POP_B_3:
|
|
|
|
case OP_POP_C_3:
|
|
|
|
case OP_POP_D_3:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_POP_A_3) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_pop (pr);
|
|
|
|
VectorCopy (&STK(int), &MM(int));
|
|
|
|
break;
|
|
|
|
case OP_POP_A_4:
|
|
|
|
case OP_POP_B_4:
|
|
|
|
case OP_POP_C_4:
|
|
|
|
case OP_POP_D_4:
|
2022-01-15 07:27:46 +00:00
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_POP_A_4) >> 2);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
stk = pr_stack_pop (pr);
|
|
|
|
MM(ivec4) = STK(ivec4);
|
|
|
|
break;
|
|
|
|
// 0 0100
|
2022-01-16 10:32:47 +00:00
|
|
|
// spare
|
2022-01-10 08:01:14 +00:00
|
|
|
// 0 0101
|
2022-01-16 10:32:47 +00:00
|
|
|
// spare
|
2022-01-10 08:01:14 +00:00
|
|
|
// 0 0110
|
2022-01-16 10:32:47 +00:00
|
|
|
// spare
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// 0 0111
|
2022-01-16 10:32:47 +00:00
|
|
|
// spare
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
|
|
|
|
// Case body for the one-component compare OP_<NAME>_<TAG>_1.
// The relational result of OPA and OPB is negated before being stored in
// OPC, so with scalar operands (as OP_cmp instantiates this) a true
// comparison stores -1 and a false one stores 0.
#define OP_cmp_1(NAME, TAG, res_t, rel, arg_t) \
	case OP_##NAME##_##TAG##_1: \
		OPC (res_t) = -(OPA (arg_t) rel OPB (arg_t)); \
		break
|
|
|
|
// Case body for the two-component compare OP_<NAME>_<TAG>_2.
// Unlike the one-component form, the relational result is stored in OPC
// as-is (no negation).
// NOTE(review): presumably res_t/arg_t are 2-wide vector types whose
// comparison already yields a per-lane mask -- confirm.
#define OP_cmp_2(NAME, TAG, res_t, rel, arg_t) \
	case OP_##NAME##_##TAG##_2: \
		OPC (res_t) = (OPA (arg_t) rel OPB (arg_t)); \
		break
|
|
|
|
// Case body for the three-component compare OP_<OP>_<T>_3.
// rt and ct are used as element types: the addresses of OPC, OPA and OPB are
// handed to VectorCompCompare together with `-` and the comparison operator.
// NOTE(review): VectorCompCompare is defined elsewhere; presumably it stores
// -(a cmp b) for each of the three components -- confirm.
// The expansion deliberately ends without a semicolon, matching the _1, _2
// and _4 forms: the caller supplies it, so no stray empty statement is left.
#define OP_cmp_3(OP, T, rt, cmp, ct) \
	case OP_##OP##_##T##_3: \
		VectorCompCompare (&OPC(rt), -, &OPA(ct), cmp, &OPB(ct)); \
		break
|
|
|
|
// Case body for the four-component compare OP_<NAME>_<TAG>_4.
// The relational result of OPA and OPB is stored in OPC unmodified.
// NOTE(review): presumably res_t/arg_t are 4-wide vector types whose
// comparison already yields a per-lane mask -- confirm.
#define OP_cmp_4(NAME, TAG, res_t, rel, arg_t) \
	case OP_##NAME##_##TAG##_4: \
		OPC (res_t) = (OPA (arg_t) rel OPB (arg_t)); \
		break
|
|
|
|
// Emits the 1-, 2-, 3- and 4-component compare cases for one operand type.
// res1/res2/res4 are the result types and arg1/arg2/arg4 the operand types
// for the respective widths; the 3-component case reuses the 1-component
// (element) types.
#define OP_cmp_T(NAME, TAG, res1, res2, res4, rel, arg1, arg2, arg4) \
	OP_cmp_1 (NAME, TAG, res1, rel, arg1); \
	OP_cmp_2 (NAME, TAG, res2, rel, arg2); \
	OP_cmp_3 (NAME, TAG, res1, rel, arg1); \
	OP_cmp_4 (NAME, TAG, res4, rel, arg4)
|
|
|
|
// Emits every compare case for one comparison: the I, F, L and D operand
// families at all four widths. I and F comparisons produce int-based
// results; L and D comparisons produce long-based results.
#define OP_cmp(NAME, rel) \
	OP_cmp_T (NAME, I, int, ivec2, ivec4, rel, int, ivec2, ivec4); \
	OP_cmp_T (NAME, F, int, ivec2, ivec4, rel, float, vec2, vec4); \
	OP_cmp_T (NAME, L, long, lvec2, lvec4, rel, long, lvec2, lvec4); \
	OP_cmp_T (NAME, D, long, lvec2, lvec4, rel, double, dvec2, dvec4)
|
|
|
|
|
|
|
|
// 0 1000
|
|
|
|
OP_cmp(EQ, ==);
|
|
|
|
// 0 1001
|
|
|
|
OP_cmp(LT, <);
|
|
|
|
// 0 1010
|
|
|
|
OP_cmp(GT, >);
|
|
|
|
// 0 1011
|
2022-01-10 08:01:14 +00:00
|
|
|
// spare
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// 0 1100
|
|
|
|
OP_cmp(NE, !=);
|
|
|
|
// 0 1101
|
|
|
|
OP_cmp(GE, >=);
|
|
|
|
// 0 1110
|
|
|
|
OP_cmp(LE, <=);
|
2022-01-10 08:01:14 +00:00
|
|
|
// 0 1111
|
|
|
|
// spare
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
|
|
|
|
// Case body for the one-component binary operation OP_<NAME>_<TAG>_1:
// OPC = OPA <binop> OPB, all three accessed as `type`.
#define OP_op_1(NAME, TAG, type, binop) \
	case OP_##NAME##_##TAG##_1: \
		OPC (type) = (OPA (type) binop OPB (type)); \
		break
|
|
|
|
// Case body for the two-component binary operation OP_<NAME>_<TAG>_2:
// OPC = OPA <binop> OPB, all three accessed as `type`.
// NOTE(review): presumably `type` is a 2-wide vector type so the operator
// applies per lane -- confirm.
#define OP_op_2(NAME, TAG, type, binop) \
	case OP_##NAME##_##TAG##_2: \
		OPC (type) = (OPA (type) binop OPB (type)); \
		break
|
|
|
|
// Case body for the three-component binary operation OP_<OP>_<T>_3.
// t is used as the element type: the addresses of OPC, OPA and OPB are
// handed to VectorCompOp together with the operator.
// NOTE(review): VectorCompOp is defined elsewhere; presumably it applies
// `op` to each of the three components -- confirm.
// The expansion deliberately ends without a semicolon, matching the _1, _2
// and _4 forms: the caller supplies it, so no stray empty statement is left.
#define OP_op_3(OP, T, t, op) \
	case OP_##OP##_##T##_3: \
		VectorCompOp (&OPC(t), &OPA(t), op, &OPB(t)); \
		break
|
|
|
|
// Case body for the four-component binary operation OP_<NAME>_<TAG>_4:
// OPC = OPA <binop> OPB, all three accessed as `type`.
// NOTE(review): presumably `type` is a 4-wide vector type so the operator
// applies per lane -- confirm.
#define OP_op_4(NAME, TAG, type, binop) \
	case OP_##NAME##_##TAG##_4: \
		OPC (type) = (OPA (type) binop OPB (type)); \
		break
|
|
|
|
// Emits the 1-, 2-, 3- and 4-component binary-operation cases for one
// operand type. type1/type2/type4 are the operand types for the respective
// widths; the 3-component case reuses the 1-component (element) type.
#define OP_op_T(NAME, TAG, type1, type2, type4, binop) \
	OP_op_1 (NAME, TAG, type1, binop); \
	OP_op_2 (NAME, TAG, type2, binop); \
	OP_op_3 (NAME, TAG, type1, binop); \
	OP_op_4 (NAME, TAG, type4, binop)
|
|
|
|
// Emits every binary-operation case for one operator: the I, F, L and D
// operand families at all four widths.
#define OP_op(NAME, binop) \
	OP_op_T (NAME, I, int, ivec2, ivec4, binop); \
	OP_op_T (NAME, F, float, vec2, vec4, binop); \
	OP_op_T (NAME, L, long, lvec2, lvec4, binop); \
	OP_op_T (NAME, D, double, dvec2, dvec4, binop)
|
|
|
|
// Case body for the one-component unary operation OP_<NAME>_<TAG>_1:
// OPC = uop (OPA), both accessed as `type`. `uop` is spliced in front of a
// parenthesised operand, so it may be a prefix operator or a callable name.
#define OP_uop_1(NAME, TAG, type, uop) \
	case OP_##NAME##_##TAG##_1: \
		OPC (type) = uop (OPA (type)); \
		break
|
|
|
|
// Case body for the two-component unary operation OP_<NAME>_<TAG>_2:
// OPC = uop (OPA), both accessed as `type`.
// NOTE(review): presumably `type` is a 2-wide vector type -- confirm.
#define OP_uop_2(NAME, TAG, type, uop) \
	case OP_##NAME##_##TAG##_2: \
		OPC (type) = uop (OPA (type)); \
		break
|
|
|
|
// Case body for the three-component unary operation OP_<OP>_<T>_3.
// t is used as the element type: the addresses of OPC and OPA are handed to
// VectorCompUop together with the operator.
// NOTE(review): VectorCompUop is defined elsewhere; presumably it applies
// `op` to each of the three components -- confirm.
// The expansion deliberately ends without a semicolon, matching the _1, _2
// and _4 forms: the caller supplies it, so no stray empty statement is left.
#define OP_uop_3(OP, T, t, op) \
	case OP_##OP##_##T##_3: \
		VectorCompUop (&OPC(t), op, &OPA(t)); \
		break
|
|
|
|
// Case body for the four-component unary operation OP_<NAME>_<TAG>_4:
// OPC = uop (OPA), both accessed as `type`.
// NOTE(review): presumably `type` is a 4-wide vector type -- confirm.
#define OP_uop_4(NAME, TAG, type, uop) \
	case OP_##NAME##_##TAG##_4: \
		OPC (type) = uop (OPA (type)); \
		break
|
|
|
|
// Emits the 1-, 2-, 3- and 4-component unary-operation cases for one
// operand type. type1/type2/type4 are the operand types for the respective
// widths; the 3-component case reuses the 1-component (element) type.
#define OP_uop_T(NAME, TAG, type1, type2, type4, uop) \
	OP_uop_1 (NAME, TAG, type1, uop); \
	OP_uop_2 (NAME, TAG, type2, uop); \
	OP_uop_3 (NAME, TAG, type1, uop); \
	OP_uop_4 (NAME, TAG, type4, uop)
|
|
|
|
|
|
|
|
// 1 0000
|
|
|
|
OP_op(MUL, *);
|
|
|
|
// 1 0001
|
|
|
|
OP_op(DIV, /);
|
|
|
|
|
2022-01-04 09:36:13 +00:00
|
|
|
// implement remainder (c %) for integers:
|
|
|
|
// 5 rem 3 = 2
|
|
|
|
// -5 rem 3 = -2
|
|
|
|
// 5 rem -3 = 2
|
|
|
|
// -5 rem -3 = -2
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// Plain store through a pointer: writes s to *d. Used as the "store"
// argument of the rem/mod macros when the destination needs no special
// packing. Fully parenthesised so the expansion stays a single assignment
// expression wherever it is used.
#define OP_store(d, s) (*(d) = (s))
|
|
|
|
// Shared case body for floating-point remainder and modulo,
// OP_<NAME>_<TAG>_<width>. Both operands are fetched with `load` from the
// addresses of OPA/OPB, the quotient is rounded with `round_fn`, and
// lhs - rhs * round_fn (lhs / rhs) is written to the address of OPC with
// `store`. The choice of rounding function decides the result's sign rule.
#define OP_remmod_T(NAME, TAG, width, type, load, round_fn, store) \
	case OP_##NAME##_##TAG##_##width: \
		{ \
			__auto_type lhs = load (&OPA(type)); \
			__auto_type rhs = load (&OPB(type)); \
			__auto_type quot = round_fn (lhs / rhs); \
			store (&OPC(type), lhs - rhs * quot); \
		} \
		break
|
|
|
|
// Remainder flavour of OP_remmod_T: forwards every argument unchanged and
// fixes the opcode family to REM.
#define OP_rem_T(TAG, width, type, load, round_fn, store) \
	OP_remmod_T(REM, TAG, width, type, load, round_fn, store)
|
|
|
|
|
|
|
|
// 1 0010
|
|
|
|
OP_op_T (REM, I, int, ivec2, ivec4, %);
|
|
|
|
OP_rem_T (F, 1, float, *, truncf, OP_store);
|
|
|
|
OP_rem_T (F, 2, vec2, *, vtrunc2f, OP_store);
|
|
|
|
OP_rem_T (F, 3, float, loadvec3f, vtrunc4f, storevec3f);
|
|
|
|
OP_rem_T (F, 4, vec4, *, vtrunc4f, OP_store);
|
|
|
|
OP_op_T (REM, L, long, lvec2, lvec4, %);
|
|
|
|
OP_rem_T (D, 1, double, *, trunc, OP_store);
|
|
|
|
OP_rem_T (D, 2, dvec2, *, vtrunc2d, OP_store);
|
|
|
|
OP_rem_T (D, 3, double, loadvec3d, vtrunc4d, storevec3d);
|
|
|
|
OP_rem_T (D, 4, dvec4, *, vtrunc4d, OP_store);
|
|
|
|
|
2022-01-04 09:36:13 +00:00
|
|
|
// implement true modulo (python %) for integers:
|
|
|
|
// 5 mod 3 = 2
|
|
|
|
// -5 mod 3 = 1
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// 5 mod -3 = -1
|
|
|
|
// -5 mod -3 = -2
|
|
|
|
// Case body for integer true modulo, OP_MOD_<TAG>_<width>.
// Operands are fetched with `load` from the addresses of OPA/OPB and the
// result is written to the address of OPC with `store`. `to_mask` turns a
// comparison result into an AND-able mask; the instantiations in this file
// pass `-` for the scalar case and `+` for the vector cases.
#define OP_mod_Ti(TAG, width, type, load, to_mask, store) \
	case OP_MOD_##TAG##_##width: \
		{ \
			__auto_type dividend = load(&OPA(type)); \
			__auto_type divisor = load(&OPB(type)); \
			__auto_type rem = dividend % divisor; \
			/* C's % is a remainder: its result takes the sign of the */\
			/* dividend (-5 % 3 = -2). When the operand signs differ */\
			/* and the remainder is non-zero, adding the divisor */\
			/* (3 here) gives the true modulo. */\
			__auto_type adjust = to_mask((dividend ^ divisor) < 0); \
			adjust &= to_mask(rem != 0); \
			store(&OPC(type), rem + (adjust & divisor)); \
		} \
		break
|
|
|
|
// floating point modulo is so much easier :P (just use floor instead of trunc)
|
|
|
|
// Modulo flavour of OP_remmod_T for floating-point operands: forwards every
// argument unchanged and fixes the opcode family to MOD.
#define OP_mod_Tf(TAG, width, type, load, round_fn, store) \
	OP_remmod_T(MOD, TAG, width, type, load, round_fn, store)
|
|
|
|
|
|
|
|
// 1 0011
|
|
|
|
OP_mod_Ti (I, 1, int, *, -, OP_store);
|
|
|
|
OP_mod_Ti (I, 2, ivec2, *, +, OP_store);
|
2022-01-04 09:36:13 +00:00
|
|
|
OP_mod_Ti (I, 3, int, loadvec3i1, +, storevec3i);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OP_mod_Ti (I, 4, ivec4, *, +, OP_store);
|
|
|
|
OP_mod_Tf (F, 1, float, *, floorf, OP_store);
|
|
|
|
OP_mod_Tf (F, 2, vec2, *, vfloor2f, OP_store);
|
|
|
|
OP_mod_Tf (F, 3, float, loadvec3f, vfloor4f, storevec3f);
|
|
|
|
OP_mod_Tf (F, 4, vec4, *, vfloor4f, OP_store);
|
|
|
|
OP_mod_Ti (L, 1, long, *, -, OP_store);
|
2022-01-04 09:36:13 +00:00
|
|
|
OP_mod_Ti (L, 2, lvec2, *, +, OP_store);
|
|
|
|
OP_mod_Ti (L, 3, long, loadvec3l1, +, storevec3l);
|
|
|
|
OP_mod_Ti (L, 4, lvec4, *, +, OP_store);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OP_mod_Tf (D, 1, double, *, floor, OP_store);
|
|
|
|
OP_mod_Tf (D, 2, dvec2, *, vfloor2d, OP_store);
|
|
|
|
OP_mod_Tf (D, 3, double, loadvec3d, vfloor4d, storevec3d);
|
|
|
|
OP_mod_Tf (D, 4, dvec4, *, vfloor4d, OP_store);
|
|
|
|
|
|
|
|
// 1 0100
|
|
|
|
OP_op(ADD, +);
|
|
|
|
// 1 0101
|
|
|
|
OP_op(SUB, -);
|
|
|
|
// 1 0110
|
|
|
|
OP_op_T (SHL, I, int, ivec2, ivec4, <<);
|
|
|
|
OP_op_T (SHL, L, long, lvec2, lvec4, <<);
|
|
|
|
case OP_EQ_S:
|
|
|
|
case OP_LT_S:
|
|
|
|
case OP_GT_S:
|
|
|
|
case OP_CMP_S:
|
|
|
|
case OP_GE_S:
|
|
|
|
case OP_LE_S:
|
|
|
|
{
|
|
|
|
int cmp = strcmp (PR_GetString (pr, OPA(string)),
|
|
|
|
PR_GetString (pr, OPB(string)));
|
|
|
|
switch (st_op) {
|
2022-01-14 13:44:08 +00:00
|
|
|
case OP_EQ_S: cmp = -(cmp == 0); break;
|
|
|
|
case OP_LT_S: cmp = -(cmp < 0); break;
|
|
|
|
case OP_GT_S: cmp = -(cmp > 0); break;
|
|
|
|
case OP_GE_S: cmp = -(cmp >= 0); break;
|
|
|
|
case OP_LE_S: cmp = -(cmp <= 0); break;
|
|
|
|
case OP_CMP_S: break;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
OPC(int) = cmp;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_ADD_S:
|
|
|
|
OPC(string) = PR_CatStrings(pr, PR_GetString (pr, OPA(string)),
|
|
|
|
PR_GetString (pr, OPB(string)));
|
|
|
|
break;
|
|
|
|
case OP_NOT_S:
|
2022-01-14 13:44:08 +00:00
|
|
|
OPC(int) = -(!OPA(string) || !*PR_GetString (pr, OPA(string)));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
// 1 0111
|
2022-01-05 10:04:43 +00:00
|
|
|
OP_op_T (ASR, I, int, ivec2, ivec4, >>);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OP_op_T (SHR, u, uint, uivec2, uivec4, >>);
|
2022-01-05 10:04:43 +00:00
|
|
|
OP_op_T (ASR, L, long, lvec2, lvec4, >>);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OP_op_T (SHR, U, ulong, ulvec2, ulvec4, >>);
|
|
|
|
// 1 1000
|
|
|
|
OP_op_T (BITAND, I, int, ivec2, ivec4, &);
|
|
|
|
OP_op_T (BITOR, I, int, ivec2, ivec4, |);
|
|
|
|
OP_op_T (BITXOR, I, int, ivec2, ivec4, ^);
|
|
|
|
OP_uop_T (BITNOT, I, int, ivec2, ivec4, ~);
|
|
|
|
// 1 1001
|
2022-01-11 04:00:54 +00:00
|
|
|
OP_cmp_T (LT, u, int, ivec2, ivec4, <, uint, uivec2, uivec4);
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_JUMP_A:
|
|
|
|
case OP_JUMP_B:
|
|
|
|
case OP_JUMP_C:
|
|
|
|
case OP_JUMP_D:
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, st_op - OP_JUMP_A);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
break;
|
|
|
|
OP_cmp_T (LT, U, long, lvec2, lvec4, <, ulong, ulvec2, ulvec4);
|
|
|
|
case OP_RETURN:
|
2022-01-17 07:54:27 +00:00
|
|
|
int ret_size = (st->c & 0x1f) + 1; // up to 32 words
|
|
|
|
if (st->c != 0xffff) {
|
2022-01-26 00:51:11 +00:00
|
|
|
mm = pr_address_mode (pr, st, st->c >> 5);
|
2022-01-16 05:22:04 +00:00
|
|
|
memcpy (&R_INT (pr), mm, ret_size * sizeof (*op_a));
|
|
|
|
}
|
|
|
|
pr->pr_xfunction->profile += profile - startprofile;
|
|
|
|
startprofile = profile;
|
|
|
|
PR_LeaveFunction (pr, pr->pr_depth == exitdepth);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
if (pr->pr_depth== exitdepth) {
|
|
|
|
if (pr->pr_trace && pr->pr_depth <= pr->pr_trace_depth) {
|
|
|
|
pr->pr_trace = false;
|
|
|
|
}
|
|
|
|
goto exit_program;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_CALL_B:
|
|
|
|
case OP_CALL_C:
|
|
|
|
case OP_CALL_D:
|
2022-01-17 05:29:14 +00:00
|
|
|
mm = pr_call_mode (pr, st, st_op - OP_CALL_B + 1);
|
2022-01-16 05:22:04 +00:00
|
|
|
function = mm->func_var;
|
|
|
|
pr->pr_argc = 0;
|
|
|
|
// op_c specifies the location for the return value if any
|
|
|
|
pr->pr_xfunction->profile += profile - startprofile;
|
|
|
|
startprofile = profile;
|
2022-01-17 10:12:28 +00:00
|
|
|
PR_CallFunction (pr, function, op_c);
|
2022-01-16 05:22:04 +00:00
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
break;
|
|
|
|
// 1 1010
|
|
|
|
OP_cmp_T (GT, u, int, ivec2, ivec4, >, uint, uivec2, uivec4);
|
2022-01-03 13:54:34 +00:00
|
|
|
case OP_SWIZZLE_F:
|
|
|
|
OPC(ivec4) = pr_swizzle_f (OPA(ivec4), st->b);
|
|
|
|
break;
|
2022-01-10 08:05:57 +00:00
|
|
|
case OP_SCALE_F_2:
|
|
|
|
OPC(vec2) = OPA(vec2) * OPB(float);
|
|
|
|
break;
|
|
|
|
case OP_SCALE_F_3:
|
|
|
|
VectorScale (&OPA(float), OPB(float), &OPC(float));
|
|
|
|
break;
|
|
|
|
case OP_SCALE_F_4:
|
|
|
|
OPC(vec4) = OPA(vec4) * OPB(float);
|
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
OP_cmp_T (GT, U, long, lvec2, lvec4, >, ulong, ulvec2, ulvec4);
|
2022-01-03 13:54:34 +00:00
|
|
|
case OP_SWIZZLE_D:
|
|
|
|
OPC(lvec4) = pr_swizzle_d (OPA(lvec4), st->b);
|
|
|
|
break;
|
2022-01-10 08:05:57 +00:00
|
|
|
case OP_SCALE_D_2:
|
|
|
|
OPC(dvec2) = OPA(dvec2) * OPB(double);
|
|
|
|
break;
|
|
|
|
case OP_SCALE_D_3:
|
|
|
|
VectorScale (&OPA(double), OPB(double), &OPC(double));
|
|
|
|
break;
|
|
|
|
case OP_SCALE_D_4:
|
|
|
|
OPC(dvec4) = OPA(dvec4) * OPB(double);
|
|
|
|
break;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// 1 1011
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_CROSS_F:
|
|
|
|
{
|
|
|
|
pr_vec4_t a = loadvec3f (&OPA(float));
|
|
|
|
pr_vec4_t b = loadvec3f (&OPB(float));
|
|
|
|
pr_vec4_t c = crossf (a, b);
|
|
|
|
storevec3f (&OPC(float), c);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_CDOT_F:
|
|
|
|
OPC(vec2) = dot2f (OPA(vec2), OPB(vec2));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_VDOT_F:
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
{
|
2022-01-16 05:22:04 +00:00
|
|
|
vec_t d = DotProduct (&OPA(float),
|
|
|
|
&OPB(float));
|
|
|
|
VectorSet (d, d, d, &OPC(float));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_QDOT_F:
|
|
|
|
OPC(vec4) = dotf (OPA(vec4), OPB(vec4));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_CMUL_F:
|
|
|
|
OPC(vec2) = cmulf (OPA(vec2), OPB(vec2));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_QVMUL_F:
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
{
|
2022-01-16 05:22:04 +00:00
|
|
|
pr_vec4_t v = loadvec3f (&OPB(float));
|
|
|
|
v = qvmulf (OPA(vec4), v);
|
|
|
|
storevec3f (&OPC(float), v);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_VQMUL_F:
|
|
|
|
{
|
|
|
|
pr_vec4_t v = loadvec3f (&OPA(float));
|
|
|
|
v = vqmulf (v, OPB(vec4));
|
|
|
|
storevec3f (&OPC(float), v);
|
|
|
|
}
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_QMUL_F:
|
|
|
|
OPC(vec4) = qmulf (OPA(vec4), OPB(vec4));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_CROSS_D:
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
{
|
2022-01-16 05:22:04 +00:00
|
|
|
pr_dvec4_t a = loadvec3d (&OPA(double));
|
|
|
|
pr_dvec4_t b = loadvec3d (&OPB(double));
|
|
|
|
pr_dvec4_t c = crossd (a, b);
|
|
|
|
storevec3d (&OPC(double), c);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_CDOT_D:
|
|
|
|
OPC(dvec2) = dot2d (OPA(dvec2), OPB(dvec2));
|
|
|
|
break;
|
|
|
|
case OP_VDOT_D:
|
|
|
|
{
|
|
|
|
double d = DotProduct (&OPA(double),
|
|
|
|
&OPB(double));
|
|
|
|
VectorSet (d, d, d, &OPC(double));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_QDOT_D:
|
|
|
|
OPC(dvec4) = dotd (OPA(dvec4), OPB(dvec4));
|
|
|
|
break;
|
|
|
|
case OP_CMUL_D:
|
|
|
|
OPC(dvec2) = cmuld (OPA(dvec2), OPB(dvec2));
|
|
|
|
break;
|
|
|
|
case OP_QVMUL_D:
|
|
|
|
{
|
|
|
|
pr_dvec4_t v = loadvec3d (&OPB(double));
|
|
|
|
v = qvmuld (OPA(dvec4), v);
|
|
|
|
storevec3d (&OPC(double), v);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_VQMUL_D:
|
|
|
|
{
|
|
|
|
pr_dvec4_t v = loadvec3d (&OPA(double));
|
|
|
|
v = vqmuld (v, OPB(dvec4));
|
|
|
|
storevec3d (&OPC(double), v);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_QMUL_D:
|
|
|
|
OPC(dvec4) = qmuld (OPA(dvec4), OPB(dvec4));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
// 1 1100
|
2022-01-13 05:24:11 +00:00
|
|
|
OP_op_T (BITAND, L, long, lvec2, lvec4, &);
|
|
|
|
OP_op_T (BITOR, L, long, lvec2, lvec4, |);
|
|
|
|
OP_op_T (BITXOR, L, long, lvec2, lvec4, ^);
|
|
|
|
OP_uop_T (BITNOT, L, long, lvec2, lvec4, ~);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// 1 1101
|
2022-01-11 04:00:54 +00:00
|
|
|
OP_cmp_T (GE, u, int, ivec2, ivec4, >=, uint, uivec2, uivec4);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case OP_MOVE_I:
|
|
|
|
memmove (op_c, op_a, st->b * sizeof (pr_type_t));
|
|
|
|
break;
|
|
|
|
case OP_MOVE_P:
|
2022-01-18 06:50:32 +00:00
|
|
|
memmove (pr->pr_globals + OPC(ptr), pr->pr_globals + OPA(ptr),
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
OPB(uint) * sizeof (pr_type_t));
|
|
|
|
break;
|
|
|
|
case OP_MOVE_PI:
|
2022-01-18 06:50:32 +00:00
|
|
|
memmove (pr->pr_globals + OPC(ptr), pr->pr_globals + OPA(ptr),
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
st->b * sizeof (pr_type_t));
|
|
|
|
break;
|
2022-01-16 10:32:47 +00:00
|
|
|
case OP_STATE_ft:
|
|
|
|
{
|
|
|
|
int self = *pr->globals.self;
|
|
|
|
int nextthink = pr->fields.nextthink + self;
|
|
|
|
int frame = pr->fields.frame + self;
|
|
|
|
int think = pr->fields.think + self;
|
|
|
|
float time = *pr->globals.ftime + 0.1;
|
|
|
|
pr->pr_edict_area[nextthink].float_var = time;
|
|
|
|
pr->pr_edict_area[frame].float_var = OPA(float);
|
|
|
|
pr->pr_edict_area[think].func_var = op_b->func_var;
|
2022-01-16 05:22:04 +00:00
|
|
|
}
|
2022-01-03 13:54:34 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
OP_cmp_T (GE, U, long, lvec2, lvec4, >=, ulong, ulvec2, ulvec4);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
case OP_MEMSET_I:
|
2022-01-03 05:45:12 +00:00
|
|
|
pr_memset (op_c, OPA(int), st->b);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case OP_MEMSET_P:
|
2022-01-18 06:50:32 +00:00
|
|
|
pr_memset (pr->pr_globals + OPC(ptr), OPA(int), OPB(uint));
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
|
|
|
case OP_MEMSET_PI:
|
2022-01-18 06:50:32 +00:00
|
|
|
pr_memset (pr->pr_globals + OPC(ptr), OPA(int), st->b);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
break;
|
2022-01-16 10:32:47 +00:00
|
|
|
case OP_STATE_ftt:
|
|
|
|
{
|
|
|
|
int self = *pr->globals.self;
|
|
|
|
int nextthink = pr->fields.nextthink + self;
|
|
|
|
int frame = pr->fields.frame + self;
|
|
|
|
int think = pr->fields.think + self;
|
|
|
|
float time = *pr->globals.ftime + OPC(float);
|
|
|
|
pr->pr_edict_area[nextthink].float_var = time;
|
|
|
|
pr->pr_edict_area[frame].float_var = OPA(float);
|
|
|
|
pr->pr_edict_area[think].func_var = op_b->func_var;
|
|
|
|
}
|
2022-01-16 05:22:04 +00:00
|
|
|
break;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands, allowing for direct access to any of
four areas (e.g., current entity, current stack frame, Objective-QC
self, ...), plus instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
- Make a decision about conversion operations (if any instructions
remain, they'll be just single-component: at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
// 1 1110
|
2022-01-11 04:00:54 +00:00
|
|
|
OP_cmp_T (LE, u, int, ivec2, ivec4, <=, uint, uivec2, uivec4);
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_IFZ:
|
|
|
|
if (!OPC(int)) {
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, 0);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_IFB:
|
|
|
|
if (OPC(int) < 0) {
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, 0);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_IFA:
|
|
|
|
if (OPC(int) > 0) {
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, 0);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-16 10:32:47 +00:00
|
|
|
case OP_STATE_dt:
|
2022-01-16 05:22:04 +00:00
|
|
|
{
|
|
|
|
int self = *pr->globals.self;
|
|
|
|
int nextthink = pr->fields.nextthink + self;
|
|
|
|
int frame = pr->fields.frame + self;
|
|
|
|
int think = pr->fields.think + self;
|
2022-01-16 10:32:47 +00:00
|
|
|
double time = *pr->globals.dtime + 0.1;
|
|
|
|
*(double *) (&pr->pr_edict_area[nextthink]) = time;
|
2022-01-18 04:21:06 +00:00
|
|
|
pr->pr_edict_area[frame].int_var = OPA(int);
|
2022-01-16 05:22:04 +00:00
|
|
|
pr->pr_edict_area[think].func_var = op_b->func_var;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
OP_cmp_T (LE, U, long, lvec2, lvec4, <=, ulong, ulvec2, ulvec4);
|
|
|
|
case OP_IFNZ:
|
|
|
|
if (OPC(int)) {
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, 0);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_IFAE:
|
|
|
|
if (OPC(int) >= 0) {
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, 0);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_IFBE:
|
|
|
|
if (OPC(int) <= 0) {
|
|
|
|
pr->pr_xstatement = pr_jump_mode (pr, st, 0);
|
|
|
|
st = pr->pr_statements + pr->pr_xstatement;
|
|
|
|
}
|
|
|
|
break;
|
2022-01-16 10:32:47 +00:00
|
|
|
case OP_STATE_dtt:
|
2022-01-16 05:22:04 +00:00
|
|
|
{
|
|
|
|
int self = *pr->globals.self;
|
|
|
|
int nextthink = pr->fields.nextthink + self;
|
|
|
|
int frame = pr->fields.frame + self;
|
|
|
|
int think = pr->fields.think + self;
|
2022-01-16 10:32:47 +00:00
|
|
|
double time = *pr->globals.dtime + OPC(double);
|
|
|
|
*(double *) (&pr->pr_edict_area[nextthink]) = time;
|
2022-01-18 04:21:06 +00:00
|
|
|
pr->pr_edict_area[frame].int_var = OPA(int);
|
2022-01-16 05:22:04 +00:00
|
|
|
pr->pr_edict_area[think].func_var = op_b->func_var;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
// 1 1111
|
|
|
|
case OP_LEA_A:
|
2022-01-20 05:55:29 +00:00
|
|
|
case OP_LEA_B:
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_LEA_C:
|
|
|
|
case OP_LEA_D:
|
|
|
|
mm = pr_address_mode (pr, st, (st_op - OP_LEA_A));
|
|
|
|
op_c->pointer_var = mm - pr->pr_globals;
|
|
|
|
break;
|
|
|
|
case OP_QV4MUL_F:
|
|
|
|
OPC(vec4) = qvmulf (OPA(vec4), OPB(vec4));
|
|
|
|
break;
|
2022-01-05 10:04:43 +00:00
|
|
|
case OP_V4QMUL_F:
|
2022-01-03 13:54:34 +00:00
|
|
|
OPC(vec4) = vqmulf (OPA(vec4), OPB(vec4));
|
2022-01-03 10:30:32 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_QV4MUL_D:
|
|
|
|
OPC(dvec4) = qvmuld (OPA(dvec4), OPB(dvec4));
|
|
|
|
break;
|
2022-01-05 10:04:43 +00:00
|
|
|
case OP_V4QMUL_D:
|
2022-01-03 13:54:34 +00:00
|
|
|
OPC(dvec4) = vqmuld (OPA(dvec4), OPB(dvec4));
|
2022-01-03 10:55:27 +00:00
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
// 10nn spare
|
2022-01-16 10:32:47 +00:00
|
|
|
case OP_CONV:
|
|
|
|
switch (st->b) {
|
|
|
|
#include "libs/gamecode/pr_convert.cinc"
|
|
|
|
default:
|
|
|
|
PR_RunError (pr, "invalid conversion code: %04o",
|
|
|
|
st->b);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_WITH:
|
|
|
|
pr_with (pr, st);
|
|
|
|
break;
|
2022-01-16 05:22:04 +00:00
|
|
|
// 1110 spare
|
2022-01-16 07:27:33 +00:00
|
|
|
// Horizontal operation over two elements: combines vec[0] and vec[1] with
// the binary operator token `op`, yielding a single parenthesized expression.
// `vec` is expanded once per element, so it should be free of side effects.
#define OP_hop2(vec, op) ((vec)[0] op (vec)[1])
|
|
|
|
// Horizontal operation over three elements: chains vec[0], vec[1] and vec[2]
// with the binary operator token `op` (grouping follows op's associativity).
// `vec` is expanded once per element, so it should be free of side effects.
#define OP_hop3(vec, op) ((vec)[0] op (vec)[1] op (vec)[2])
|
|
|
|
// Horizontal operation over four elements: chains vec[0] through vec[3]
// with the binary operator token `op` (grouping follows op's associativity).
// `vec` is expanded once per element, so it should be free of side effects.
#define OP_hop4(vec, op) ((vec)[0] op (vec)[1] op (vec)[2] op (vec)[3])
|
2022-01-16 05:22:04 +00:00
|
|
|
case OP_HOPS:
|
|
|
|
switch (st->b) {
|
2022-01-16 07:27:33 +00:00
|
|
|
#include "libs/gamecode/pr_hops.cinc"
|
2022-01-16 05:22:04 +00:00
|
|
|
default:
|
|
|
|
PR_RunError (pr, "invalid hops code: %04o",
|
|
|
|
st->b);
|
|
|
|
}
|
|
|
|
break;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
default:
|
2022-01-03 08:54:54 +00:00
|
|
|
PR_RunError (pr, "Bad opcode o%03o", st->op & OP_MASK);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
2022-01-18 04:21:06 +00:00
|
|
|
if (pr->watch && pr->watch->int_var != old_val.int_var) {
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
if (!pr->wp_conditional
|
2022-01-18 04:21:06 +00:00
|
|
|
|| pr->watch->int_var == pr->wp_val.int_var) {
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
if (pr->debug_handler) {
|
|
|
|
pr->debug_handler (prd_watchpoint, 0, pr->debug_data);
|
|
|
|
} else {
|
|
|
|
PR_RunError (pr, "watchpoint hit: %d -> %d",
|
2022-01-18 04:21:06 +00:00
|
|
|
old_val.int_var, pr->watch->int_var);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
|
|
|
}
|
2022-01-18 04:21:06 +00:00
|
|
|
old_val.int_var = pr->watch->int_var;
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
exit_program:
|
|
|
|
}
|
2022-01-02 11:16:45 +00:00
|
|
|
/*
|
|
|
|
PR_ExecuteProgram
|
|
|
|
|
|
|
|
The interpretation main loop
|
|
|
|
*/
|
|
|
|
VISIBLE void
|
2022-01-18 06:32:43 +00:00
|
|
|
PR_ExecuteProgram (progs_t *pr, pr_func_t fnum)
|
2022-01-02 11:16:45 +00:00
|
|
|
{
|
|
|
|
Sys_PushSignalHook (signal_hook, pr);
|
|
|
|
Sys_PushErrorHandler (error_handler, pr);
|
|
|
|
|
|
|
|
if (pr->debug_handler) {
|
|
|
|
pr->debug_handler (prd_subenter, &fnum, pr->debug_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
int exitdepth = pr->pr_depth;
|
2022-01-17 10:12:28 +00:00
|
|
|
if (!PR_CallFunction (pr, fnum, pr->pr_return)) {
|
2022-01-02 11:16:45 +00:00
|
|
|
// called a builtin instead of progs code
|
|
|
|
goto exit_program;
|
|
|
|
}
|
2022-01-03 04:56:43 +00:00
|
|
|
if (pr->progs->version < PROG_VERSION) {
|
2022-01-02 11:16:45 +00:00
|
|
|
pr_exec_quakec (pr, exitdepth);
|
[gamecode] Add a new Ruamoko instruction set
When it's finalized (most of the conversion operations will go, probably
the float bit ops, maybe (very undecided) the 3-component vector ops,
and likely the CALLN ops), this will be the actual instruction set for
Ruamoko.
Main features:
- Significant reduction in redundant instructions: no more multiple
opcodes to move the one operand size.
- load, store, push, and pop share unified addressing mode encoding
(with the exception of mode 0 for load as that is redundant with mode
0 for store, thus load mode 0 gives quick access to entity.field).
- Full support for both 32 and 64 bit signed integer, unsigned integer,
and floating point values.
- SIMD for 1, 2, (currently) 3, and 4 components. Transfers support up
to 128-bit wide operations (need two operations to transfer a full
4-component double/long vector), but all math operations support both
128-bit (32-bit components) and 256-bit (64-bit components) vectors.
- "Interpreted" operations for the various vector sizes: complex dot
and multiplication, 3d vector dot and cross product, quaternion dot
and multiplication, along with qv and vq shortcuts.
- 4-component swizzles for both sizes (not yet implemented, but the
instructions are allocated), with the option to zero or negate (thus
conjugates for complex and quaternion values) individual components.
- "Based offsets": all relevant instructions include base register
indices for all three operands allowing for direct access to any of
four areas (eg, current entity, current stack frame, Objective-QC
self, ...) instructions to set a register and push/pop the four
registers to/from the stack.
Remaining work:
- Implement swizzle operations and a few other stragglers.
= Make a decision about conversion operations (if any instructions
remain, they'll be just single-component (at 14 meaningful pairs,
that's a lot of instructions to waste on SIMD versions).
- Decide whether to keep CALL1-CALL8: probably little point in
supporting two different calling conventions, and it would free up
another eight instructions.
- Unit tests for the instructions.
- Teach qfcc to generate code for the new instruction set (hah, biggest
job, I'm sure, though hopefully not as crazy as the rewrite eleven
years ago).
2022-01-02 14:15:15 +00:00
|
|
|
} else {
|
|
|
|
pr_exec_ruamoko (pr, exitdepth);
|
2022-01-02 11:16:45 +00:00
|
|
|
}
|
2016-01-03 14:04:00 +00:00
|
|
|
exit_program:
|
2020-03-26 03:30:32 +00:00
|
|
|
if (pr->debug_handler) {
|
|
|
|
pr->debug_handler (prd_subexit, 0, pr->debug_data);
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|
2020-02-25 08:28:32 +00:00
|
|
|
pr->pr_argc = 0;
|
2016-01-03 14:04:00 +00:00
|
|
|
Sys_PopErrorHandler ();
|
|
|
|
Sys_PopSignalHook ();
|
2001-02-19 21:15:25 +00:00
|
|
|
}
|