Finish Vector4 implementation

2025-02-08 16:52:01 +00:00 · 2022-11-10 15:33:57 +01:00 · 2022-11-10 15:33:57 +01:00 · 31db5847cc
commit 31db5847cc
parent a525233914
15 changed files with 326 additions and 196 deletions
--- a/src/common/scripting/backend/codegen.cpp
+++ b/src/common/scripting/backend/codegen.cpp
@ -484,12 +484,15 @@ int EncodeRegType(ExpEmit reg)
 	else if (reg.RegCount == 2)
 	{
 		regtype |= REGT_MULTIREG2;
-
 	}
 	else if (reg.RegCount == 3)
 	{
 		regtype |= REGT_MULTIREG3;
 	}
+	else if (reg.RegCount == 4)
+	{
+		regtype |= REGT_MULTIREG4;
+	}
 	return regtype;
 }

@ -596,6 +599,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
 {
 	bool fails = false;

+	// Cast every scalar to float64
 	for (auto &a : xyzw)
 	{
 		if (a != nullptr)
@ -604,7 +608,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
 			if (a == nullptr) fails = true;
 			else
 			{
-				if (a->ValueType != TypeVector2)	// a vec3 may be initialized with (vec2, z)
+				if (a->ValueType != TypeVector2 && a->ValueType != TypeVector3)	// smaller vector can be used to initialize another vector
 				{
 					a = new FxFloatCast(a);
 					a = a->Resolve(ctx);
@ -613,59 +617,80 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
 			}
 		}
 	}
+
 	if (fails)
 	{
 		delete this;
 		return nullptr;
 	}
-	// at this point there are five legal cases:
-	// * two floats = vector2
-	// * three floats = vector3
-	// * four floats = vector4
-	// * vector2 + float = vector3
-	// * vector3 + float = vector4
-	if (xyzw[0]->ValueType == TypeVector2)
+
+	// The dimension of the vector does not necessarily equal the number of non-null elements in xyzw
+	// For example: '(asdf.xy, 1)' would be Vector3 where xyzw[0]->ValueType == TypeVector2 and xyzw[1]->ValueType == TypeFloat64
+
+	// Handle nesting and figure out the dimension of the vector
+	int vectorDimensions = 0;
+
+	for (int i = 0; i < maxVectorDimensions && xyzw[i]; ++i)
 	{
-		if (xyzw[1]->ValueType != TypeFloat64 || xyzw[2] != nullptr)
+		assert(dynamic_cast<FxExpression*>(xyzw[i]));
+
+		if (xyzw[i]->ValueType == TypeFloat64)
+		{
+			vectorDimensions++;
+		}
+		else if (xyzw[i]->ValueType == TypeVector2 || xyzw[i]->ValueType == TypeVector3 || xyzw[i]->ValueType == TypeVector4)
+		{
+			// Solve nested vector
+			int regCount = xyzw[i]->ValueType->RegCount;
+
+			if (regCount + vectorDimensions > maxVectorDimensions)
+			{
+				vectorDimensions += regCount; // Show proper number
+				goto too_big;
+			}
+
+			// Nested initializer gets simplified
+			if (xyzw[i]->ExprType == EFX_VectorValue)
+			{
+				// Splice the nested initializer's elements in place of xyzw[i]. The nested initializer can hold
+				// fewer elements than registers (e.g. '(v.xy, 1)' has 2 elements but 3 registers), so count them.
+				auto vi = static_cast<FxVectorValue*>(xyzw[i]);
+				int nested = 0;
+				while (nested < maxVectorDimensions && vi->xyzw[nested]) ++nested;
+				// Make room by shifting the tail right; stopping at i + nested keeps every read index above i.
+				for (int l = maxVectorDimensions - 1; l >= i + nested; --l) xyzw[l] = xyzw[l - nested + 1];
+				for (int j = 0; j < nested; ++j)
+				{
+					xyzw[i + j] = vi->xyzw[j];
+					vi->xyzw[j] = nullptr; // Preserve object after 'delete vi;'
+				}
+				delete vi;
+
+				// We extracted something, let's iterate on that again:
+				--i;
+				continue;
+			}
+			else
+			{
+				vectorDimensions += regCount;
+			}
+		}
+		else
 		{
 			ScriptPosition.Message(MSG_ERROR, "Not a valid vector");
 			delete this;
 			return nullptr;
 		}
-		ValueType = TypeVector3;
-		if (xyzw[0]->ExprType == EFX_VectorValue)
-		{
-			// If two vector initializers are nested, unnest them now.
-			auto vi = static_cast<FxVectorValue*>(xyzw[0]);
-			xyzw[2] = xyzw[1];
-			xyzw[1] = vi->xyzw[1];
-			xyzw[0] = vi->xyzw[0];
-			vi->xyzw[0] = vi->xyzw[1] = nullptr; // Don't delete our own expressions.
-			delete vi;
-		}
-		ValueType = TypeVector4;
-		if (xyzw[0]->ExprType == EFX_VectorValue)
-		{
-			// If two vector initializers are nested, unnest them now.
-			auto vi = static_cast<FxVectorValue*>(xyzw[0]);
-			xyzw[2] = xyzw[1];
-			xyzw[1] = vi->xyzw[1];
-			xyzw[0] = vi->xyzw[0];
-			vi->xyzw[0] = vi->xyzw[1] = nullptr; // Don't delete our own expressions.
-			delete vi;
-		}
 	}
-	else if (xyzw[0]->ValueType == TypeFloat64 && xyzw[1]->ValueType == TypeFloat64)
+
+	switch (vectorDimensions)
 	{
-		ValueType = xyzw[2] == nullptr ? TypeVector2 : TypeVector3;
-	}
-	else if (xyzw[0]->ValueType == TypeFloat64 && xyzw[1]->ValueType == TypeFloat64 && xyzw[2]->ValueType == TypeFloat64)
-	{
-		ValueType = xyzw[3] == nullptr ? TypeVector3 : TypeVector4;
-	}
-	else
-	{
-		ScriptPosition.Message(MSG_ERROR, "Not a valid vector");
+	case 2: ValueType = TypeVector2; break;
+	case 3: ValueType = TypeVector3; break;
+	case 4: ValueType = TypeVector4; break;
+	default:
+	too_big:;
+		ScriptPosition.Message(MSG_ERROR, "Vector of %d dimensions is not supported", vectorDimensions);
 		delete this;
 		return nullptr;
 	}
@ -674,7 +699,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
 	isConst = true;
 	for (auto &a : xyzw)
 	{
-		if (a != nullptr && !a->isConstant()) isConst = false;
+		if (a && !a->isConstant()) isConst = false;
 	}
 	return this;
 }
@ -692,145 +717,106 @@ static ExpEmit EmitKonst(VMFunctionBuilder *build, ExpEmit &emit)

+// Emits every non-null element of xyzw and returns them as ValueType->RegCount consecutive float registers.
+// If the emitted values already sit in consecutive registers the first one is widened and returned as-is;
+// otherwise they are moved into 'out', merging runs of consecutive source registers into one MOVEF/MOVEV2/3/4.
 ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build)
 {
-	// no const handling here. Ultimately it's too rarely used (i.e. the only fully constant vector ever allocated in ZDoom is the 0-vector in a very few places)
-	// and the negatives (excessive allocation of float constants) outweigh the positives (saved a few instructions)
-	assert(xyzw[0] != nullptr);
-	assert(xyzw[1] != nullptr);
-	if (ValueType == TypeVector2)
+	int vectorDimensions = ValueType->RegCount;
+	int vectorElements = 0;
+	for (auto& e : xyzw)
 	{
-		ExpEmit tempxval = xyzw[0]->Emit(build);
-		ExpEmit tempyval = xyzw[1]->Emit(build);
-		ExpEmit xval = EmitKonst(build, tempxval);
-		ExpEmit yval = EmitKonst(build, tempyval);
-		assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT);
-		if (yval.RegNum == xval.RegNum + 1)
-		{
-			// The results are already in two continuous registers so just return them as-is.
-			xval.RegCount++;
-			return xval;
-		}
-		else
-		{
-			// The values are not in continuous registers so they need to be copied together now.
-			ExpEmit out(build, REGT_FLOAT, 2);
-			build->Emit(OP_MOVEF, out.RegNum, xval.RegNum);
-			build->Emit(OP_MOVEF, out.RegNum + 1, yval.RegNum);
-			xval.Free(build);
-			yval.Free(build);
-			return out;
-		}
+		if (e) vectorElements++;
 	}
-	else if (xyzw[0]->ValueType == TypeVector2)	// vec2+float
+	assert(vectorElements > 0);
+
+	ExpEmit* tempVal = (ExpEmit*)calloc(vectorElements, sizeof(ExpEmit));
+	ExpEmit* val = (ExpEmit*)calloc(vectorElements, sizeof(ExpEmit));
+
+	// Init ExpEmit
+	for (int i = 0; i < vectorElements; ++i)
 	{
-		ExpEmit xyval = xyzw[0]->Emit(build);
-		ExpEmit tempzval = xyzw[1]->Emit(build);
-		ExpEmit zval = EmitKonst(build, tempzval);
-		assert(xyval.RegType == REGT_FLOAT && xyval.RegCount == 2 && zval.RegType == REGT_FLOAT);
-		if (zval.RegNum == xyval.RegNum + 2)
-		{
-			// The results are already in three continuous registers so just return them as-is.
-			xyval.RegCount++;
-			return xyval;
-		}
-		else
-		{
-			// The values are not in continuous registers so they need to be copied together now.
-			ExpEmit out(build, REGT_FLOAT, 4);
-			build->Emit(OP_MOVEV2, out.RegNum, xyval.RegNum);
-			build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum);
-			xyval.Free(build);
-			zval.Free(build);
-			return out;
-		}
+		new(tempVal + i) ExpEmit(xyzw[i]->Emit(build));
+		new(val + i) ExpEmit(EmitKonst(build, tempVal[i]));
 	}
-	else if (xyzw[0]->ValueType == TypeVector3)	// vec3+float
+
 	{
-		assert(xyzw[2] != nullptr);
-		ExpEmit tempxval = xyzw[0]->Emit(build);
-		ExpEmit tempyval = xyzw[1]->Emit(build);
-		ExpEmit tempzval = xyzw[2]->Emit(build);
-		ExpEmit xval = EmitKonst(build, tempxval);
-		ExpEmit yval = EmitKonst(build, tempyval);
-		ExpEmit zval = EmitKonst(build, tempzval);
-		assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT && zval.RegType == REGT_FLOAT);
-		if (yval.RegNum == xval.RegNum + 1 && zval.RegNum == xval.RegNum + 2)
+		bool isContinuous = true;
+
+		for (int i = 1; i < vectorElements; ++i)
 		{
-			// The results are already in three continuous registers so just return them as-is.
-			xval.RegCount += 2;
-			return xval;
-		}
-		else
-		{
-			// The values are not in continuous registers so they need to be copied together now.
-			ExpEmit out(build, REGT_FLOAT, 4);
-			//Try to optimize a bit...
-			if (yval.RegNum == xval.RegNum + 1)
+			if (val[i - 1].RegNum + val[i - 1].RegCount != val[i].RegNum)
 			{
-				build->Emit(OP_MOVEV2, out.RegNum, xval.RegNum);
-				build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum);
+				isContinuous = false;
+				break;
 			}
-			else if (zval.RegNum == yval.RegNum + 1)
+		}
+
+		// all values are in continuous registers:
+		if (isContinuous)
+		{
+			// Copy the result out first: the scratch arrays are released before returning.
+			ExpEmit first = val[0];
+			first.RegCount = vectorDimensions;
+			free(tempVal);
+			free(val);
+			return first;
+		}
+	}
+
+	ExpEmit out(build, REGT_FLOAT, vectorDimensions);
+
+	{
+		auto emitRegMove = [&](int regsToMove, int dstRegIndex, int srcRegIndex) {
+			assert(dstRegIndex < vectorDimensions);
+			assert(srcRegIndex < vectorDimensions);
+			assert(regsToMove > 0 && regsToMove <= 4);
+			build->Emit(regsToMove == 1 ? OP_MOVEF : OP_MOVEV2 + regsToMove - 2, out.RegNum + dstRegIndex, val[srcRegIndex].RegNum);
+			static_assert(OP_MOVEV2 + 1 == OP_MOVEV3);
+			static_assert(OP_MOVEV3 + 1 == OP_MOVEV4);
+		};
+
+		int regsToPush = 0;
+		int nextRegNum = val[0].RegNum;
+		int lastElementIndex = 0;
+		int reg = 0;
+
+		// Use larger MOVE OPs for any groups of registers that are continuous including those across individual xyzw[] elements
+		for (int elementIndex = 0; elementIndex < vectorElements; ++elementIndex)
+		{
+			int regCount = xyzw[elementIndex]->ValueType->RegCount;
+
+			if (nextRegNum != val[elementIndex].RegNum)
 			{
-				build->Emit(OP_MOVEF, out.RegNum, xval.RegNum);
-				build->Emit(OP_MOVEV2, out.RegNum+1, yval.RegNum);
+				emitRegMove(regsToPush, reg, lastElementIndex);
+				
+				reg += regsToPush;
+				regsToPush = regCount;
+				nextRegNum = val[elementIndex].RegNum + val[elementIndex].RegCount;
+				lastElementIndex = elementIndex;
 			}
 			else
 			{
-				build->Emit(OP_MOVEF, out.RegNum, xval.RegNum);
-				build->Emit(OP_MOVEF, out.RegNum + 1, yval.RegNum);
-				build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum);
+				regsToPush += regCount;
+				nextRegNum = val[elementIndex].RegNum + val[elementIndex].RegCount;
 			}
-			xval.Free(build);
-			yval.Free(build);
-			zval.Free(build);
-			return out;
+		}
+
+		// Emit move instructions on the last register
+		if (regsToPush > 0)
+		{
+			emitRegMove(regsToPush, reg, lastElementIndex);
 		}
 	}
-	else
+
+	for (int i = 0; i < vectorElements; ++i)
 	{
-		assert(xyzw[3] != nullptr);
-		ExpEmit tempxval = xyzw[0]->Emit(build);
-		ExpEmit tempyval = xyzw[1]->Emit(build);
-		ExpEmit tempzval = xyzw[2]->Emit(build);
-		ExpEmit tempwval = xyzw[3]->Emit(build);
-		ExpEmit xval = EmitKonst(build, tempxval);
-		ExpEmit yval = EmitKonst(build, tempyval);
-		ExpEmit zval = EmitKonst(build, tempzval);
-		ExpEmit wval = EmitKonst(build, tempwval);
-		assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT && zval.RegType == REGT_FLOAT && wval.RegType == REGT_FLOAT);
-		if (yval.RegNum == xval.RegNum + 1 && zval.RegNum == xval.RegNum + 2)
-		{
-			// The results are already in three continuous registers so just return them as-is.
-			xval.RegCount += 3;
-			return xval;
-		}
-		else
-		{
-			// The values are not in continuous registers so they need to be copied together now.
-			ExpEmit out(build, REGT_FLOAT, 4);
-			//Try to optimize a bit...
-			if (yval.RegNum == xval.RegNum + 1)
-			{
-				build->Emit(OP_MOVEV2, out.RegNum, xval.RegNum);
-				build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum);
-			}
-			else if (zval.RegNum == yval.RegNum + 1)
-			{
-				build->Emit(OP_MOVEF, out.RegNum, xval.RegNum);
-				build->Emit(OP_MOVEV2, out.RegNum+1, yval.RegNum);
-			}
-			else
-			{
-				build->Emit(OP_MOVEF, out.RegNum, xval.RegNum);
-				build->Emit(OP_MOVEF, out.RegNum + 1, yval.RegNum);
-				build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum);
-			}
-			xval.Free(build);
-			yval.Free(build);
-			zval.Free(build);
-			return out;
-		}
+		val[i].Free(build);
+		val[i].~ExpEmit();
 	}
+	// Release the scratch arrays allocated with calloc() above.
+	free(tempVal);
+	free(val);
+
+	return out;
 }

 //==========================================================================
@ -3196,11 +3172,11 @@ ExpEmit FxMulDiv::Emit(VMFunctionBuilder *build)
 		int op;
 		if (op2.Konst)
 		{
-			op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RK : OP_MULVF3_RK) : (IsVector2() ? OP_DIVVF2_RK : OP_DIVVF3_RK);
+			op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RK : IsVector3() ? OP_MULVF3_RK : OP_MULVF4_RK) : (IsVector2() ? OP_DIVVF2_RK : IsVector3() ? OP_DIVVF3_RK : OP_DIVVF4_RK);
 		}
 		else
 		{
-			op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RR : OP_MULVF3_RR) : (IsVector2() ? OP_DIVVF2_RR : OP_DIVVF3_RR);
+			op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RR : IsVector3() ? OP_MULVF3_RR : OP_MULVF4_RR) : (IsVector2() ? OP_DIVVF2_RR : IsVector3() ? OP_DIVVF3_RR : OP_DIVVF4_RR);
 		}
 		op1.Free(build);
 		op2.Free(build);
@ -3903,7 +3879,7 @@ ExpEmit FxCompareEq::EmitCommon(VMFunctionBuilder *build, bool forcompare, bool
 			std::swap(op1, op2);
 		}
 		assert(!op1.Konst);
-		assert(op1.RegCount >= 1 && op1.RegCount <= 3);
+		assert(op1.RegCount >= 1 && op1.RegCount <= 4);

 		ExpEmit to(build, REGT_INT);

@ -9222,15 +9198,19 @@ ExpEmit FxVectorBuiltin::Emit(VMFunctionBuilder *build)
 {
 	ExpEmit to(build, ValueType->GetRegType(), ValueType->GetRegCount());
 	ExpEmit op = Self->Emit(build);
+
+	const int vecSize = (Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2) ? 2 
+		: (Self->ValueType == TypeVector3 || Self->ValueType == TypeFVector3) ? 3 : 4;
+
 	if (Function == NAME_Length)
 	{
-		build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : Self->ValueType == TypeFVector3 ? OP_LENV3 : OP_LENV4, to.RegNum, op.RegNum);
+		build->Emit(vecSize == 2 ? OP_LENV2 : vecSize == 3 ? OP_LENV3 : OP_LENV4, to.RegNum, op.RegNum);
 	}
 	else
 	{
 		ExpEmit len(build, REGT_FLOAT);
-		build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : Self->ValueType == TypeFVector3 ? OP_LENV3 : OP_LENV4, to.RegNum, op.RegNum);
-		build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_DIVVF2_RR : Self->ValueType == TypeFVector3 ? OP_DIVVF3_RR : OP_DIVVF4_RR, to.RegNum, op.RegNum, len.RegNum);
+		build->Emit(vecSize == 2 ? OP_LENV2 : vecSize == 3 ? OP_LENV3 : OP_LENV4, len.RegNum, op.RegNum);
+		build->Emit(vecSize == 2 ? OP_DIVVF2_RR : vecSize == 3 ? OP_DIVVF3_RR : OP_DIVVF4_RR, to.RegNum, op.RegNum, len.RegNum);
 		len.Free(build);
 	}
 	op.Free(build);
@ -10894,11 +10874,12 @@ FxLocalVariableDeclaration::FxLocalVariableDeclaration(PType *type, FName name,
 	// Local FVector isn't different from Vector
 	if (type == TypeFVector2) type = TypeVector2;
 	else if (type == TypeFVector3) type = TypeVector3;
+	else if (type == TypeFVector4) type = TypeVector4;

 	ValueType = type;
 	VarFlags = varflags;
 	Name = name;
-	RegCount = type == TypeVector2 ? 2 : type == TypeVector3 ? 3 : 1;
+	RegCount = type->RegCount;
 	Init = initval;
 	clearExpr = nullptr;
 }
--- a/src/common/scripting/backend/codegen.h
+++ b/src/common/scripting/backend/codegen.h
@ -336,7 +336,7 @@ public:
 	bool IsFloat() const { return ValueType->isFloat(); }
 	bool IsInteger() const { return ValueType->isNumeric() && ValueType->isIntCompatible(); }
 	bool IsPointer() const { return ValueType->isPointer(); }
-	bool IsVector() const { return ValueType == TypeVector2 || ValueType == TypeVector3 || ValueType == TypeFVector2 || ValueType == TypeFVector3; };
+	bool IsVector() const { return IsVector2() || IsVector3() || IsVector4(); };
 	bool IsVector2() const { return ValueType == TypeVector2 || ValueType == TypeFVector2; };
 	bool IsVector3() const { return ValueType == TypeVector3 || ValueType == TypeFVector3; };
 	bool IsVector4() const { return ValueType == TypeVector4 || ValueType == TypeFVector4; };
@ -551,7 +551,9 @@ public:

 class FxVectorValue : public FxExpression
 {
-	FxExpression *xyzw[4];
+	constexpr static int maxVectorDimensions = 4;
+
+	FxExpression *xyzw[maxVectorDimensions];
 	bool isConst;	// gets set to true if all element are const (used by function defaults parser)

 public:
@ -563,8 +565,9 @@ public:
 	FxExpression *Resolve(FCompileContext&);
 	bool isConstVector(int dim)
 	{
-		if (!isConst) return false;
-		return dim == 2 ? xyzw[2] == nullptr : xyzw[2] != nullptr;
+		// Constant vector whose last filled slot is xyzw[dim - 1]; dim < 1 is rejected so xyzw[-1] is never read.
+		if (!isConst || dim < 1 || dim > maxVectorDimensions || !xyzw[dim - 1]) return false;
+		return dim == maxVectorDimensions || !xyzw[dim];
 	}

 	ExpEmit Emit(VMFunctionBuilder *build);
--- a/src/common/scripting/backend/vmbuilder.cpp
+++ b/src/common/scripting/backend/vmbuilder.cpp
@ -637,6 +637,7 @@ size_t VMFunctionBuilder::Emit(int opcode, int opa, VM_SHALF opbc)
 		int chg;
 		if (opa & REGT_MULTIREG2) chg = 2;
 		else if (opa & REGT_MULTIREG3) chg = 3;
+		else if (opa & REGT_MULTIREG4) chg = 4;
 		else chg = 1;
 		ParamChange(chg);
 	}
--- a/src/common/scripting/core/types.cpp
+++ b/src/common/scripting/core/types.cpp
@ -359,12 +359,13 @@ void PType::StaticInit()
 	TypeVector4->AddField(NAME_W, TypeFloat64);
 	// allow accessing xyz as a vector3. This is not supposed to be serialized so it's marked transient
 	TypeVector4->Symbols.AddSymbol(Create<PField>(NAME_XYZ, TypeVector3, VARF_Transient, 0));
+	TypeVector4->Symbols.AddSymbol(Create<PField>(NAME_XY, TypeVector2, VARF_Transient, 0));
 	TypeTable.AddType(TypeVector4, NAME_Struct);
 	TypeVector4->loadOp = OP_LV4;
 	TypeVector4->storeOp = OP_SV4;
 	TypeVector4->moveOp = OP_MOVEV4;
 	TypeVector4->RegType = REGT_FLOAT;
-	TypeVector4->RegCount = 3;
+	TypeVector4->RegCount = 4;
 	TypeVector4->isOrdered = true;


@ -398,8 +399,9 @@ void PType::StaticInit()
 	TypeFVector4->AddField(NAME_Y, TypeFloat32);
 	TypeFVector4->AddField(NAME_Z, TypeFloat32);
 	TypeFVector4->AddField(NAME_W, TypeFloat32);
-	// allow accessing xy as a vector2
+	// allow accessing xyz as a vector3
 	TypeFVector4->Symbols.AddSymbol(Create<PField>(NAME_XYZ, TypeFVector3, VARF_Transient, 0));
+	TypeFVector4->Symbols.AddSymbol(Create<PField>(NAME_XY, TypeFVector2, VARF_Transient, 0));
 	TypeTable.AddType(TypeFVector4, NAME_Struct);
 	TypeFVector4->loadOp = OP_LFV4;
 	TypeFVector4->storeOp = OP_SFV4;
--- a/src/common/scripting/core/vmdisasm.cpp
+++ b/src/common/scripting/core/vmdisasm.cpp
@ -639,6 +639,8 @@ static int print_reg(FILE *out, int col, int arg, int mode, int immshift, const
 				return col+printf_wrapper(out, "v%d.2", regnum);
 			case REGT_FLOAT | REGT_MULTIREG3:
 				return col+printf_wrapper(out, "v%d.3", regnum);
+			case REGT_FLOAT | REGT_MULTIREG4:
+				return col+printf_wrapper(out, "v%d.4", regnum);
 			case REGT_INT | REGT_KONST:
 				return col+print_reg(out, 0, regnum, MODE_KI, 0, func);
 			case REGT_FLOAT | REGT_KONST:
--- a/src/common/scripting/frontend/zcc-parse.lemon
+++ b/src/common/scripting/frontend/zcc-parse.lemon
@ -861,6 +861,7 @@ type_name1(X) ::= DOUBLE(T).				{ X.Int = ZCC_Float64; X.SourceLoc = T.SourceLoc
 //type_name1(X) ::= STRING(T).				{ X.Int = ZCC_String; X.SourceLoc = T.SourceLoc; } // [ZZ] it's handled elsewhere. this particular line only causes troubles in the form of String.Format being invalid.
 type_name1(X) ::= VECTOR2(T).				{ X.Int = ZCC_Vector2; X.SourceLoc = T.SourceLoc; }
 type_name1(X) ::= VECTOR3(T).				{ X.Int = ZCC_Vector3; X.SourceLoc = T.SourceLoc; }
+type_name1(X) ::= VECTOR4(T).				{ X.Int = ZCC_Vector4; X.SourceLoc = T.SourceLoc; }
 type_name1(X) ::= NAME(T).					{ X.Int = ZCC_Name; X.SourceLoc = T.SourceLoc; }
 type_name1(X) ::= SOUND(T).					{ X.Int = ZCC_Sound; X.SourceLoc = T.SourceLoc; }
 type_name1(X) ::= STATE(T).					{ X.Int = ZCC_State; X.SourceLoc = T.SourceLoc; }
@ -931,7 +932,7 @@ type_name(X) ::= DOT dottable_id(A).
 /* Type names can also be used as identifiers in contexts where type names
 * are not normally allowed. */
 %fallback IDENTIFIER
-	SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR2 VECTOR3 NAME MAP ARRAY VOID STATE COLOR SOUND UINT8 INT8 UINT16 INT16 PROPERTY.
+	SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR2 VECTOR3 VECTOR4 NAME MAP ARRAY VOID STATE COLOR SOUND UINT8 INT8 UINT16 INT16 PROPERTY.

 /* Aggregate types */
 %type aggregate_type {ZCC_Type *}
@ -1303,6 +1304,17 @@ primary(X) ::= SUPER(T).
 	X = expr;
 }
 primary(X) ::= constant(A).				{ X = A; /*X-overwrites-A*/ }
+primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) COMMA expr(D) RPAREN. [DOT]
+{
+	NEW_AST_NODE(VectorValue, expr, A);
+	expr->Operation = PEX_Vector;
+	expr->Type = TypeVector4;
+	expr->X = A;
+	expr->Y = B;
+	expr->Z = C;
+	expr->W = D;
+	XX = expr;
+}
 primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) RPAREN. [DOT]
 {
 	NEW_AST_NODE(VectorValue, expr, A);
@ -1311,6 +1323,7 @@ primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) RPAREN. [DOT]
 	expr->X = A;
 	expr->Y = B;
 	expr->Z = C;
+	expr->W = nullptr;
 	XX = expr;
 }
 primary(XX) ::= LPAREN expr(A) COMMA expr(B) RPAREN. [DOT]
@ -1321,6 +1334,7 @@ primary(XX) ::= LPAREN expr(A) COMMA expr(B) RPAREN. [DOT]
 	expr->X = A;
 	expr->Y = B;
 	expr->Z = nullptr;
+	expr->W = nullptr;
 	XX = expr;
 }
 primary(X) ::= LPAREN expr(A) RPAREN.
--- a/src/common/scripting/frontend/zcc_compile.cpp
+++ b/src/common/scripting/frontend/zcc_compile.cpp
@ -2353,7 +2353,7 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool
 				{
 					auto type = DetermineType(c->Type(), p, f->Name, p->Type, false, false);
 					int flags = 0;
-					if ((type->isStruct() && type != TypeVector2 && type != TypeVector3) || type->isDynArray())
+					if ((type->isStruct() && type != TypeVector2 && type != TypeVector3 && type != TypeVector4) || type->isDynArray())
 					{
 						// Structs are being passed by pointer, but unless marked 'out' that pointer must be readonly.
 						type = NewPointer(type /*, !(p->Flags & ZCC_Out)*/);
--- a/src/common/scripting/frontend/zcc_parser.cpp
+++ b/src/common/scripting/frontend/zcc_parser.cpp
@ -1297,7 +1297,8 @@ ZCC_TreeNode *TreeNodeDeepCopy_Internal(ZCC_AST *ast, ZCC_TreeNode *orig, bool c
 		// ZCC_VectorValue
 		copy->X = static_cast<ZCC_Expression *>(TreeNodeDeepCopy_Internal(ast, origCasted->X, true, copiedNodesList));
 		copy->Y = static_cast<ZCC_Expression *>(TreeNodeDeepCopy_Internal(ast, origCasted->Y, true, copiedNodesList));
-		copy->Z = static_cast<ZCC_Expression *>(TreeNodeDeepCopy_Internal(ast, origCasted->Z, true, copiedNodesList));
+		copy->Z = static_cast<ZCC_Expression*>(TreeNodeDeepCopy_Internal(ast, origCasted->Z, true, copiedNodesList));
+		copy->W = static_cast<ZCC_Expression*>(TreeNodeDeepCopy_Internal(ast, origCasted->W, true, copiedNodesList));

 		break;
 	}
--- a/src/common/scripting/jit/jit_call.cpp
+++ b/src/common/scripting/jit/jit_call.cpp
@ -182,6 +182,13 @@ int JitCompiler::StoreCallParams()
 			}
 			numparams += 2;
 			break;
+		case REGT_FLOAT | REGT_MULTIREG4:
+			for (int j = 0; j < 4; j++)
+			{
+				cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]);
+			}
+			numparams += 3;
+			break;
 		case REGT_FLOAT | REGT_ADDROF:
 			cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double))));
 			// When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3.
@ -256,6 +263,12 @@ void JitCompiler::LoadCallResult(int type, int regnum, bool addrof)
 			cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
 			cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
 		}
+		else if (type & REGT_MULTIREG4)
+		{
+			cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
+			cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
+			cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double)));
+		}
 		break;
 	case REGT_STRING:
 		// We don't have to do anything in this case. String values are never moved to virtual registers.
@ -408,6 +421,11 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target)
 					call->setArg(slot + j, regF[bc + j]);
 				numparams += 2;
 				break;
+			case REGT_FLOAT | REGT_MULTIREG4:
+				for (int j = 0; j < 4; j++)
+					call->setArg(slot + j, regF[bc + j]);
+				numparams += 3;
+				break;
 			case REGT_FLOAT | REGT_KONST:
 				tmp = newTempIntPtr();
 				tmp2 = newTempXmmSd();
@ -550,6 +568,12 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target)
 			cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
 			cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
 			break;
+		case REGT_FLOAT | REGT_MULTIREG4:
+			cc.movsd(regF[regnum], asmjit::x86::qword_ptr(vmframe, offsetF + regnum * sizeof(double)));
+			cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
+			cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
+			cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double)));
+			break;
 		case REGT_STRING:
 			// We don't have to do anything in this case. String values are never moved to virtual registers.
 			break;
@ -624,6 +648,13 @@ asmjit::FuncSignature JitCompiler::CreateFuncSignature()
 				args.Push(TypeIdOf<double>::kTypeId);
 				key += "fff";
 				break;
+			case REGT_FLOAT | REGT_MULTIREG4:
+				args.Push(TypeIdOf<double>::kTypeId);
+				args.Push(TypeIdOf<double>::kTypeId);
+				args.Push(TypeIdOf<double>::kTypeId);
+				args.Push(TypeIdOf<double>::kTypeId);
+				key += "ffff";
+				break;

 			default:
 				I_Error("Unknown REGT value passed to EmitPARAM\n");
--- a/src/common/scripting/jit/jit_flow.cpp
+++ b/src/common/scripting/jit/jit_flow.cpp
@ -110,7 +110,21 @@ void JitCompiler::EmitRET()
 			if (regtype & REGT_KONST)
 			{
 				auto tmp = newTempInt64();
-				if (regtype & REGT_MULTIREG3)
+				if (regtype & REGT_MULTIREG4)
+				{
+					cc.mov(tmp, (((int64_t*)konstf)[regnum]));
+					cc.mov(x86::qword_ptr(location), tmp);
+
+					cc.mov(tmp, (((int64_t*)konstf)[regnum + 1]));
+					cc.mov(x86::qword_ptr(location, 8), tmp);
+
+					cc.mov(tmp, (((int64_t*)konstf)[regnum + 2]));
+					cc.mov(x86::qword_ptr(location, 16), tmp);
+
+					cc.mov(tmp, (((int64_t*)konstf)[regnum + 3]));
+					cc.mov(x86::qword_ptr(location, 24), tmp);
+				}
+				else if (regtype & REGT_MULTIREG3)
 				{
 					cc.mov(tmp, (((int64_t *)konstf)[regnum]));
 					cc.mov(x86::qword_ptr(location), tmp);
@ -137,7 +151,14 @@ void JitCompiler::EmitRET()
 			}
 			else
 			{
-				if (regtype & REGT_MULTIREG3)
+				if (regtype & REGT_MULTIREG4)
+				{
+					cc.movsd(x86::qword_ptr(location), regF[regnum]);
+					cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]);
+					cc.movsd(x86::qword_ptr(location, 16), regF[regnum + 2]);
+					cc.movsd(x86::qword_ptr(location, 24), regF[regnum + 3]);
+				}
+				else if (regtype & REGT_MULTIREG3)
 				{
 					cc.movsd(x86::qword_ptr(location), regF[regnum]);
 					cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]);
--- a/src/common/scripting/jit/jit_load.cpp
+++ b/src/common/scripting/jit/jit_load.cpp
@ -333,7 +333,7 @@ void JitCompiler::EmitLV4()
 	cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp));
 	cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8));
 	cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16));
-	cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 32));
+	cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 24));
 }

 void JitCompiler::EmitLV4_R()
@ -344,7 +344,7 @@ void JitCompiler::EmitLV4_R()
 	cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp));
 	cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8));
 	cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16));
-	cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 32));
+	cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 24));
 }

 void JitCompiler::EmitLFV2()
@ -403,7 +403,7 @@ void JitCompiler::EmitLFV4()
 	cc.movss(regF[A], asmjit::x86::qword_ptr(tmp));
 	cc.movss(regF[A + 1], asmjit::x86::qword_ptr(tmp, 4));
 	cc.movss(regF[A + 2], asmjit::x86::qword_ptr(tmp, 8));
-	cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 16));
+	cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 12));
 	cc.cvtss2sd(regF[A], regF[A]);
 	cc.cvtss2sd(regF[A + 1], regF[A + 1]);
 	cc.cvtss2sd(regF[A + 2], regF[A + 2]);
@ -418,7 +418,7 @@ void JitCompiler::EmitLFV4_R()
 	cc.movss(regF[A], asmjit::x86::qword_ptr(tmp));
 	cc.movss(regF[A + 1], asmjit::x86::qword_ptr(tmp, 4));
 	cc.movss(regF[A + 2], asmjit::x86::qword_ptr(tmp, 8));
-	cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 16));
+	cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 12));
 	cc.cvtss2sd(regF[A], regF[A]);
 	cc.cvtss2sd(regF[A + 1], regF[A + 1]);
 	cc.cvtss2sd(regF[A + 2], regF[A + 2]);
--- a/src/common/scripting/jit/jit_store.cpp
+++ b/src/common/scripting/jit/jit_store.cpp
@ -170,7 +170,7 @@ void JitCompiler::EmitSV4()
 	cc.movsd(asmjit::x86::qword_ptr(tmp), regF[B]);
 	cc.movsd(asmjit::x86::qword_ptr(tmp, 8), regF[B + 1]);
 	cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]);
-	cc.movsd(asmjit::x86::qword_ptr(tmp, 32), regF[B + 3]);
+	cc.movsd(asmjit::x86::qword_ptr(tmp, 24), regF[B + 3]);
 }

 void JitCompiler::EmitSV4_R()
@ -182,7 +182,7 @@ void JitCompiler::EmitSV4_R()
 	cc.movsd(asmjit::x86::qword_ptr(tmp), regF[B]);
 	cc.movsd(asmjit::x86::qword_ptr(tmp, 8), regF[B + 1]);
 	cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]);
-	cc.movsd(asmjit::x86::qword_ptr(tmp, 32), regF[B + 3]);
+	cc.movsd(asmjit::x86::qword_ptr(tmp, 24), regF[B + 3]);
 }

 void JitCompiler::EmitSFV2()
@ -257,7 +257,7 @@ void JitCompiler::EmitSFV4()
 	cc.cvtsd2ss(tmpF, regF[B + 2]);
 	cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF);
 	cc.cvtsd2ss(tmpF, regF[B + 3]);
-	cc.movss(asmjit::x86::qword_ptr(tmp, 16), tmpF);
+	cc.movss(asmjit::x86::qword_ptr(tmp, 12), tmpF);
 }

 void JitCompiler::EmitSFV4_R()
@ -274,7 +274,7 @@ void JitCompiler::EmitSFV4_R()
 	cc.cvtsd2ss(tmpF, regF[B + 2]);
 	cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF);
 	cc.cvtsd2ss(tmpF, regF[B + 3]);
-	cc.movss(asmjit::x86::qword_ptr(tmp, 16), tmpF);
+	cc.movss(asmjit::x86::qword_ptr(tmp, 12), tmpF);
 }

 void JitCompiler::EmitSBIT()
--- a/src/common/scripting/vm/vm.h
+++ b/src/common/scripting/vm/vm.h
@ -80,8 +80,9 @@ enum
 	REGT_KONST		= 4,
 	REGT_MULTIREG2	= 8,
 	REGT_MULTIREG3	= 16,	// (e.g. a vector)
-	REGT_MULTIREG	= 24,
+	REGT_MULTIREG	= 8 | 16 | 64,
 	REGT_ADDROF		= 32,	// used with PARAM: pass address of this register
+	REGT_MULTIREG4	= 64,

 	REGT_NIL		= 128	// parameter was omitted
 };
@ -130,6 +131,22 @@ struct VMReturn
 		assert(RegType == REGT_FLOAT);
 		*(double *)Location = val;
 	}
+	void SetVector4(const double val[4])
+	{
+		assert(RegType == (REGT_FLOAT|REGT_MULTIREG4));
+		((double *)Location)[0] = val[0];
+		((double *)Location)[1] = val[1];
+		((double *)Location)[2] = val[2];
+		((double *)Location)[3] = val[3];
+	}
+	void SetVector4(const DVector4 &val)
+	{
+		assert(RegType == (REGT_FLOAT | REGT_MULTIREG4));
+		((double *)Location)[0] = val[0];
+		((double *)Location)[1] = val[1];
+		((double *)Location)[2] = val[2];
+		((double *)Location)[3] = val[3];
+	}
 	void SetVector(const double val[3])
 	{
 		assert(RegType == (REGT_FLOAT|REGT_MULTIREG3));
--- a/src/common/scripting/vm/vmexec.h
+++ b/src/common/scripting/vm/vmexec.h
@ -512,6 +512,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
 			v[2] = reg.f[B+2];
 		}
 		NEXTOP;
+	OP(SV4):
+		ASSERTA(a); ASSERTF(B+3); ASSERTKD(C);
+		GETADDR(PA,KC,X_WRITE_NIL);
+		{
+			auto v = (double *)ptr;
+			v[0] = reg.f[B];
+			v[1] = reg.f[B+1];
+			v[2] = reg.f[B+2];
+			v[3] = reg.f[B+3];
+		}
+		NEXTOP;
+	OP(SV4_R):
+		ASSERTA(a); ASSERTF(B+3); ASSERTD(C);
+		GETADDR(PA,RC,X_WRITE_NIL);
+		{
+			auto v = (double *)ptr;
+			v[0] = reg.f[B];
+			v[1] = reg.f[B+1];
+			v[2] = reg.f[B+2];
+			v[3] = reg.f[B+3];
+		}
+		NEXTOP;
 	OP(SFV2):
 		ASSERTA(a); ASSERTF(B+1); ASSERTKD(C);
 		GETADDR(PA,KC,X_WRITE_NIL);
@ -550,6 +572,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
 			v[2] = (float)reg.f[B+2];
 		}
 		NEXTOP;
+	OP(SFV4):
+		ASSERTA(a); ASSERTF(B+3); ASSERTKD(C);
+		GETADDR(PA,KC,X_WRITE_NIL);
+		{
+			auto v = (float *)ptr;
+			v[0] = (float)reg.f[B];
+			v[1] = (float)reg.f[B+1];
+			v[2] = (float)reg.f[B+2];
+			v[3] = (float)reg.f[B+3];
+		}
+		NEXTOP;
+	OP(SFV4_R):
+		ASSERTA(a); ASSERTF(B+3); ASSERTD(C);
+		GETADDR(PA,RC,X_WRITE_NIL);
+		{
+			auto v = (float *)ptr;
+			v[0] = (float)reg.f[B];
+			v[1] = (float)reg.f[B+1];
+			v[2] = (float)reg.f[B+2];
+			v[3] = (float)reg.f[B+3];
+		}
+		NEXTOP;
 	OP(SBIT):
 		ASSERTA(a); ASSERTD(B);
 		GETADDR(PA,0,X_WRITE_NIL);
@ -767,6 +811,15 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
 					::new(param + 2) VMValue(reg.f[b + 2]);
 					f->NumParam += 2;
 					break;
+				case REGT_FLOAT | REGT_MULTIREG4:
+					assert(b < f->NumRegF - 3);
+					assert(f->NumParam < sfunc->MaxParam - 3); // four slots (param .. param + 3) are written below
+					::new(param) VMValue(reg.f[b]);
+					::new(param + 1) VMValue(reg.f[b + 1]);
+					::new(param + 2) VMValue(reg.f[b + 2]);
+					::new(param + 3) VMValue(reg.f[b + 3]);
+					f->NumParam += 3;
+					break;
 				case REGT_FLOAT | REGT_ADDROF:
 					assert(b < f->NumRegF);
 					::new(param) VMValue(&reg.f[b]);
@ -2173,7 +2226,11 @@ static void SetReturn(const VMRegisters &reg, VMFrame *frame, VMReturn *ret, VM_
 			assert(regnum < frame->NumRegF);
 			src = &reg.f[regnum];
 		}
-		if (regtype & REGT_MULTIREG3)
+		if (regtype & REGT_MULTIREG4)
+		{
+			ret->SetVector4((double*)src);
+		}
+		else if (regtype & REGT_MULTIREG3)
 		{
 			ret->SetVector((double *)src);
 		}
--- a/src/common/scripting/vm/vmops.h
+++ b/src/common/scripting/vm/vmops.h
@ -85,8 +85,8 @@ xx(SV2,		sv2,	RPRVKI,		SV2_R,	4, REGT_INT)		// store vector2
 xx(SV2_R,	sv2,	RPRVRI,		NOP,	0, 0)
 xx(SV3,		sv3,	RPRVKI,		SV3_R,	4, REGT_INT)		// store vector3
 xx(SV3_R,	sv3,	RPRVRI,		NOP,	0, 0)
-xx(SV4,		sv3,	RPRVKI,		SV4_R,	4, REGT_INT)		// store vector4
-xx(SV4_R,	sv3,	RPRVRI,		NOP,	0, 0)
+xx(SV4,		sv4,	RPRVKI,		SV4_R,	4, REGT_INT)		// store vector4
+xx(SV4_R,	sv4,	RPRVRI,		NOP,	0, 0)
 xx(SFV2,	sfv2,	RPRVKI,		SFV2_R,	4, REGT_INT)		// store fvector2
 xx(SFV2_R,	sfv2,	RPRVRI,		NOP,	0, 0)
 xx(SFV3,	sfv3,	RPRVKI,		SFV3_R,	4, REGT_INT)		// store fvector3
@ -265,7 +265,7 @@ xx(LENV3,		lenv3,	RFRV,		NOP,	0, 0)			// fA = vB.Length
 xx(EQV3_R,		beqv3,	CVRR,		NOP,	0, 0)			// if ((vB == vkC) != A) then pc++ (inexact if A & 33)
 xx(EQV3_K,		beqv3,	CVRK,		NOP,	0, 0)			// this will never be used.

-// Vector math (4D/Quaternion)
+// Vector math (4D)
 xx(NEGV4,		negv4,		RVRV,	NOP,	0, 0)			// vA = -vB
 xx(ADDV4_RR,	addv4,		RVRVRV,	NOP,	0, 0)		// vA = vB + vkC
 xx(SUBV4_RR,	subv4,		RVRVRV,	NOP,	0, 0)		// vA = vkB - vkC