From 94287518e011b13e08177784a8dae052c80e3f16 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 10:13:36 +0100 Subject: [PATCH 01/21] - added a virtual CanCollideWith script method that can be overridden to do class specific collision checks. This will get called for both actors taking part in a collision, if one of the two calls returns false it will immediately abort PIT_CheckThing with no collision taking place at all. --- src/actor.h | 1 - src/p_map.cpp | 57 ++++++++++++++++++++++++++++++--- wadsrc/static/zscript/actor.txt | 1 + 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/actor.h b/src/actor.h index 6718e0931..25669dfec 100644 --- a/src/actor.h +++ b/src/actor.h @@ -650,7 +650,6 @@ public: virtual void Touch(AActor *toucher); void CallTouch(AActor *toucher); - // Centaurs and ettins squeal when electrocuted, poisoned, or "holy"-ed // Made a metadata property so no longer virtual void Howl (); diff --git a/src/p_map.cpp b/src/p_map.cpp index f25c33612..5e5d2cfe9 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -46,6 +46,7 @@ #include "r_utility.h" #include "p_blockmap.h" #include "p_3dmidtex.h" +#include "virtual.h" #include "s_sound.h" #include "decallib.h" @@ -1134,6 +1135,12 @@ static bool CanAttackHurt(AActor *victim, AActor *shooter) // //========================================================================== +DEFINE_ACTION_FUNCTION(AActor, CanCollideWith) +{ + // No need to check the parameters, as they are not even used. 
+ ACTION_RETURN_BOOL(true); +} + bool PIT_CheckThing(FMultiBlockThingsIterator &it, FMultiBlockThingsIterator::CheckResult &cres, const FBoundingBox &box, FCheckPosition &tm) { AActor *thing = cres.thing; @@ -1222,15 +1229,55 @@ bool PIT_CheckThing(FMultiBlockThingsIterator &it, FMultiBlockThingsIterator::Ch (thing->flags5 & MF5_DONTRIP) || ((tm.thing->flags6 & MF6_NOBOSSRIP) && (thing->flags2 & MF2_BOSS))) { - if (tm.thing->flags3 & thing->flags3 & MF3_DONTOVERLAP) - { // Some things prefer not to overlap each other, if possible - return unblocking; + // Some things prefer not to overlap each other, if possible (Q: Is this even needed anymore? It was just for dealing with some deficiencies in the code below in Heretic.) + if (!(tm.thing->flags3 & thing->flags3 & MF3_DONTOVERLAP)) + { + if ((tm.thing->Z() >= topz) || (tm.thing->Top() <= thing->Z())) + return true; } - if ((tm.thing->Z() >= topz) || (tm.thing->Top() <= thing->Z())) - return true; + // If they are not allowed to overlap, the rest of this function still needs to be executed. } } + // Call the script callback. This must be done before any other checks that perform some actual action or may already return a 'block'. + // The checks here are to do this only for conditions that would later result in an action, calling this for everything would be too much of a drag if + // too many scripted overrides were being used, as PIT_CheckThing is even called for touching all the monster corpses lying around. 
+ if (((thing->flags & MF_SOLID) || (thing->flags6 & (MF6_TOUCHY | MF6_BUMPSPECIAL))) && + ((tm.thing->flags & (MF_SOLID|MF_MISSILE)) || (tm.thing->flags6 & MF6_BLOCKEDBYSOLIDACTORS) || (tm.thing->BounceFlags & BOUNCE_MBF))) + { + static unsigned VIndex = ~0u; + if (VIndex == ~0u) + { + VIndex = GetVirtualIndex(RUNTIME_CLASS(AActor), "CanCollideWith"); + assert(VIndex != ~0u); + } + + VMValue params[3] = { tm.thing, thing, false }; + VMReturn ret; + int retval; + ret.IntAt(&retval); + + auto clss = tm.thing->GetClass(); + VMFunction *func = clss->Virtuals.Size() > VIndex ? clss->Virtuals[VIndex] : nullptr; + if (func != nullptr) + { + GlobalVMStack.Call(func, params, 3, &ret, 1, nullptr); + if (!retval) return true; + } + std::swap(params[0].a, params[1].a); + params[2].i = true; + + // re-get for the other actor. + clss = thing->GetClass(); + func = clss->Virtuals.Size() > VIndex ? clss->Virtuals[VIndex] : nullptr; + if (func != nullptr) + { + GlobalVMStack.Call(func, params, 3, &ret, 1, nullptr); + if (!retval) return true; + } + } + + if (tm.thing->player == NULL || !(tm.thing->player->cheats & CF_PREDICTING)) { // touchy object is alive, toucher is solid diff --git a/wadsrc/static/zscript/actor.txt b/wadsrc/static/zscript/actor.txt index ee4c92add..1e5c39416 100644 --- a/wadsrc/static/zscript/actor.txt +++ b/wadsrc/static/zscript/actor.txt @@ -268,6 +268,7 @@ class Actor : Thinker native virtual native void Die(Actor source, Actor inflictor, int dmgflags = 0); virtual native bool Slam(Actor victim); virtual native void Touch(Actor toucher); + virtual native bool CanCollideWith(Actor other, bool passive); // This is an empty native function, it's native for the sole reason of performance as this is in a performance critical spot. // Called when an actor is to be reflected by a disc of repulsion. // Returns true to continue normal blast processing. 
From c99a051a2aaad2c8800fb12470857d476ae1bd9b Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Sun, 27 Nov 2016 10:42:03 +0100 Subject: [PATCH 02/21] - Added lambda feature to FString::(Strip|Replace)Chars. Use it in the other (Strip|Replace)Chars methods to show how it would work. --- src/zstring.cpp | 58 +++++++++++-------------------------------------- src/zstring.h | 36 +++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/zstring.cpp b/src/zstring.cpp index 1999f54fe..70ce9dc68 100644 --- a/src/zstring.cpp +++ b/src/zstring.cpp @@ -863,66 +863,34 @@ void FString::Insert (size_t index, const char *instr, size_t instrlen) void FString::ReplaceChars (char oldchar, char newchar) { - size_t i, j; + if (oldchar == '\0') + return; - LockBuffer(); - for (i = 0, j = Len(); i < j; ++i) - { - if (Chars[i] == oldchar) - { - Chars[i] = newchar; - } - } - UnlockBuffer(); + ReplaceChars([&oldchar](char c){ return c == oldchar; }, newchar); } void FString::ReplaceChars (const char *oldcharset, char newchar) { - size_t i, j; + if (oldcharset == NULL || oldcharset[0] == '\0') + return; - LockBuffer(); - for (i = 0, j = Len(); i < j; ++i) - { - if (strchr (oldcharset, Chars[i]) != NULL) - { - Chars[i] = newchar; - } - } - UnlockBuffer(); + ReplaceChars([&oldcharset](char c){ return strchr(oldcharset, c) != NULL; }, newchar); } void FString::StripChars (char killchar) { - size_t read, write, mylen; + if (killchar == '\0') + return; - LockBuffer(); - for (read = write = 0, mylen = Len(); read < mylen; ++read) - { - if (Chars[read] != killchar) - { - Chars[write++] = Chars[read]; - } - } - Chars[write] = '\0'; - ReallocBuffer (write); - UnlockBuffer(); + StripChars([&killchar](char c){ return c == killchar; }); } -void FString::StripChars (const char *killchars) +void FString::StripChars (const char *killcharset) { - size_t read, write, mylen; + if (killcharset == NULL || killcharset[0] == '\0') + return; - LockBuffer(); - for (read 
= write = 0, mylen = Len(); read < mylen; ++read) - { - if (strchr (killchars, Chars[read]) == NULL) - { - Chars[write++] = Chars[read]; - } - } - Chars[write] = '\0'; - ReallocBuffer (write); - UnlockBuffer(); + StripChars([&killcharset](char c){ return strchr(killcharset, c) != NULL; }); } void FString::MergeChars (char merger) diff --git a/src/zstring.h b/src/zstring.h index ba9208719..6138f38aa 100644 --- a/src/zstring.h +++ b/src/zstring.h @@ -236,11 +236,45 @@ public: void Insert (size_t index, const char *instr); void Insert (size_t index, const char *instr, size_t instrlen); + template + void ReplaceChars (Func IsOldChar, char newchar) + { + size_t i, j; + + LockBuffer(); + for (i = 0, j = Len(); i < j; ++i) + { + if (IsOldChar(Chars[i])) + { + Chars[i] = newchar; + } + } + UnlockBuffer(); + } + void ReplaceChars (char oldchar, char newchar); void ReplaceChars (const char *oldcharset, char newchar); + template + void StripChars (Func IsKillChar) + { + size_t read, write, mylen; + + LockBuffer(); + for (read = write = 0, mylen = Len(); read < mylen; ++read) + { + if (!IsKillChar(Chars[read])) + { + Chars[write++] = Chars[read]; + } + } + Chars[write] = '\0'; + ReallocBuffer (write); + UnlockBuffer(); + } + void StripChars (char killchar); - void StripChars (const char *killchars); + void StripChars (const char *killcharset); void MergeChars (char merger); void MergeChars (char merger, char newchar); From 0cff4439450ac48e6ea37e9c9b29e242819220b9 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Wed, 30 Nov 2016 20:59:09 +0100 Subject: [PATCH 03/21] - Fixed missing linebreak in 'currentpos' error message. 
--- src/c_cmds.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_cmds.cpp b/src/c_cmds.cpp index f9cea4005..56739502e 100644 --- a/src/c_cmds.cpp +++ b/src/c_cmds.cpp @@ -1144,7 +1144,7 @@ CCMD(currentpos) } else { - Printf("You are not in game!"); + Printf("You are not in game!\n"); } } From 18b953dd5004e65e5a3340a54b93ab520b97b0e1 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 10:34:36 +0100 Subject: [PATCH 04/21] - fixed: Blasted objects should also be subject to being checked with CanCollideWith. --- src/p_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/p_map.cpp b/src/p_map.cpp index 5e5d2cfe9..8cbe38f0e 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -1243,7 +1243,7 @@ bool PIT_CheckThing(FMultiBlockThingsIterator &it, FMultiBlockThingsIterator::Ch // The checks here are to do this only for conditions that would later result in an action, calling this for everything would be too much of a drag if // too many scripted overrides were being used, as PIT_CheckThing is even called for touching all the monster corpses lying around. if (((thing->flags & MF_SOLID) || (thing->flags6 & (MF6_TOUCHY | MF6_BUMPSPECIAL))) && - ((tm.thing->flags & (MF_SOLID|MF_MISSILE)) || (tm.thing->flags6 & MF6_BLOCKEDBYSOLIDACTORS) || (tm.thing->BounceFlags & BOUNCE_MBF))) + ((tm.thing->flags & (MF_SOLID|MF_MISSILE)) || (tm.thing->flags2 & MF2_BLASTED) || (tm.thing->flags6 & MF6_BLOCKEDBYSOLIDACTORS) || (tm.thing->BounceFlags & BOUNCE_MBF))) { static unsigned VIndex = ~0u; if (VIndex == ~0u) From ebdc672985061681a7fcb5d79271809efbc6d8ce Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 10:45:20 +0100 Subject: [PATCH 05/21] - fixed: A_Saw was using the wrong angle for adjusting the facing direction at the end. 
--- wadsrc/static/zscript/doom/weaponchainsaw.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wadsrc/static/zscript/doom/weaponchainsaw.txt b/wadsrc/static/zscript/doom/weaponchainsaw.txt index 0653bc16d..88a8c87cd 100644 --- a/wadsrc/static/zscript/doom/weaponchainsaw.txt +++ b/wadsrc/static/zscript/doom/weaponchainsaw.txt @@ -148,14 +148,14 @@ extend class StateProvider if (anglediff < 0.0) { if (anglediff < -4.5) - angle = ang + 90.0 / 21; + angle = t.angleFromSource + 90.0 / 21; else angle -= 4.5; } else { if (anglediff > 4.5) - angle = ang - 90.0 / 21; + angle = t.angleFromSource - 90.0 / 21; else angle += 4.5; } From 0ed0c47a45909d18a8c230311db1f6511f641ecb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Dec 2016 13:57:54 +0100 Subject: [PATCH 06/21] Optimize capped sky rendering by writing 4 pixels at a time in 5 bands (solid, fade, texture, fade, solid) --- src/r_draw.cpp | 337 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 242 insertions(+), 95 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 5f592223b..0f3d04884 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2201,59 +2201,126 @@ void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - for (int col = 0; col < 4; col++) + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; + int textureheight0 = bufheight[0]; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; + int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = 
BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) { - uint8_t *dest = dc_dest + col; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[col]; - int textureheight0 = bufheight[0]; + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - int32_t frac = vplce[col]; - int32_t fracstep = vince[col]; + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += 
fracstep[col]; + } - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + 
solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; + frac[col] += fracstep[col]; } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + output[col] = source0[col][sample_index]; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; } } @@ -2321,66 +2388,146 @@ void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - for (int col = 0; col < 4; col++) + 
uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; + const uint8_t *source1[4] = { bufplce2[0], bufplce2[1], bufplce2[2], bufplce2[3] }; + int textureheight0 = bufheight[0]; + uint32_t maxtextureheight1 = bufheight[1] - 1; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; + int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) { - uint8_t *dest = dc_dest + col; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[col]; - const uint8_t *source1 = bufplce2[col]; - int textureheight0 = 
bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - int32_t frac = vplce[col]; - int32_t fracstep = vince[col]; + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; if (fg == 0) { uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[sample_index2]; + fg = source1[col][sample_index2]; } + output[col] = fg; - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + uint32_t c = palette[fg]; + int alpha_top = 
MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; + frac[col] += fracstep[col]; } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 
0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; } } From 5a08768eaffe0b5559645bfe2cef5cbd53aed064 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 14:29:03 +0100 Subject: [PATCH 07/21] - removed unused label. --- src/d_main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/d_main.cpp b/src/d_main.cpp index a2cb181f1..0f2d5af92 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -2653,7 +2653,6 @@ void D_DoomMain (void) } D_DoomLoop (); // this only returns if a 'restart' CCMD is given. -maxberestart: // // Clean up after a restart // From d0cf34890c580585ed7552d9d8c7b17b9630b7e8 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:01:48 +0100 Subject: [PATCH 08/21] - disabled R_DrawColumnHorizP_ASM because that function appears prone to crashing when rendering decals. 
--- src/r_draw.cpp | 2 +- src/r_draw.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0f3d04884..099904f25 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2556,7 +2556,7 @@ void R_InitColumnDrawers () { #ifdef X86_ASM R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; + R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; diff --git a/src/r_draw.h b/src/r_draw.h index fa84e5ae9..053149382 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -202,9 +202,10 @@ extern "C" void R_DrawFuzzColumnP_ASM (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); +void R_DrawColumnHorizP_C(void); + #else -void R_DrawColumnHorizP_C (void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); void R_DrawTranslatedColumnP_C (void); From 93163d12f121704a0c70a915e5c56d500524fcad Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:31:08 +0100 Subject: [PATCH 09/21] - removed R_DrawColumnHorizP_ASM completely after discovering that the compiler generated code isn't really anything worse than the old assembly code. This looks like something that may have been relevant 10 years ago but today it looks like there's no need for hand optimization here anymore. And since it appears to be broken anyway, off this goes. 
--- src/asm_ia32/tmap.asm | 227 ------------------------------------------ 1 file changed, 227 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index fb372d488..d9e689ee1 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM: ret -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnHorizP_ASM -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnHorizP_ASM@0 -GLOBAL _R_DrawColumnHorizP_ASM -GLOBAL R_DrawColumnHorizP_ASM - - align 16 - -@R_DrawColumnHorizP_ASM@0: -_R_DrawColumnHorizP_ASM: -R_DrawColumnHorizP_ASM: - -; count = dc_yh - dc_yl; - - mov eax,[dc_yh] - mov ecx,[dc_yl] - sub eax,ecx - mov edx,[dc_x] - - jl near .leave ; count < 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - - inc eax ; make 0 count mean 0 pixels - and edx,3 - push eax - mov eax,[dc_temp] - mov esi,[dc_ctspan+edx*4] - add eax,edx - lea eax,[eax+ecx*4] ; eax = top of column in buffer - mov ebp,[dc_yh] - mov [esi],ecx - mov [esi+4],ebp - add esi,8 - mov edi,[dc_source] - mov [dc_ctspan+edx*4],esi - mov esi,[dc_iscale] - mov ecx,[dc_texturefrac] ; ecx = frac - mov dl,[edi] ; load cache - mov ebx,[esp] - and ebx,0xfffffff8 - jnz .mthan8 - -; Register usage in the following code is: -; -; eax: dest -; edi: source -; ecx: frac (16.16) -; esi: fracstep (16.16) -; ebx: add1 -; ebp: add2 -; dl: texel1 -; dh: texel2 -;[esp] count - -; there are fewer than 8 pixels to draw - - mov ebx,[esp] -.lthan8 shr ebx,1 - jnc .even - -; do one pixel before loop (little opportunity for pairing) - - mov ebp,ecx ; copy frac to ebx - add ecx,esi ; increment frac - shr ebp,16 ; shift frac over to low end - add eax,4 - mov dl,[edi+ebp] - mov [eax-4],dl - -.even test ebx,ebx - jz near .done - -.loop2 mov [esp],ebx ; save counter - mov ebx,ecx ; copy frac for texel1 to ebx - shr ebx,16 ; shift frac for texel1 to 
low end - add ecx,esi ; increment frac - mov ebp,ecx ; copy frac for texel2 to ebp - shr ebp,16 ; shift frac for texel2 to low end - add ecx,esi ; increment frac - mov dl,[edi+ebx] ; read texel1 - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; read texel2 - mov [eax],dl ; write texel1 - mov [eax+4],dh ; write texel2 - add eax,8 ; increment dest - dec ebx ; decrement counter - jnz .loop2 ; loop until it hits 0 - - jmp .done - -; there are more than 8 pixels to draw. position eax as close to a 32 byte -; boundary as possible, then do whatever is left. - -.mthan8 test eax,4 - jz .try2 - - mov ebp,ecx ; frac: in ebp - add ecx,esi ; step - shr ebp,16 ; frac: shift - add eax,4 ; increment dest - mov ebx,[esp] ; fetch counter - mov dl,[edi+ebp] ; tex: read - dec ebx ; decrement counter - mov [eax-4],dl ; tex: write - mov [esp],ebx ; store counter - -.try2 test eax,8 - jz .try4 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex2: read - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - sub ebx,2 ; decrement counter - add eax,8 ; increment dest - mov [esp],ebx ; store counter - -.try4 test eax,16 - jz .try8 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - shr ebx,16 ; frac3: shift - mov dh,[edi+ebp] ; tex2: read - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex4: read - sub ebx,4 ; decrement counter - mov [esp],ebx ; store counter - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add 
eax,16 ; increment dest - -.try8 mov ebx,[esp] ; make counter count groups of 8 - sub esp,4 - shr ebx,3 - jmp .tail8 - - align 16 - -.loop8 mov [esp],ebx ; save counter - mov ebx,ecx ; frac1: in ebx - shr ebx,16 ; frac1: shift - add ecx,esi ; step - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - mov dh,[edi+ebp] ; tex2: read - shr ebx,16 ; frac3: shift - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,ecx ; frac5: in ebx - mov dh,[edi+ebp] ; tex4: read - shr ebx,16 ; frac5: shift - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add ecx,esi ; step - mov ebp,ecx ; frac6: in ebp - shr ebp,16 ; frac6: shift - mov dl,[edi+ebx] ; tex5: read - add ecx,esi ; step - mov ebx,ecx ; frac7: in ebx - mov [eax+16],dl ; tex5: write - shr ebx,16 ; frac7: shift - mov dh,[edi+ebp] ; tex6: read - add ecx,esi ; step - mov ebp,ecx ; frac8: in ebp - mov [eax+20],dh ; tex6: write - shr ebp,16 ; frac8: shift - add eax,32 ; increment dest pointer - mov dl,[edi+ebx] ; tex7: read - mov ebx,[esp] ; fetch counter - mov [eax-8],dl ; tex7: write - mov dh,[edi+ebp] ; tex8: read - add ecx,esi ; step - mov [eax-4],dh ; tex8: write - mov dl,[eax] ; load cache - dec ebx ; decrement counter -.tail8 jnz near .loop8 ; loop if more to do - - pop ebp - mov ebx,[esp] - and ebx,7 - jnz near .lthan8 - -.done pop eax - pop esi - pop edi - pop ebx - pop ebp -.leave ret - - ;*---------------------------------------------------------------------- ;* ;* rt_copy1col_asm From 8fd03bc4a314b080e563a80925c07d068cad876a Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 16:57:10 +0100 Subject: [PATCH 10/21] - fixed a few prototypes. 
--- src/r_draw.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 053149382..7d574c1c0 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -194,10 +194,8 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM extern "C" void R_DrawColumnP_Unrolled (void); -extern "C" void R_DrawColumnHorizP_ASM (void); extern "C" void R_DrawColumnP_ASM (void); extern "C" void R_DrawFuzzColumnP_ASM (void); - void R_DrawTranslatedColumnP_C (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); @@ -208,13 +206,14 @@ void R_DrawColumnHorizP_C(void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); -void R_DrawTranslatedColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); #endif +void R_DrawColumnHorizP_C(void); +void R_DrawTranslatedColumnP_C(void); void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); From 13efb349646e8de132b78e58d9b703c94758113b Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 18:05:34 +0100 Subject: [PATCH 11/21] - removed the asm version of R_DrawColumn because it doesn't provide any significant benefit. On modern systems it is actually slower than the C version, only on old ones it is marginally faster - but the overall execution time for this function is so low that even in the worst case scenario the minor loss of performance on older systems is still not relevant. 
--- src/asm_ia32/tmap.asm | 103 ------------------------------------------ src/r_draw.cpp | 25 +++------- src/r_draw.h | 17 +++---- 3 files changed, 13 insertions(+), 132 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index d9e689ee1..4ffcc370d 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -598,106 +598,6 @@ dmsdone add esp,8 -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnP -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnP_ASM@0 -GLOBAL _R_DrawColumnP_ASM -GLOBAL R_DrawColumnP_ASM - - align 16 - -R_DrawColumnP_ASM: -_R_DrawColumnP_ASM: -@R_DrawColumnP_ASM@0: - -; count = dc_yh - dc_yl; - - mov ecx,[dc_count] - test ecx,ecx - jle near rdcpret ; count <= 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - -; dest = ylookup[dc_yl] + dc_x + dc_destorg; - - mov edi,[dc_dest] - mov ebp,ecx - mov ebx,[dc_texturefrac] ; ebx = frac -rdcp1: sub edi,SPACEFILLER4 - mov ecx,ebx - shr ecx,16 - mov esi,[dc_source] - mov edx,[dc_iscale] - mov eax,[dc_colormap] - - cmp BYTE [CPU+66],byte 5 - jg rdcploop2 - - align 16 - -; The registers should now look like this: -; -; [31 .. 16][15 .. 8][7 .. 0] -; eax [colormap ] -; ebx [yi ][yf ] -; ecx [scratch ] -; edx [dyi ][dyf ] -; esi [source texture column ] -; edi [destination screen pointer ] -; ebp [counter ] -; - - -; Note the partial register stalls on anything better than a Pentium -; That's why there are two versions of this loop. 
- -rdcploop: - mov cl,[esi+ecx] ; Fetch texel - xor ch,ch - add ebx,edx ; increment frac -rdcp2: add edi,SPACEFILLER4 ; increment destination pointer - mov cl,[eax+ecx] ; colormap texel - mov [edi],cl ; Store texel - mov ecx,ebx - shr ecx,16 - dec ebp - jnz rdcploop ; loop - - pop esi - pop edi - pop ebx - pop ebp -rdcpret: - ret - - align 16 - -rdcploop2: - movzx ecx,byte [esi+ecx] ; Fetch texel - add ebx,edx ; increment frac - mov cl,[eax+ecx] ; colormap texel -rdcp3: add edi,SPACEFILLER4 ; increment destination pointer - mov [edi],cl ; Store texel - mov ecx,ebx - shr ecx,16 - dec ebp - jnz rdcploop2 ; loop - - pop esi - pop edi - pop ebx - pop ebp - ret - - - ;*---------------------------------------------------------------------- ;* ;* R_DrawFuzzColumnP @@ -1648,9 +1548,6 @@ ASM_PatchPitch: _ASM_PatchPitch: @ASM_PatchPitch@0: mov eax,[dc_pitch] - mov [rdcp1+2],eax - mov [rdcp2+2],eax - mov [rdcp3+2],eax mov [s4p+1],eax mov [a4p+1],eax mov [ac4p+1],eax diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 099904f25..7d8ebe036 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -69,16 +69,11 @@ int scaledviewwidth; // These get changed depending on the current // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); -void (*R_DrawColumn)(void); void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); void (*R_DrawSpan)(void); void (*R_DrawSpanMasked)(void); -void (*R_DrawSpanTranslucent)(void); -void (*R_DrawSpanMaskedTranslucent)(void); -void (*R_DrawSpanAddClamp)(void); -void (*R_DrawSpanMaskedAddClamp)(void); void (*rt_map4cols)(int,int,int); // @@ -171,7 +166,6 @@ void R_InitShadeMaps() /* */ /************************************/ -#ifndef X86_ASM // // A column is a vertical slice/span from a wall texture that, // given the DOOM style restrictions on the view orientation, @@ -179,7 +173,7 @@ void R_InitShadeMaps() // Thus a special case loop for very fast rendering can // be used. 
It has also been used with Wolfenstein 3D. // -void R_DrawColumnP_C (void) +void R_DrawColumn (void) { int count; BYTE* dest; @@ -222,7 +216,7 @@ void R_DrawColumnP_C (void) } while (--count); } } -#endif + // [RH] Just fills a column with a color void R_FillColumnP (void) @@ -1192,7 +1186,7 @@ void R_DrawSpanMaskedP_C (void) } #endif -void R_DrawSpanTranslucentP_C (void) +void R_DrawSpanTranslucent (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1252,7 +1246,7 @@ void R_DrawSpanTranslucentP_C (void) } } -void R_DrawSpanMaskedTranslucentP_C (void) +void R_DrawSpanMaskedTranslucent (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1326,7 +1320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void R_DrawSpanAddClampP_C (void) +void R_DrawSpanAddClamp (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1392,7 +1386,7 @@ void R_DrawSpanAddClampP_C (void) } } -void R_DrawSpanMaskedAddClampP_C (void) +void R_DrawSpanMaskedAddClamp (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -2550,12 +2544,10 @@ const BYTE *R_GetColumn (FTexture *tex, int col) return tex->GetColumn (col, NULL); } - // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { #ifdef X86_ASM - R_DrawColumn = R_DrawColumnP_ASM; R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; @@ -2572,7 +2564,6 @@ void R_InitColumnDrawers () } #else R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawColumn = R_DrawColumnP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; @@ -2580,10 +2571,6 @@ void R_InitColumnDrawers () R_DrawSpanMasked = R_DrawSpanMaskedP_C; rt_map4cols = rt_map4cols_c; #endif - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; } // [RH] Choose column 
drawers in a single place diff --git a/src/r_draw.h b/src/r_draw.h index 7d574c1c0..c5e99e610 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -65,7 +65,6 @@ extern "C" unsigned int horizspans[4]; // The span blitting interface. // Hook in assembler or system specific BLT here. -extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); @@ -103,16 +102,16 @@ void R_SetSpanSource(const BYTE *pixels); extern void (*R_DrawSpanMasked)(void); // Span drawing for translucent textures. -extern void (*R_DrawSpanTranslucent)(void); +void R_DrawSpanTranslucent(void); // Span drawing for masked, translucent textures. -extern void (*R_DrawSpanMaskedTranslucent)(void); +void R_DrawSpanMaskedTranslucent(void); // Span drawing for translucent, additive textures. -extern void (*R_DrawSpanAddClamp)(void); +void R_DrawSpanAddClamp(void); // Span drawing for masked, translucent, additive textures. -extern void (*R_DrawSpanMaskedAddClamp)(void); +void R_DrawSpanMaskedAddClamp(void); // [RH] Span blit into an interleaved intermediate buffer extern void (*R_DrawColumnHoriz)(void); @@ -193,8 +192,6 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM -extern "C" void R_DrawColumnP_Unrolled (void); -extern "C" void R_DrawColumnP_ASM (void); extern "C" void R_DrawFuzzColumnP_ASM (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); @@ -204,7 +201,6 @@ void R_DrawColumnHorizP_C(void); #else -void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); @@ -212,10 +208,11 @@ void R_DrawSpanMaskedP_C (void); #endif +void R_DrawColumn(); void R_DrawColumnHorizP_C(void); void R_DrawTranslatedColumnP_C(void); -void R_DrawSpanTranslucentP_C (void); -void R_DrawSpanMaskedTranslucentP_C (void); +void R_DrawSpanTranslucent (void); +void R_DrawSpanMaskedTranslucent (void); void R_DrawTlatedLucentColumnP_C (void); #define 
R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C From c9caaf08c8f0f94c46326b85a0130d84c05dc930 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 18:38:38 +0100 Subject: [PATCH 12/21] - removed asm version of R_DrawFuzzColumn, because even on my 9 year old laptop this was significantly slower than the C version. --- src/asm_ia32/tmap.asm | 140 ------------------------------------------ src/r_draw.cpp | 8 +-- src/r_draw.h | 4 +- 3 files changed, 3 insertions(+), 149 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index 4ffcc370d..cbfadd5d1 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -597,146 +597,6 @@ dmsdone add esp,8 - -;*---------------------------------------------------------------------- -;* -;* R_DrawFuzzColumnP -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawFuzzColumnP_ASM@0 -GLOBAL _R_DrawFuzzColumnP_ASM -GLOBAL R_DrawFuzzColumnP_ASM - - align 16 - -R_DrawFuzzColumnP_ASM: -_R_DrawFuzzColumnP_ASM: -@R_DrawFuzzColumnP_ASM@0: - -; Adjust borders. Low... - mov eax,[dc_yl] - push ebx - push esi - push edi - push ebp - - cmp eax,0 - jg .ylok - - mov eax,1 - nop - -; ...and high. 
-.ylok mov edx,[fuzzviewheight] - mov esi,[dc_yh] - cmp esi,edx - jle .yhok - - mov esi,edx - nop - -.yhok mov edx,[dc_x] - sub esi,eax ; esi = count - js near .dfcdone ; Zero length (or less) - - mov edi,[ylookup+eax*4] - mov ebx,edx - add edi,[dc_destorg] - mov eax,[NormalLight] - mov ecx,[fuzzpos] - add edi,ebx - add eax,256*6 - inc esi - mov ebp,[dc_pitch] - mov edx,FUZZTABLE - test ecx,ecx - je .fuzz0 - -; -; esi = count -; edi = dest -; ecx = fuzzpos -; eax = colormap 6 -; - -; first loop: end with fuzzpos or count 0, whichever happens first - - sub edx,ecx ; edx = # of entries left in fuzzoffset - mov ebx,esi - cmp esi,edx - jle .enuf - mov esi,edx -.enuf sub ebx,esi - mov edx,[fuzzoffset+ecx*4] - push ebx - xor ebx,ebx - -.loop1 inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .loop1 - -; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those - - pop esi - cmp ecx,FUZZTABLE - jl .savefuzzpos - xor ecx,ecx - nop -.fuzz0 cmp esi,FUZZTABLE - jl .chunked - -.oloop lea edx,[esi-FUZZTABLE] - mov esi,FUZZTABLE - push edx - mov edx,[fuzzoffset+ecx*4] - -.iloop inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .iloop - - pop esi - xor ecx,ecx - cmp esi,FUZZTABLE - jge .oloop - -; third loop: Do whatever is left - -.chunked: - test esi,esi - jle .savefuzzpos - mov edx,[fuzzoffset+ecx*4] - nop - -.loop3 inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .loop3 - -.savefuzzpos: - mov [fuzzpos],ecx -.dfcdone: - pop ebp - pop edi - pop esi - pop ebx - ret - - ;*---------------------------------------------------------------------- ;* ;* rt_copy1col_asm diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7d8ebe036..a48fc7195 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -69,7 +69,6 @@ int scaledviewwidth; // These get changed 
depending on the current // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); -void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); void (*R_DrawSpan)(void); @@ -408,13 +407,12 @@ void R_InitFuzzTable (int fuzzoff) } } -#ifndef X86_ASM // // Creates a fuzzy image by copying pixels from adjacent ones above and below. // Used with an all black colormap, this could create the SHADOW effect, // i.e. spectres and invisible players. // -void R_DrawFuzzColumnP_C (void) +void R_DrawFuzzColumn (void) { int count; BYTE *dest; @@ -484,7 +482,6 @@ void R_DrawFuzzColumnP_C (void) fuzzpos = fuzz; } } -#endif // // R_DrawTranlucentColumn @@ -2544,12 +2541,12 @@ const BYTE *R_GetColumn (FTexture *tex, int col) return tex->GetColumn (col, NULL); } + // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { #ifdef X86_ASM R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_ASM; @@ -2564,7 +2561,6 @@ void R_InitColumnDrawers () } #else R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_C; diff --git a/src/r_draw.h b/src/r_draw.h index c5e99e610..8b1c8ffbc 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -83,7 +83,7 @@ extern void setupmvline (int); extern void setuptmvline (int); // The Spectre/Invisibility effect. 
-extern void (*R_DrawFuzzColumn)(void); +extern void R_DrawFuzzColumn(void); // [RH] Draw shaded column extern void (*R_DrawShadedColumn)(void); @@ -192,7 +192,6 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM -extern "C" void R_DrawFuzzColumnP_ASM (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); @@ -201,7 +200,6 @@ void R_DrawColumnHorizP_C(void); #else -void R_DrawFuzzColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); From f4454d2e00f3b078007e9a25b7ede8aebc7f394b Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 19:32:54 +0100 Subject: [PATCH 13/21] - minor, but very effective optimization for R_DrawSpanMasked: Do not store the texel value in a byte. Store it in a local int variable. This allows the compiler to read it with a zero extending instruction instead of using a byte reading instruction and then later having it to convert to an int anyway. This removes one instruction from the loop which results in a 10% performance increase on 32 bit. --- src/r_draw.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a48fc7195..fba01cbfe 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1037,7 +1037,7 @@ void R_SetupSpanBits(FTexture *tex) { ds_xbits--; } - if ((1 << ds_ybits) > tex->GetHeight()) + if ((1 << ds_ybits) > tex->GetHeight()) { ds_ybits--; } @@ -1048,7 +1048,7 @@ void R_SetupSpanBits(FTexture *tex) // // Draws the actual span. -#ifndef X86_ASM +//#ifndef X86_ASM void R_DrawSpanP_C (void) { dsfixed_t xfrac; @@ -1147,7 +1147,7 @@ void R_DrawSpanMaskedP_C (void) // 64x64 is the most common case by far, so special case it. 
do { - BYTE texdata; + int texdata; spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); texdata = source[spot]; @@ -1167,7 +1167,7 @@ void R_DrawSpanMaskedP_C (void) int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + int texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; @@ -1181,7 +1181,7 @@ void R_DrawSpanMaskedP_C (void) } while (--count); } } -#endif +//#endif void R_DrawSpanTranslucent (void) { From 86fcc3fd21ca4b66daa3f3e0d7287de6908e2ef1 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 23:53:36 +0100 Subject: [PATCH 14/21] - added a heavily optimized version of vlinec4 for x64. The original loaded everything from the global variables. While this is acceptable in 32 bit code because it has an immediate register load instruction, for 64 bit this does not exist. Accessing these variables from the stack or a register doubles the execution speed of this function and on a Core i7-3770 from 2012 is even faster than the assembly version. Right now the assembly version is still there, pending a benchmark run on an older 64 bit system. --- src/r_draw.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index fba01cbfe..0e217c2d7 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1673,6 +1673,7 @@ DWORD vlinec1 () return frac; } +#ifndef _M_X64 void vlinec4 () { BYTE *dest = dc_dest; @@ -1689,6 +1690,43 @@ void vlinec4 () dest += dc_pitch; } while (--count); } +#else +// Optimized version for 64 bit. In 64 bit mode, accessing global variables is very expensive so even though +// this exceeds the register count, loading all those values into a local variable is faster than not loading all of them. 
+void vlinec4() +{ + BYTE *dest = dc_dest; + int count = dc_count; + int bits = vlinebits; + DWORD place; + auto pal0 = palookupoffse[0]; + auto pal1 = palookupoffse[1]; + auto pal2 = palookupoffse[2]; + auto pal3 = palookupoffse[3]; + auto buf0 = bufplce[0]; + auto buf1 = bufplce[1]; + auto buf2 = bufplce[2]; + auto buf3 = bufplce[3]; + const auto vince0 = vince[0]; + const auto vince1 = vince[1]; + const auto vince2 = vince[2]; + const auto vince3 = vince[3]; + auto vplce0 = vplce[0]; + auto vplce1 = vplce[1]; + auto vplce2 = vplce[2]; + auto vplce3 = vplce[3]; + + do + { + dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; + dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; + dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; + dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; + dest += dc_pitch; + } while (--count); +} +#endif + #endif void setupmvline (int fracbits) From f6fb27b6835f586a381d86e6d212ba7626546a7d Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 00:46:58 +0100 Subject: [PATCH 15/21] - deleted rt_copy*col and rt_map*col assembly versions after running benchmarks that show inferior performance to the C++ versions on both older and newer CPUs. 
--- src/asm_ia32/tmap.asm | 421 ------------------------------------------ src/r_draw.cpp | 11 +- src/r_draw.h | 23 +-- src/r_drawt.cpp | 10 +- 4 files changed, 11 insertions(+), 454 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index cbfadd5d1..2096b9222 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -597,427 +597,6 @@ dmsdone add esp,8 -;*---------------------------------------------------------------------- -;* -;* rt_copy1col_asm -;* -;* ecx = hx -;* edx = sx -;* [esp+4] = yl -;* [esp+8] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_copy1col_asm@16 -GLOBAL _rt_copy1col_asm -GLOBAL rt_copy1col_asm - - align 16 - -rt_copy1col_asm: -_rt_copy1col_asm: - pop eax - mov edx,[esp+4*3] - mov ecx,[esp+4*2] - push edx - push ecx - mov ecx,[esp+4*2] - mov edx,[esp+4*3] - push eax - -@rt_copy1col_asm@16: - mov eax, [esp+4] - push ebx - mov ebx, [esp+12] - push esi - sub ebx, eax - push edi - js .done - - lea esi,[eax*4] - inc ebx ; ebx = count - mov eax,edx - add ecx,esi - mov edi,[ylookup+esi] - add ecx,[dc_temp] ; ecx = source - mov esi,[dc_pitch] ; esi = pitch - add eax,edi ; eax = dest - add eax,[dc_destorg] - - shr ebx,1 - jnc .even - - mov dl,[ecx] - add ecx,4 - mov [eax],dl - add eax,esi - -.even and ebx,ebx - jz .done - -.loop mov dl,[ecx] - mov dh,[ecx+4] - mov [eax],dl - mov [eax+esi],dh - add ecx,8 - lea eax,[eax+esi*2] - dec ebx - jnz .loop - -.done pop edi - pop esi - pop ebx - ret 8 - -;*---------------------------------------------------------------------- -;* -;* rt_copy4cols_asm -;* -;* ecx = sx -;* edx = yl -;* [esp+4] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_copy4cols_asm@12 -GLOBAL _rt_copy4cols_asm -GLOBAL rt_copy4cols_asm - - align 16 - -rt_copy4cols_asm: -_rt_copy4cols_asm: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_copy4cols_asm@12: - push ebx - mov 
ebx,[esp+8] - push esi - sub ebx,edx - push edi - js .done - - inc ebx ; ebx = count - mov eax,ecx - mov esi,[ylookup+edx*4] - mov ecx,[dc_temp] - add eax,esi ; eax = dest - add eax,[dc_destorg] - lea ecx,[ecx+edx*4] ; ecx = source - mov edx,[dc_pitch] ; edx = pitch - - shr ebx,1 - jnc .even - - mov esi,[ecx] - add ecx,4 - mov [eax],esi - add eax,edx - -.even and ebx,ebx - jz .done - -.loop mov esi,[ecx] - mov edi,[ecx+4] - mov [eax],esi - mov [eax+edx],edi - add ecx,8 - lea eax,[eax+edx*2] - dec ebx - jnz .loop - -.done pop edi - pop esi - pop ebx - ret 4 - -;*---------------------------------------------------------------------- -;* -;* rt_map1col_asm -;* -;* ecx = hx -;* edx = sx -;* [esp+4] = yl -;* [esp+8] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_map1col_asm@16 -GLOBAL _rt_map1col_asm -GLOBAL rt_map1col_asm - - align 16 - -rt_map1col_asm: -_rt_map1col_asm: - pop eax - mov edx,[esp+4*3] - mov ecx,[esp+4*2] - push edx - push ecx - mov ecx,[esp+4*2] - mov edx,[esp+4*3] - push eax - -@rt_map1col_asm@16: - mov eax,[esp+4] - push ebx - mov ebx,[esp+12] - push ebp - push esi - sub ebx, eax - push edi - js .done - - lea edi,[eax*4] - mov esi,[dc_colormap] ; esi = colormap - inc ebx ; ebx = count - mov eax,edx - lea ebp,[ecx+edi] ; ebp = source - add ebp,[dc_temp] - mov ecx,[ylookup+edi] - mov edi,[dc_pitch] ; edi = pitch - add eax,ecx ; eax = dest - xor ecx,ecx - xor edx,edx - add eax,[dc_destorg] - - shr ebx,1 - jnc .even - - mov dl,[ebp] - add ebp,4 - mov dl,[esi+edx] - mov [eax],dl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop mov dl,[ebp] - mov cl,[ebp+4] - add ebp,8 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+edi],cl - dec ebx - lea eax,[eax+edi*2] - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 8 - -;*---------------------------------------------------------------------- -;* -;* rt_map4cols_asm -;* -;* rt_map4cols_asm1 is for PPro and above -;* rt_map4cols_asm2 
is for Pentium and below -;* -;* ecx = sx -;* edx = yl -;* [esp+4] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_map4cols_asm1@12 -GLOBAL _rt_map4cols_asm1 -GLOBAL rt_map4cols_asm1 - - align 16 - -rt_map4cols_asm1: -_rt_map4cols_asm1: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_map4cols_asm1@12: - push ebx - mov ebx,[esp+8] - push ebp - push esi - sub ebx,edx - push edi - js near .done - - mov esi,[dc_colormap] ; esi = colormap - shl edx,2 - mov eax,ecx - inc ebx ; ebx = count - mov edi,[ylookup+edx] - mov ebp,[dc_temp] - add ebp,edx ; ebp = source - add eax,edi ; eax = dest - mov edi,[dc_pitch] ; edi = pitch - add eax,[dc_destorg] - xor ecx,ecx - xor edx,edx - - shr ebx,1 - jnc .even - - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,4 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop: - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,8 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-6] - mov cl,[ebp-5] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - mov dl,[ebp-4] - mov cl,[ebp-3] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+edi],dl - mov [eax+edi+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+edi+2],dl - mov [eax+edi+3],cl - lea eax,[eax+edi*2] - dec ebx - - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 4 - -GLOBAL @rt_map4cols_asm2@12 -GLOBAL _rt_map4cols_asm2 -GLOBAL rt_map4cols_asm2 - - align 16 - -rt_map4cols_asm2: -_rt_map4cols_asm2: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_map4cols_asm2@12: - push ebx - mov ebx,[esp+8] - push ebp - push esi - sub ebx,edx - push edi - js near .done - - mov esi,[dc_colormap] ; esi = 
colormap - shl edx,2 - mov eax,ecx - inc ebx ; ebx = count - mov edi,[ylookup+edx] - mov ebp,[dc_temp] - add ebp,edx ; ebp = source - add eax,edi ; eax = dest - mov edi,[dc_pitch] ; edi = pitch - add eax,[dc_destorg] - xor ecx,ecx - xor edx,edx - - shr ebx,1 - jnc .even - - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,4 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop: - mov dl,[ebp+3] - mov ch,[esi+edx] - mov dl,[ebp+2] - mov cl,[esi+edx] - shl ecx,16 - mov dl,[ebp+1] - mov ch,[esi+edx] - mov dl,[ebp] - mov cl,[esi+edx] - mov [eax],ecx - add eax,edi - - mov dl,[ebp+7] - mov ch,[esi+edx] - mov dl,[ebp+6] - mov cl,[esi+edx] - shl ecx,16 - mov dl,[ebp+5] - mov ch,[esi+edx] - mov dl,[ebp+4] - mov cl,[esi+edx] - mov [eax],ecx - add eax,edi - add ebp,8 - dec ebx - - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 4 - - align 16 GLOBAL rt_shaded4cols_asm GLOBAL _rt_shaded4cols_asm diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0e217c2d7..7e966c8ab 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -73,7 +73,6 @@ void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); void (*R_DrawSpan)(void); void (*R_DrawSpanMasked)(void); -void (*rt_map4cols)(int,int,int); // // R_DrawColumn @@ -2589,21 +2588,13 @@ void R_InitColumnDrawers () R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_ASM; R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; - if (CPU.Family <= 5) - { - rt_map4cols = rt_map4cols_asm2; - } - else - { - rt_map4cols = rt_map4cols_asm1; - } #else R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_C; R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols_c; + rt_map4cols = rt_map4cols; #endif } diff --git a/src/r_draw.h 
b/src/r_draw.h index 8b1c8ffbc..6713d4091 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -120,16 +120,19 @@ extern void (*R_DrawColumnHoriz)(void); void R_InitColumnDrawers (); // [RH] Moves data from the temporary buffer to the screen. + +void rt_copy1col(int hx, int sx, int yl, int yh); +void rt_copy4cols(int sx, int yl, int yh); +void rt_map4cols(int sx, int yl, int yh); + extern "C" { -void rt_copy1col_c (int hx, int sx, int yl, int yh); -void rt_copy4cols_c (int sx, int yl, int yh); void rt_shaded1col (int hx, int sx, int yl, int yh); void rt_shaded4cols_c (int sx, int yl, int yh); void rt_shaded4cols_asm (int sx, int yl, int yh); -void rt_map1col_c (int hx, int sx, int yl, int yh); +void rt_map1col (int hx, int sx, int yl, int yh); void rt_add1col (int hx, int sx, int yl, int yh); void rt_addclamp1col (int hx, int sx, int yl, int yh); void rt_subclamp1col (int hx, int sx, int yl, int yh); @@ -141,7 +144,6 @@ void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); -void rt_map4cols_c (int sx, int yl, int yh); void rt_add4cols_c (int sx, int yl, int yh); void rt_addclamp4cols_c (int sx, int yl, int yh); void rt_subclamp4cols (int sx, int yl, int yh); @@ -153,29 +155,16 @@ void rt_tlateaddclamp4cols (int sx, int yl, int yh); void rt_tlatesubclamp4cols (int sx, int yl, int yh); void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); -void rt_copy1col_asm (int hx, int sx, int yl, int yh); -void rt_map1col_asm (int hx, int sx, int yl, int yh); - -void rt_copy4cols_asm (int sx, int yl, int yh); -void rt_map4cols_asm1 (int sx, int yl, int yh); -void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); } -extern void (*rt_map4cols)(int sx, int yl, int yh); #ifdef X86_ASM -#define rt_copy1col rt_copy1col_asm -#define rt_copy4cols rt_copy4cols_asm -#define 
rt_map1col rt_map1col_asm #define rt_shaded4cols rt_shaded4cols_asm #define rt_add4cols rt_add4cols_asm #define rt_addclamp4cols rt_addclamp4cols_asm #else -#define rt_copy1col rt_copy1col_c -#define rt_copy4cols rt_copy4cols_c -#define rt_map1col rt_map1col_c #define rt_shaded4cols rt_shaded4cols_c #define rt_add4cols rt_add4cols_c #define rt_addclamp4cols rt_addclamp4cols_c diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index cb228cce0..a4f581d12 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -69,9 +69,8 @@ extern "C" void R_SetupAddCol(); extern "C" void R_SetupAddClampCol(); #endif -#ifndef X86_ASM // Copies one span at hx to the screen at sx. -void rt_copy1col_c (int hx, int sx, int yl, int yh) +void rt_copy1col (int hx, int sx, int yl, int yh) { BYTE *source; BYTE *dest; @@ -112,7 +111,7 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_c (int sx, int yl, int yh) +void rt_copy4cols (int sx, int yl, int yh) { int *source; int *dest; @@ -145,7 +144,7 @@ void rt_copy4cols_c (int sx, int yl, int yh) } // Maps one span at hx to the screen at sx. -void rt_map1col_c (int hx, int sx, int yl, int yh) +void rt_map1col (int hx, int sx, int yl, int yh) { BYTE *colormap; BYTE *source; @@ -180,7 +179,7 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) } // Maps all four spans to the screen starting at sx. -void rt_map4cols_c (int sx, int yl, int yh) +void rt_map4cols (int sx, int yl, int yh) { BYTE *colormap; BYTE *source; @@ -222,7 +221,6 @@ void rt_map4cols_c (int sx, int yl, int yh) dest += pitch*2; } while (--count); } -#endif void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { From c0a622eb544f0b99de1ff5d74e3e7abcaba7ccae Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 02:08:26 +0100 Subject: [PATCH 16/21] - removed pointless assignment. 
--- src/r_draw.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7e966c8ab..6f58ec2a3 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2594,7 +2594,6 @@ void R_InitColumnDrawers () R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_C; R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols; #endif } From 7624973ef331409d635de8447779524cc79863e7 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 11:50:41 +0100 Subject: [PATCH 17/21] - updated list of render styles in UDMF spec. --- specs/udmf_zdoom.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/specs/udmf_zdoom.txt b/specs/udmf_zdoom.txt index 9662af1bb..835f3780e 100644 --- a/specs/udmf_zdoom.txt +++ b/specs/udmf_zdoom.txt @@ -252,9 +252,10 @@ Note: All fields default to false unless mentioned otherwise. // negative values are used as their absolute. Default = 1. renderstyle = ; // Set per-actor render style, overriding the class default. Possible values can be "normal", - // "none", "add" or "additive", "subtract" or "subtractive", "stencil", "translucentstencil", - // "translucent", "fuzzy", "optfuzzy", "soultrans". Default is an empty string for no change. - fillcolor = ; // Fill color used by the "stencil" and "translucentstencil" rendestyles, as RRGGBB value, default = 0x000000. + // "none", "add" or "additive", "subtract" or "subtractive", "stencil", "translucentstencil", + // "addstencil", "shaded", "addshaded", "translucent", "fuzzy", "optfuzzy", "soultrans" and "shadow". + // Default is an empty string for no change. + fillcolor = ; // Fill color used by the "stencil", "addstencil" and "translucentstencil" rendestyles, as RRGGBB value, default = 0x000000. alpha = ; // Translucency of this actor (if applicable to renderstyle), default is 1.0. score = ; // Score value of this actor, overriding the class default if not null. Default = 0. pitch = ; // Pitch of thing in degrees. 
Default = 0 (horizontal). From f722967abe036476c8aaca5e01f787308a881f13 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 13:24:42 +0100 Subject: [PATCH 18/21] - added automatic type deduction capabilities for local variables. If you type 'let variable = value;' the type of 'variable' will be deduced from the given value. This is mostly of interest for type casting pointers, because 'let p = Classtype(objectvar);' does not require writing the class type name twice. --- src/dobjtype.cpp | 2 ++ src/dobjtype.h | 3 ++- src/namedef.h | 1 + src/sc_man_scanner.re | 1 + src/sc_man_tokens.h | 1 + src/scripting/codegeneration/codegen.cpp | 30 ++++++++++++++++++++++-- src/scripting/zscript/zcc-parse.lemon | 1 + src/scripting/zscript/zcc_compile.cpp | 4 ++++ src/scripting/zscript/zcc_parser.cpp | 1 + src/scripting/zscript/zcc_parser.h | 1 + 10 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/dobjtype.cpp b/src/dobjtype.cpp index 5e1316d71..ae26bc37d 100644 --- a/src/dobjtype.cpp +++ b/src/dobjtype.cpp @@ -74,6 +74,7 @@ TArray PClass::AllClasses; bool PClass::bShutdown; PErrorType *TypeError; +PErrorType *TypeAuto; PVoidType *TypeVoid; PInt *TypeSInt8, *TypeUInt8; PInt *TypeSInt16, *TypeUInt16; @@ -570,6 +571,7 @@ void PType::StaticInit() // Create types and add them type the type table. 
TypeTable.AddType(TypeError = new PErrorType); + TypeTable.AddType(TypeAuto = new PErrorType(2)); TypeTable.AddType(TypeVoid = new PVoidType); TypeTable.AddType(TypeSInt8 = new PInt(1, false)); TypeTable.AddType(TypeUInt8 = new PInt(1, true)); diff --git a/src/dobjtype.h b/src/dobjtype.h index ad626ee8b..c310f9863 100644 --- a/src/dobjtype.h +++ b/src/dobjtype.h @@ -370,7 +370,7 @@ class PErrorType : public PType { DECLARE_CLASS(PErrorType, PType); public: - PErrorType() : PType(0, 1) {} + PErrorType(int which = 1) : PType(0, which) {} }; class PVoidType : public PType @@ -930,6 +930,7 @@ PPrototype *NewPrototype(const TArray &rettypes, const TArray // Built-in types ----------------------------------------------------------- extern PErrorType *TypeError; +extern PErrorType *TypeAuto; extern PVoidType *TypeVoid; extern PInt *TypeSInt8, *TypeUInt8; extern PInt *TypeSInt16, *TypeUInt16; diff --git a/src/namedef.h b/src/namedef.h index c3866a0be..62d513437 100644 --- a/src/namedef.h +++ b/src/namedef.h @@ -721,6 +721,7 @@ xx(State) xx(Fixed) xx(Vector2) xx(Vector3) +xx(let) xx(Min) xx(Max) diff --git a/src/sc_man_scanner.re b/src/sc_man_scanner.re index a57cc34ad..8e4b42f69 100644 --- a/src/sc_man_scanner.re +++ b/src/sc_man_scanner.re @@ -183,6 +183,7 @@ std2: 'deprecated' { RET(TK_Deprecated); } 'action' { RET(TK_Action); } 'readonly' { RET(TK_ReadOnly); } + 'let' { RET(TK_Let); } /* Actor state options */ 'bright' { RET(StateOptions ? 
TK_Bright : TK_Identifier); } diff --git a/src/sc_man_tokens.h b/src/sc_man_tokens.h index c1ba10044..efa479cf3 100644 --- a/src/sc_man_tokens.h +++ b/src/sc_man_tokens.h @@ -142,4 +142,5 @@ xx(TK_NoDelay, "'nodelay'") xx(TK_Offset, "'offset'") xx(TK_Slow, "'slow'") xx(TK_Bright, "'bright'") +xx(TK_Let, "'let'") #undef xx diff --git a/src/scripting/codegeneration/codegen.cpp b/src/scripting/codegeneration/codegen.cpp index a0491f69d..0402ed9ab 100644 --- a/src/scripting/codegeneration/codegen.cpp +++ b/src/scripting/codegeneration/codegen.cpp @@ -9964,17 +9964,43 @@ FxExpression *FxLocalVariableDeclaration::Resolve(FCompileContext &ctx) delete this; return nullptr; } - if (ValueType->RegType == REGT_NIL) + if (ValueType->RegType == REGT_NIL && ValueType != TypeAuto) { auto sfunc = static_cast(ctx.Function->Variants[0].Implementation); StackOffset = sfunc->AllocExtraStack(ValueType); // Todo: Process the compound initializer once implemented. + if (Init != nullptr) + { + ScriptPosition.Message(MSG_ERROR, "Cannot initialize non-scalar variable %s here", Name.GetChars()); + delete this; + return nullptr; + } } - else + else if (ValueType !=TypeAuto) { if (Init) Init = new FxTypeCast(Init, ValueType, false); SAFE_RESOLVE_OPT(Init, ctx); } + else + { + if (Init == nullptr) + { + ScriptPosition.Message(MSG_ERROR, "Automatic type deduction requires an initializer for variable %s", Name.GetChars()); + delete this; + return nullptr; + } + SAFE_RESOLVE_OPT(Init, ctx); + if (Init->ValueType->RegType == REGT_NIL) + { + ScriptPosition.Message(MSG_ERROR, "Cannot initialize non-scalar variable %s here", Name.GetChars()); + delete this; + return nullptr; + } + ValueType = Init->ValueType; + // check for undersized ints and floats. These are not allowed as local variables. 
+ if (IsInteger() && ValueType->Align < sizeof(int)) ValueType = TypeSInt32; + else if (IsFloat() && ValueType->Align < sizeof(double)) ValueType = TypeFloat64; + } if (Name != NAME_None) { for (auto l : ctx.Block->LocalVars) diff --git a/src/scripting/zscript/zcc-parse.lemon b/src/scripting/zscript/zcc-parse.lemon index 5ba5a204e..9305b9f71 100644 --- a/src/scripting/zscript/zcc-parse.lemon +++ b/src/scripting/zscript/zcc-parse.lemon @@ -704,6 +704,7 @@ type_name1(X) ::= NAME(T). { X.Int = ZCC_Name; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= SOUND(T). { X.Int = ZCC_Sound; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= STATE(T). { X.Int = ZCC_State; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= COLOR(T). { X.Int = ZCC_Color; X.SourceLoc = T.SourceLoc; } +type_name1(X) ::= LET(T). { X.Int = ZCC_Let; X.SourceLoc = T.SourceLoc; } type_name(X) ::= type_name1(A). { diff --git a/src/scripting/zscript/zcc_compile.cpp b/src/scripting/zscript/zcc_compile.cpp index 98a1a5ff3..856802a97 100644 --- a/src/scripting/zscript/zcc_compile.cpp +++ b/src/scripting/zscript/zcc_compile.cpp @@ -1449,6 +1449,10 @@ PType *ZCCCompiler::DetermineType(PType *outertype, ZCC_TreeNode *field, FName n retval = TypeSound; break; + case ZCC_Let: + retval = TypeAuto; + break; + case ZCC_UserType: // statelabel et.al. are not tokens - there really is no need to, it works just as well as an identifier. Maybe the same should be done for some other types, too? 
switch (btype->UserType->Id) diff --git a/src/scripting/zscript/zcc_parser.cpp b/src/scripting/zscript/zcc_parser.cpp index 8a1618127..44f72da28 100644 --- a/src/scripting/zscript/zcc_parser.cpp +++ b/src/scripting/zscript/zcc_parser.cpp @@ -201,6 +201,7 @@ static void InitTokenMap() TOKENDEF2(TK_State, ZCC_STATE, NAME_State); TOKENDEF2(TK_Color, ZCC_COLOR, NAME_Color); TOKENDEF2(TK_Sound, ZCC_SOUND, NAME_Sound); + TOKENDEF2(TK_Let, ZCC_LET, NAME_let); TOKENDEF (TK_Identifier, ZCC_IDENTIFIER); TOKENDEF (TK_StringConst, ZCC_STRCONST); diff --git a/src/scripting/zscript/zcc_parser.h b/src/scripting/zscript/zcc_parser.h index 7a763b2c1..d02b1a12f 100644 --- a/src/scripting/zscript/zcc_parser.h +++ b/src/scripting/zscript/zcc_parser.h @@ -131,6 +131,7 @@ enum EZCCBuiltinType ZCC_Sound, ZCC_UserType, + ZCC_Let, ZCC_NUM_BUILT_IN_TYPES }; From 092461ed34024ebb8899f757e04e3890c22f4b76 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 14:52:34 +0100 Subject: [PATCH 19/21] - make dynamic object casts a dedicated VM instruction instead of a builtin function. This can see some heavy use in iterators where saving several hundreds of function calls can be achieved. In these cases, using a function to do the job will become a significant time waster. 
--- src/scripting/codegeneration/codegen.cpp | 95 +++++++----------------- src/scripting/codegeneration/codegen.h | 3 +- src/scripting/vm/vmexec.h | 12 +++ src/scripting/vm/vmops.h | 2 + wadsrc/static/zscript/doom/bossbrain.txt | 2 +- wadsrc/static/zscript/raven/minotaur.txt | 4 +- 6 files changed, 45 insertions(+), 73 deletions(-) diff --git a/src/scripting/codegeneration/codegen.cpp b/src/scripting/codegeneration/codegen.cpp index 0402ed9ab..3895441d8 100644 --- a/src/scripting/codegeneration/codegen.cpp +++ b/src/scripting/codegeneration/codegen.cpp @@ -4324,58 +4324,32 @@ FxExpression *FxTypeCheck::Resolve(FCompileContext& ctx) // //========================================================================== -PPrototype *FxTypeCheck::ReturnProto() +ExpEmit FxTypeCheck::EmitCommon(VMFunctionBuilder *build) { - EmitTail = true; - return FxExpression::ReturnProto(); + ExpEmit castee = left->Emit(build); + ExpEmit casttype = right->Emit(build); + castee.Free(build); + casttype.Free(build); + ExpEmit ares(build, REGT_POINTER); + build->Emit(casttype.Konst ? 
OP_DYNCAST_K : OP_DYNCAST_R, ares.RegNum, castee.RegNum, casttype.RegNum); + return ares; } - -//========================================================================== -// -// -// -//========================================================================== - -int BuiltinTypeCheck(VMValue *param, TArray &defaultparam, int numparam, VMReturn *ret, int numret) -{ - assert(numparam == 2); - PARAM_POINTER_AT(0, obj, DObject); - PARAM_CLASS_AT(1, cls, DObject); - ACTION_RETURN_BOOL(obj && obj->IsKindOf(cls)); -} - -//========================================================================== -// -// -// -//========================================================================== - ExpEmit FxTypeCheck::Emit(VMFunctionBuilder *build) { - EmitParameter(build, left, ScriptPosition); - EmitParameter(build, right, ScriptPosition); + ExpEmit ares = EmitCommon(build); + ares.Free(build); + ExpEmit bres(build, REGT_INT); + build->Emit(OP_CASTB, bres.RegNum, ares.RegNum, CASTB_A); + return bres; +} - - PSymbol *sym = FindBuiltinFunction(NAME_BuiltinTypeCheck, BuiltinTypeCheck); - - assert(sym->IsKindOf(RUNTIME_CLASS(PSymbolVMFunction))); - assert(((PSymbolVMFunction *)sym)->Function != nullptr); - auto callfunc = ((PSymbolVMFunction *)sym)->Function; - - int opcode = (EmitTail ? 
OP_TAIL_K : OP_CALL_K); - build->Emit(opcode, build->GetConstantAddress(callfunc, ATAG_OBJECT), 2, 1); - - if (EmitTail) - { - ExpEmit call; - call.Final = true; - return call; - } - - ExpEmit out(build, REGT_INT); - build->Emit(OP_RESULT, 0, REGT_INT, out.RegNum); - return out; +void FxTypeCheck::EmitCompare(VMFunctionBuilder *build, bool invert, TArray &patchspots_yes, TArray &patchspots_no) +{ + ExpEmit ares = EmitCommon(build); + ares.Free(build); + build->Emit(OP_EQA_K, !invert, ares.RegNum, build->GetConstantAddress(nullptr, ATAG_OBJECT)); + patchspots_no.Push(build->Emit(OP_JMP, 0)); } //========================================================================== @@ -4443,27 +4417,11 @@ FxExpression *FxDynamicCast::Resolve(FCompileContext& ctx) ExpEmit FxDynamicCast::Emit(VMFunctionBuilder *build) { - ExpEmit in = expr->Emit(build); - ExpEmit out = in.Fixed ? ExpEmit(build, in.RegType) : in; - ExpEmit check(build, REGT_INT); - assert(out.RegType == REGT_POINTER); - - if (in.Fixed) build->Emit(OP_MOVEA, out.RegNum, in.RegNum); - build->Emit(OP_PARAM, 0, REGT_POINTER, in.RegNum); - build->Emit(OP_PARAM, 0, REGT_POINTER | REGT_KONST, build->GetConstantAddress(CastType, ATAG_OBJECT)); - - PSymbol *sym = FindBuiltinFunction(NAME_BuiltinTypeCheck, BuiltinTypeCheck); - assert(sym->IsKindOf(RUNTIME_CLASS(PSymbolVMFunction))); - assert(((PSymbolVMFunction *)sym)->Function != nullptr); - auto callfunc = ((PSymbolVMFunction *)sym)->Function; - - build->Emit(OP_CALL_K, build->GetConstantAddress(callfunc, ATAG_OBJECT), 2, 1); - build->Emit(OP_RESULT, 0, REGT_INT, check.RegNum); - build->Emit(OP_EQ_K, 0, check.RegNum, build->GetConstantInt(0)); - auto patch = build->Emit(OP_JMP, 0); - build->Emit(OP_LKP, out.RegNum, build->GetConstantAddress(nullptr, ATAG_OBJECT)); - build->BackpatchToHere(patch); - return out; + ExpEmit castee = expr->Emit(build); + castee.Free(build); + ExpEmit ares(build, REGT_POINTER); + build->Emit(OP_DYNCAST_K, ares.RegNum, castee.RegNum, 
build->GetConstantAddress(CastType, ATAG_OBJECT)); + return ares; } //========================================================================== @@ -9586,8 +9544,7 @@ int BuiltinNameToClass(VMValue *param, TArray &defaultparam, int numpar if (!cls->IsDescendantOf(desttype)) { - // Let the caller check this. The message can be enabled for diagnostic purposes. - DPrintf(DMSG_SPAMMY, "class '%s' is not compatible with '%s'\n", clsname.GetChars(), desttype->TypeName.GetChars()); + // Let the caller check this. Making this an error with a message is only taking away options from the user. cls = nullptr; } ret->SetPointer(const_cast(cls), ATAG_OBJECT); diff --git a/src/scripting/codegeneration/codegen.h b/src/scripting/codegeneration/codegen.h index 49999206d..c18754cb8 100644 --- a/src/scripting/codegeneration/codegen.h +++ b/src/scripting/codegeneration/codegen.h @@ -1078,9 +1078,10 @@ public: FxTypeCheck(FxExpression*, FxExpression*); ~FxTypeCheck(); FxExpression *Resolve(FCompileContext&); - PPrototype *ReturnProto(); + ExpEmit EmitCommon(VMFunctionBuilder *build); ExpEmit Emit(VMFunctionBuilder *build); + void EmitCompare(VMFunctionBuilder *build, bool invert, TArray &patchspots_yes, TArray &patchspots_no); }; //========================================================================== diff --git a/src/scripting/vm/vmexec.h b/src/scripting/vm/vmexec.h index 4152facc1..76a7156cb 100644 --- a/src/scripting/vm/vmexec.h +++ b/src/scripting/vm/vmexec.h @@ -414,6 +414,18 @@ begin: reg.f[a+1] = reg.f[B+1]; reg.f[a+2] = reg.f[B+2]; NEXTOP; + OP(DYNCAST_R) : + ASSERTA(a); ASSERTA(B); ASSERTA(C); + b = B; + reg.a[a] = (reg.a[b] && ((DObject*)(reg.a[b]))->IsKindOf((PClass*)(reg.a[C]))) ? reg.a[b] : nullptr; + reg.atag[a] = ATAG_OBJECT; + NEXTOP; + OP(DYNCAST_K) : + ASSERTA(a); ASSERTA(B); ASSERTKA(C); + b = B; + reg.a[a] = (reg.a[b] && ((DObject*)(reg.a[b]))->IsKindOf((PClass*)(konsta[C].o))) ? 
reg.a[b] : nullptr; + reg.atag[a] = ATAG_OBJECT; + NEXTOP; OP(CAST): if (C == CAST_I2F) { diff --git a/src/scripting/vm/vmops.h b/src/scripting/vm/vmops.h index 826bdb4e3..889706726 100644 --- a/src/scripting/vm/vmops.h +++ b/src/scripting/vm/vmops.h @@ -84,6 +84,8 @@ xx(MOVEV2, mov2, RFRF, NOP, 0, 0), // fA = fB (2 elements) xx(MOVEV3, mov3, RFRF, NOP, 0, 0), // fA = fB (3 elements) xx(CAST, cast, CAST, NOP, 0, 0), // xA = xB, conversion specified by C xx(CASTB, castb, CAST, NOP, 0, 0), // xA = !!xB, type specified by C +xx(DYNCAST_R, dyncast, RPRPRP, NOP, 0, 0), // aA = dyn_cast(aB), class specified by aC +xx(DYNCAST_K, dyncast, RPRPKP, NOP, 0, 0), // aA = dyn_cast(aB), class specified by address constant C // Control flow. xx(TEST, test, RII16, NOP, 0, 0), // if (dA != BC) then pc++ diff --git a/wadsrc/static/zscript/doom/bossbrain.txt b/wadsrc/static/zscript/doom/bossbrain.txt index 2a3f39265..f22e4dd2b 100644 --- a/wadsrc/static/zscript/doom/bossbrain.txt +++ b/wadsrc/static/zscript/doom/bossbrain.txt @@ -193,7 +193,7 @@ extend class Actor if (sv_killbossmonst) { int count; // Repeat until we have no more boss-spawned monsters. - ThinkerIterator it = ThinkerIterator.Create(); + ThinkerIterator it = ThinkerIterator.Create("Actor"); do // (e.g. Pain Elementals can spawn more to kill upon death.) { Actor mo; diff --git a/wadsrc/static/zscript/raven/minotaur.txt b/wadsrc/static/zscript/raven/minotaur.txt index 5e3d0cfe3..01cd63541 100644 --- a/wadsrc/static/zscript/raven/minotaur.txt +++ b/wadsrc/static/zscript/raven/minotaur.txt @@ -388,7 +388,7 @@ class Minotaur : Actor // In case pain caused him to skip his fade in.
A_SetRenderStyle(1, STYLE_Normal); - MinotaurFriend mf = MinotaurFriend(self); + let mf = MinotaurFriend(self); if (mf) { if (mf.StartTime >= 0 && (level.maptime - mf.StartTime) >= MAULATORTICS) @@ -501,7 +501,7 @@ class Minotaur : Actor void A_MinotaurChase() { - MinotaurFriend mf = MinotaurFriend(self); + let mf = MinotaurFriend(self); if (!mf) { A_Chase(); From daaa6e7831b0c4c0424e530d763e653b97af459c Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 6 Dec 2016 00:00:56 +0100 Subject: [PATCH 20/21] - fixed bad parameter in AActor::ModifyDamage. --- src/p_mobj.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index 88653fca9..b527bf681 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -7418,7 +7418,7 @@ DEFINE_ACTION_FUNCTION(AActor, ClearCounters) int AActor::GetModifiedDamage(FName damagetype, int damage, bool passive) { if (Inventory != nullptr) - Inventory->ModifyDamage(damage, damagetype, damage, false); + Inventory->ModifyDamage(damage, damagetype, damage, passive); return damage; } From b2d1b0d7a6dd7c459050db7a0c092359370fbb35 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 6 Dec 2016 11:04:54 +0100 Subject: [PATCH 21/21] - fixed: FTranslatedLineTarget::angleFromSource returned the attack angle, not the angle between actors when returned from P_LineAttack. For most attack functions this is wrong, it's only the Hexen fighter attack needing this particular value, so it has been split up into two return values now. 
--- src/actor.h | 1 + src/p_map.cpp | 5 ++++- wadsrc/static/zscript/constants.txt | 1 + wadsrc/static/zscript/hexen/baseweapons.txt | 6 +----- wadsrc/static/zscript/hexen/fighteraxe.txt | 2 +- wadsrc/static/zscript/hexen/fighterfist.txt | 2 +- wadsrc/static/zscript/hexen/fighterhammer.txt | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/actor.h b/src/actor.h index 25669dfec..12bb4dc28 100644 --- a/src/actor.h +++ b/src/actor.h @@ -1540,6 +1540,7 @@ struct FTranslatedLineTarget { AActor *linetarget; DAngle angleFromSource; + DAngle attackAngleFromSource; bool unlinked; // found by a trace that went through an unlinked portal. }; diff --git a/src/p_map.cpp b/src/p_map.cpp index 8cbe38f0e..16a5933d5 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -3588,6 +3588,7 @@ struct aim_t res.linetarget = th; res.pitch = pitch; res.angleFromSource = (th->Pos() - startpos).Angle(); + res.attackAngleFromSource = res.angleFromSource; // at this point we do not have an attack angle so it's the same as the actual angle between actors. res.unlinked = unlinked; res.frac = frac; } @@ -4506,7 +4507,9 @@ AActor *P_LineAttack(AActor *t1, DAngle angle, double distance, if (victim != NULL) { victim->linetarget = trace.Actor; - victim->angleFromSource = trace.SrcAngleFromTarget; + victim->attackAngleFromSource = trace.SrcAngleFromTarget; + // With arbitrary portals this cannot be calculated so using the actual attack angle is the only option. + victim->angleFromSource = trace.unlinked? 
victim->attackAngleFromSource : t1->AngleTo(trace.Actor); victim->unlinked = trace.unlinked; } } diff --git a/wadsrc/static/zscript/constants.txt b/wadsrc/static/zscript/constants.txt index c19925899..147b23e4e 100644 --- a/wadsrc/static/zscript/constants.txt +++ b/wadsrc/static/zscript/constants.txt @@ -846,6 +846,7 @@ struct FTranslatedLineTarget { Actor linetarget; double angleFromSource; + double attackAngleFromSource; bool unlinked; // found by a trace that went through an unlinked portal. native void TraceBleed(int damage, Actor missile); diff --git a/wadsrc/static/zscript/hexen/baseweapons.txt b/wadsrc/static/zscript/hexen/baseweapons.txt index a1e1af9bd..f08794648 100644 --- a/wadsrc/static/zscript/hexen/baseweapons.txt +++ b/wadsrc/static/zscript/hexen/baseweapons.txt @@ -40,11 +40,7 @@ extend class Actor void AdjustPlayerAngle(FTranslatedLineTarget t) { - // normally this will adjust relative to the actual direction to the target, - // but with arbitrary portals that cannot be calculated so using the actual - // attack angle is the only option. - double atkangle = t.unlinked ? 
t.angleFromSource : AngleTo(t.linetarget); - double difference = deltaangle(Angle, atkangle); + double difference = t.angleFromSource; if (abs(difference) > MAX_ANGLE_ADJUST) { if (difference > 0) diff --git a/wadsrc/static/zscript/hexen/fighteraxe.txt b/wadsrc/static/zscript/hexen/fighteraxe.txt index a0d1deba1..1ff44a52f 100644 --- a/wadsrc/static/zscript/hexen/fighteraxe.txt +++ b/wadsrc/static/zscript/hexen/fighteraxe.txt @@ -253,7 +253,7 @@ class FWeapAxe : FighterWeapon { if (t.linetarget.bIsMonster || t.linetarget.player) { - t.linetarget.Thrust(power, t.angleFromSource); + t.linetarget.Thrust(power, t.attackAngleFromSource); } AdjustPlayerAngle(t); diff --git a/wadsrc/static/zscript/hexen/fighterfist.txt b/wadsrc/static/zscript/hexen/fighterfist.txt index 712055e6e..289822789 100644 --- a/wadsrc/static/zscript/hexen/fighterfist.txt +++ b/wadsrc/static/zscript/hexen/fighterfist.txt @@ -77,7 +77,7 @@ class FWeapFist : FighterWeapon (t.linetarget.Mass < 10000000 && (t.linetarget.bIsMonster))) { if (!t.linetarget.bDontThrust) - t.linetarget.Thrust(power, t.angleFromSource); + t.linetarget.Thrust(power, t.attackAngleFromSource); } AdjustPlayerAngle(t); return true; diff --git a/wadsrc/static/zscript/hexen/fighterhammer.txt b/wadsrc/static/zscript/hexen/fighterhammer.txt index 655e59379..f1b06b75c 100644 --- a/wadsrc/static/zscript/hexen/fighterhammer.txt +++ b/wadsrc/static/zscript/hexen/fighterhammer.txt @@ -80,7 +80,7 @@ class FWeapHammer : FighterWeapon AdjustPlayerAngle(t); if (t.linetarget.bIsMonster || t.linetarget.player) { - t.linetarget.Thrust(10, t.angleFromSource); + t.linetarget.Thrust(10, t.attackAngleFromSource); } weaponspecial = false; // Don't throw a hammer return;