From 547973c8bab2818125207a87ba987974a08d5b76 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 26 Nov 2016 23:28:21 +0100 Subject: [PATCH 01/23] Rewrite OWallMost and WallMost to contain no build code --- src/r_segs.cpp | 288 ++++++++++++++++--------------------------------- 1 file changed, 90 insertions(+), 198 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 369722242..1defd253b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -2508,232 +2508,124 @@ void R_StoreWallRange (int start, int stop) ds_p++; } -int OWallMost (short *mostbuf, double z, const FWallCoords *wallc) +int WallMostAny(short *mostbuf, double z1, double z2, const FWallCoords *wallc) { - int bad, ix1, ix2; - double y, iy1, iy2; - double s1, s2, s3, s4; + float y1 = (float)(CenterY - z1 * InvZtoScale / wallc->sz1); + float y2 = (float)(CenterY - z2 * InvZtoScale / wallc->sz2); - z = -z; - s1 = globaluclip * wallc->sz1; s2 = globaluclip * wallc->sz2; - s3 = globaldclip * wallc->sz1; s4 = globaldclip * wallc->sz2; - bad = (zs3)<<2)+((z>s4)<<3); - - if ((bad&3) == 3) - { // entire line is above the screen - memset (&mostbuf[wallc->sx1], 0, (wallc->sx2 - wallc->sx1)*sizeof(mostbuf[0])); - return bad; - } - - if ((bad&12) == 12) - { // entire line is below the screen - clearbufshort (&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); - return bad; - } - ix1 = wallc->sx1; iy1 = wallc->sz1; - ix2 = wallc->sx2; iy2 = wallc->sz2; - if (bad & 3) - { // the line intersects the top of the screen - double t = (z-s1) / (s2-s1); - double inty = wallc->sz1 + t * (wallc->sz2 - wallc->sz1); - int xcross = xs_RoundToInt(wallc->sx1 + (t * wallc->sz2 * (wallc->sx2 - wallc->sx1)) / inty); - - if ((bad & 3) == 2) - { // the right side is above the screen - if (wallc->sx1 <= xcross) { iy2 = inty; ix2 = xcross; } - if (wallc->sx2 > xcross) memset (&mostbuf[xcross], 0, (wallc->sx2-xcross)*sizeof(mostbuf[0])); - } - else - { // the left side is above the screen - if (xcross <= wallc->sx2) { 
iy1 = inty; ix1 = xcross; } - if (xcross > wallc->sx1) memset (&mostbuf[wallc->sx1], 0, (xcross-wallc->sx1)*sizeof(mostbuf[0])); - } - } - - if (bad & 12) - { // the line intersects the bottom of the screen - double t = (z-s3) / (s4-s3); - double inty = wallc->sz1 + t * (wallc->sz2 - wallc->sz1); - int xcross = xs_RoundToInt(wallc->sx1 + (t * wallc->sz2 * (wallc->sx2 - wallc->sx1)) / inty); - - if ((bad & 12) == 8) - { // the right side is below the screen - if (wallc->sx1 <= xcross) { iy2 = inty; ix2 = xcross; } - if (wallc->sx2 > xcross) clearbufshort (&mostbuf[xcross], wallc->sx2 - xcross, viewheight); - } - else - { // the left side is below the screen - if (xcross <= wallc->sx2) { iy1 = inty; ix1 = xcross; } - if (xcross > wallc->sx1) clearbufshort (&mostbuf[wallc->sx1], xcross - wallc->sx1, viewheight); - } - } - - y = z * InvZtoScale / iy1; - if (ix2 == ix1) + if (y1 < 0 && y2 < 0) // entire line is above screen { - mostbuf[ix1] = (short)xs_RoundToInt(y + CenterY); + memset(&mostbuf[wallc->sx1], 0, (wallc->sx2 - wallc->sx1) * sizeof(mostbuf[0])); + return 3; + } + else if (y1 > viewheight && y2 > viewheight) // entire line is below screen + { + clearbufshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + return 12; + } + + if (wallc->sx2 <= wallc->sx1) + return 0; + + float rcp_delta = 1.0f / (wallc->sx2 - wallc->sx1); + if (y1 >= 0.0f && y2 >= 0.0f && xs_RoundToInt(y1) <= viewheight && xs_RoundToInt(y2) <= viewheight) + { + for (int x = wallc->sx1; x < wallc->sx2; x++) + { + float t = (x - wallc->sx1) * rcp_delta; + float y = y1 * (1.0f - t) + y2 * t; + mostbuf[x] = (short)xs_RoundToInt(y); + } } else { - fixed_t yinc = FLOAT2FIXED(((z * InvZtoScale / iy2) - y) / (ix2 - ix1)); - qinterpolatedown16short (&mostbuf[ix1], ix2-ix1, FLOAT2FIXED(y + CenterY) + FRACUNIT/2, yinc); + for (int x = wallc->sx1; x < wallc->sx2; x++) + { + float t = (x - wallc->sx1) * rcp_delta; + float y = y1 * (1.0f - t) + y2 * t; + mostbuf[x] = 
(short)clamp(xs_RoundToInt(y), 0, viewheight); + } } - return bad; + + return 0; } -int WallMost (short *mostbuf, const secplane_t &plane, const FWallCoords *wallc) +int OWallMost(short *mostbuf, double z, const FWallCoords *wallc) +{ + return WallMostAny(mostbuf, z, z, wallc); +} + +int WallMost(short *mostbuf, const secplane_t &plane, const FWallCoords *wallc) { if (!plane.isSlope()) { return OWallMost(mostbuf, plane.Zat0() - ViewPos.Z, wallc); } - - double x, y, den, z1, z2, oz1, oz2; - double s1, s2, s3, s4; - int bad, ix1, ix2; - double iy1, iy2; - - // Get Z coordinates at both ends of the line - if (MirrorFlags & RF_XFLIP) - { - x = curline->v2->fX(); - y = curline->v2->fY(); - if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) - { - double frac = (wallc->tleft.Y + wallc->tleft.X) / den; - x -= frac * (x - curline->v1->fX()); - y -= frac * (y - curline->v1->fY()); - } - z1 = ViewPos.Z - plane.ZatPoint(x, y); - - if (wallc->sx2 > wallc->sx1 + 1) - { - x = curline->v1->fX(); - y = curline->v1->fY(); - if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) - { - double frac = (wallc->tright.Y - wallc->tright.X) / den; - x += frac * (curline->v2->fX() - x); - y += frac * (curline->v2->fY() - y); - } - z2 = ViewPos.Z - plane.ZatPoint(x, y); - } - else - { - z2 = z1; - } - } else { - x = curline->v1->fX(); - y = curline->v1->fY(); - if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) - { - double frac = (wallc->tleft.Y + wallc->tleft.X) / den; - x += frac * (curline->v2->fX() - x); - y += frac * (curline->v2->fY() - y); - } - z1 = ViewPos.Z - plane.ZatPoint(x, y); - - if (wallc->sx2 > wallc->sx1 + 1) + // Get Z coordinates at both ends of the line + double x, y, den, z1, z2; + if (MirrorFlags & RF_XFLIP) { x = curline->v2->fX(); y = curline->v2->fY(); - if (wallc->sx2 == viewwidth && 0 != (den = 
wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) + if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) { - double frac = (wallc->tright.Y - wallc->tright.X) / den; + double frac = (wallc->tleft.Y + wallc->tleft.X) / den; x -= frac * (x - curline->v1->fX()); y -= frac * (y - curline->v1->fY()); } - z2 = ViewPos.Z - plane.ZatPoint(x, y); + z1 = plane.ZatPoint(x, y) - ViewPos.Z; + + if (wallc->sx2 > wallc->sx1 + 1) + { + x = curline->v1->fX(); + y = curline->v1->fY(); + if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) + { + double frac = (wallc->tright.Y - wallc->tright.X) / den; + x += frac * (curline->v2->fX() - x); + y += frac * (curline->v2->fY() - y); + } + z2 = plane.ZatPoint(x, y) - ViewPos.Z; + } + else + { + z2 = z1; + } } else { - z2 = z1; + x = curline->v1->fX(); + y = curline->v1->fY(); + if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) + { + double frac = (wallc->tleft.Y + wallc->tleft.X) / den; + x += frac * (curline->v2->fX() - x); + y += frac * (curline->v2->fY() - y); + } + z1 = plane.ZatPoint(x, y) - ViewPos.Z; + + if (wallc->sx2 > wallc->sx1 + 1) + { + x = curline->v2->fX(); + y = curline->v2->fY(); + if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) + { + double frac = (wallc->tright.Y - wallc->tright.X) / den; + x -= frac * (x - curline->v1->fX()); + y -= frac * (y - curline->v1->fY()); + } + z2 = plane.ZatPoint(x, y) - ViewPos.Z; + } + else + { + z2 = z1; + } } + + return WallMostAny(mostbuf, z1, z2, wallc); } - - s1 = globaluclip * wallc->sz1; s2 = globaluclip * wallc->sz2; - s3 = globaldclip * wallc->sz1; s4 = globaldclip * wallc->sz2; - bad = (z1s3)<<2)+((z2>s4)<<3); - - ix1 = wallc->sx1; ix2 = wallc->sx2; - iy1 = wallc->sz1; iy2 = wallc->sz2; - oz1 = z1; oz2 = z2; - - if ((bad&3) == 3) - { 
// The entire line is above the screen - memset (&mostbuf[ix1], 0, (ix2-ix1)*sizeof(mostbuf[0])); - return bad; - } - - if ((bad&12) == 12) - { // The entire line is below the screen - clearbufshort (&mostbuf[ix1], ix2-ix1, viewheight); - return bad; - - } - - if (bad&3) - { // The line intersects the top of the screen - //inty = intz / (globaluclip>>16) - double t = (oz1-s1) / (s2-s1+oz1-oz2); - double inty = wallc->sz1 + t * (wallc->sz2-wallc->sz1); - double intz = oz1 + t * (oz2-oz1); - int xcross = wallc->sx1 + xs_RoundToInt((t * wallc->sz2 * (wallc->sx2-wallc->sx1)) / inty); - - //t = divscale30((x1<<4)-xcross*yb1[w],xcross*(yb2[w]-yb1[w])-((x2-x1)<<4)); - //inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t); - //intz = z1 + mulscale30(z2-z1,t); - - if ((bad&3) == 2) - { // The right side of the line is above the screen - if (wallc->sx1 <= xcross) { z2 = intz; iy2 = inty; ix2 = xcross; } - memset (&mostbuf[xcross], 0, (wallc->sx2-xcross)*sizeof(mostbuf[0])); - } - else - { // The left side of the line is above the screen - if (xcross <= wallc->sx2) { z1 = intz; iy1 = inty; ix1 = xcross; } - memset (&mostbuf[wallc->sx1], 0, (xcross-wallc->sx1)*sizeof(mostbuf[0])); - } - } - - if (bad&12) - { // The line intersects the bottom of the screen - //inty = intz / (globaldclip>>16) - double t = (oz1-s3) / (s4-s3+oz1-oz2); - double inty = wallc->sz1 + t * (wallc->sz2-wallc->sz1); - double intz = oz1 + t * (oz2-oz1); - int xcross = wallc->sx1 + xs_RoundToInt((t * wallc->sz2 * (wallc->sx2-wallc->sx1)) / inty); - - //t = divscale30((x1<<4)-xcross*yb1[w],xcross*(yb2[w]-yb1[w])-((x2-x1)<<4)); - //inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t); - //intz = z1 + mulscale30(z2-z1,t); - - if ((bad&12) == 8) - { // The right side of the line is below the screen - if (wallc->sx1 <= xcross) { z2 = intz; iy2 = inty; ix2 = xcross; } - if (wallc->sx2 > xcross) clearbufshort (&mostbuf[xcross], wallc->sx2-xcross, viewheight); - } - else - { // The left side of the line is below the screen - if 
(xcross <= wallc->sx2) { z1 = intz; iy1 = inty; ix1 = xcross; } - if (xcross > wallc->sx1) clearbufshort (&mostbuf[wallc->sx1], xcross-wallc->sx1, viewheight); - } - } - - y = z1 * InvZtoScale / iy1; - if (ix2 == ix1) - { - mostbuf[ix1] = (short)xs_RoundToInt(y + CenterY); - } - else - { - fixed_t yinc = FLOAT2FIXED(((z2 * InvZtoScale / iy2) - y) / (ix2-ix1)); - qinterpolatedown16short (&mostbuf[ix1], ix2-ix1, FLOAT2FIXED(y + CenterY) + FRACUNIT/2, yinc); - } - - return bad; } static void PrepWallRoundFix(fixed_t *lwall, fixed_t walxrepeat, int x1, int x2) From 6417c1a7a39b6b5ee40f678219ffdb6c59f5473b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 27 Nov 2016 00:43:32 +0100 Subject: [PATCH 02/23] Rewrite PrepWall and PrepLWall, plus make them aware of pixel centers --- src/r_segs.cpp | 120 ++++++++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 71 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 1defd253b..ac5683b9b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -2628,98 +2628,76 @@ int WallMost(short *mostbuf, const secplane_t &plane, const FWallCoords *wallc) } } -static void PrepWallRoundFix(fixed_t *lwall, fixed_t walxrepeat, int x1, int x2) +void PrepWall(float *vstep, fixed_t *upos, double walxrepeat, int x1, int x2) { - // fix for rounding errors - walxrepeat = abs(walxrepeat); - fixed_t fix = (MirrorFlags & RF_XFLIP) ? 
walxrepeat-1 : 0; - int x; + float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); + float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); + float uGradient = WallT.UoverZstep; + float zGradient = WallT.InvZstep; + float xrepeat = (float)walxrepeat; + float depthScale = (float)(WallT.InvZstep * WallTMapScale2); + float depthOrg = (float)(-WallT.UoverZstep * WallTMapScale2); - if (x1 > 0) + if (xrepeat < 0.0f) { - for (x = x1; x < x2; x++) + for (int x = x1; x < x2; x++) { - if ((unsigned)lwall[x] >= (unsigned)walxrepeat) - { - lwall[x] = fix; - } - else - { - break; - } + float u = uOverZ / invZ; + + upos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); + vstep[x] = depthOrg + u * depthScale; + + uOverZ += uGradient; + invZ += zGradient; } } - fix = walxrepeat - 1 - fix; - for (x = x2-1; x >= x1; x--) + else { - if ((unsigned)lwall[x] >= (unsigned)walxrepeat) + for (int x = x1; x < x2; x++) { - lwall[x] = fix; - } - else - { - break; + float u = uOverZ / invZ; + + upos[x] = (fixed_t)(u * xrepeat * FRACUNIT); + vstep[x] = depthOrg + u * depthScale; + + uOverZ += uGradient; + invZ += zGradient; } } } -void PrepWall (float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2) -{ // swall = scale, lwall = texturecolumn - double top, bot, i; - double xrepeat = fabs(walxrepeat * 65536); - double depth_scale = WallT.InvZstep * WallTMapScale2; - double depth_org = -WallT.UoverZstep * WallTMapScale2; +void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2) +{ + float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); + float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); + float uGradient = WallT.UoverZstep; + float zGradient = WallT.InvZstep; + float xrepeat = (float)walxrepeat; - i = x1 - centerx; - top = WallT.UoverZorg + WallT.UoverZstep * i; - bot = WallT.InvZorg + WallT.InvZstep * i; - - for (int x = x1; x < x2; x++) + if (xrepeat < 0.0f) { - double frac = 
top / bot; - if (walxrepeat < 0) + for (int x = x1; x < x2; x++) { - lwall[x] = xs_RoundToInt(xrepeat - frac * xrepeat); + float u = uOverZ / invZ * xrepeat - xrepeat; + + upos[x] = (fixed_t)(u * FRACUNIT); + + uOverZ += uGradient; + invZ += zGradient; } - else - { - lwall[x] = xs_RoundToInt(frac * xrepeat); - } - swall[x] = float(frac * depth_scale + depth_org); - top += WallT.UoverZstep; - bot += WallT.InvZstep; } - PrepWallRoundFix(lwall, FLOAT2FIXED(walxrepeat), x1, x2); -} - -void PrepLWall (fixed_t *lwall, double walxrepeat, int x1, int x2) -{ // lwall = texturecolumn - double top, bot, i; - double xrepeat = fabs(walxrepeat * 65536); - double topstep, botstep; - - i = x1 - centerx; - top = WallT.UoverZorg + WallT.UoverZstep * i; - bot = WallT.InvZorg + WallT.InvZstep * i; - - top *= xrepeat; - topstep = WallT.UoverZstep * xrepeat; - botstep = WallT.InvZstep; - - for (int x = x1; x < x2; x++) + else { - if (walxrepeat < 0) + for (int x = x1; x < x2; x++) { - lwall[x] = xs_RoundToInt(xrepeat - top / bot); + float u = uOverZ / invZ * xrepeat; + + upos[x] = (fixed_t)(u * FRACUNIT); + + uOverZ += uGradient; + invZ += zGradient; } - else - { - lwall[x] = xs_RoundToInt(top / bot); - } - top += topstep; - bot += botstep; } - PrepWallRoundFix(lwall, FLOAT2FIXED(walxrepeat), x1, x2); } // pass = 0: when seg is first drawn From 2e99681cedc42749ccd98a4bd58b38581934fa52 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 27 Nov 2016 16:22:23 +0100 Subject: [PATCH 03/23] - fixed: When trying to calculate the damage for a blasted actor hitting another one, the velocity checks used the signed values, causing totally random damaging effects depending on the direction the objects were moving. This bug had been present in the original Hexen source as well. 
--- src/p_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/p_map.cpp b/src/p_map.cpp index d102307bc..475cd3569 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -1256,7 +1256,7 @@ bool PIT_CheckThing(FMultiBlockThingsIterator &it, FMultiBlockThingsIterator::Ch { // ideally this should take the mass factor into account thing->Vel += tm.thing->Vel.XY(); - if ((thing->Vel.X + thing->Vel.Y) > 3.) + if (fabs(thing->Vel.X) + fabs(thing->Vel.Y) > 3.) { int newdam; damage = (tm.thing->Mass / 100) + 1; From 0488b18f8f1d381e49105256727d02abe7ed2c4e Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sat, 26 Nov 2016 13:26:44 +0200 Subject: [PATCH 04/23] Fixed junk character(s) left after removing portion of string See http://forum.zdoom.org/viewtopic.php?t=54379 --- src/zstring.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/zstring.cpp b/src/zstring.cpp index e8e867323..1999f54fe 100644 --- a/src/zstring.cpp +++ b/src/zstring.cpp @@ -397,6 +397,7 @@ void FString::Remove(size_t index, size_t remlen) if (Data()->RefCount == 1) { // Can do this in place memmove(Chars + index, Chars + index + remlen, Len() - index - remlen); + memset(Chars + Len() - remlen, 0, remlen); Data()->Len -= (unsigned)remlen; } else From 9dbfa6d04ed14044f134634fc63a7261dfe906f6 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Fri, 2 Dec 2016 13:22:42 +0100 Subject: [PATCH 05/23] - Fixed uninitialized line number info in thing map parsing. 
--- src/p_things.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/p_things.cpp b/src/p_things.cpp index d94ba6a01..ba377abeb 100644 --- a/src/p_things.cpp +++ b/src/p_things.cpp @@ -601,6 +601,7 @@ static void ParseSpawnMap(FScanner &sc, SpawnMap & themap, const char *descript) } defined[ednum] = true; editem.classname = sc.String; + editem.linenum = sc.Line; themap.Insert(ednum, editem); } From 60b1f5c25ab1ef89b1cc17075c5684fae35855e4 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 2 Dec 2016 19:37:54 +0100 Subject: [PATCH 06/23] - fixed: FPNGTexture::fr could be left uninitialized. --- src/textures/pngtexture.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index d34b00607..d24cd92d1 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -211,6 +211,7 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename int i; if (lumpnum == -1) fr = &lump; + else fr = nullptr; UseType = TEX_MiscPatch; LeftOffset = 0; From 87d2991256ffa089159783b31a9c7ab78c229c94 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 2 Dec 2016 20:13:30 +0100 Subject: [PATCH 07/23] - removed all cluster music definitions so that the default from the gameinfo section can be used to change it. 
--- wadsrc/static/mapinfo/chex.txt | 4 ---- wadsrc/static/mapinfo/doom1.txt | 4 ---- wadsrc/static/mapinfo/doom2.txt | 7 ------- wadsrc/static/mapinfo/heretic.txt | 5 ----- wadsrc/static/mapinfo/hexen.txt | 4 ---- wadsrc/static/mapinfo/plutonia.txt | 6 ------ wadsrc/static/mapinfo/tnt.txt | 6 ------ 7 files changed, 36 deletions(-) diff --git a/wadsrc/static/mapinfo/chex.txt b/wadsrc/static/mapinfo/chex.txt index 00904b134..bf42fcbcd 100644 --- a/wadsrc/static/mapinfo/chex.txt +++ b/wadsrc/static/mapinfo/chex.txt @@ -255,28 +255,24 @@ map E1M5 lookup "CHUSTR_E1M5" cluster 1 { flat = "FLOOR4_8" - music = "$MUSIC_VICTOR" exittext = lookup, "CE1TEXT" } cluster 2 { flat = "SFLR6_1" - music = "$MUSIC_VICTOR" exittext = lookup, "CE2TEXT" } cluster 3 { flat = "MFLR8_4" - music = "$MUSIC_VICTOR" exittext = lookup, "CE3TEXT" } cluster 4 { flat = "MFLR8_3" - music = "$MUSIC_VICTOR" exittext = lookup, "CE4TEXT" } diff --git a/wadsrc/static/mapinfo/doom1.txt b/wadsrc/static/mapinfo/doom1.txt index 62040ca15..2baba9a5d 100644 --- a/wadsrc/static/mapinfo/doom1.txt +++ b/wadsrc/static/mapinfo/doom1.txt @@ -506,28 +506,24 @@ map E4M9 lookup "HUSTR_E4M9" cluster 1 { flat = "$bgflatE1" - music = "$MUSIC_VICTOR" exittext = lookup, "E1TEXT" } cluster 2 { flat = "$bgflatE2" - music = "$MUSIC_VICTOR" exittext = lookup, "E2TEXT" } cluster 3 { flat = "$bgflatE3" - music = "$MUSIC_VICTOR" exittext = lookup, "E3TEXT" } cluster 4 { flat = "$bgflatE4" - music = "$MUSIC_VICTOR" exittext = lookup, "E4TEXT" } diff --git a/wadsrc/static/mapinfo/doom2.txt b/wadsrc/static/mapinfo/doom2.txt index 166d2d202..6268f52c9 100644 --- a/wadsrc/static/mapinfo/doom2.txt +++ b/wadsrc/static/mapinfo/doom2.txt @@ -380,7 +380,6 @@ map MAP32 lookup "HUSTR_32" cluster 5 { flat = "$BGFLAT06" - music = "$MUSIC_READ_M" exittext = lookup, "C1TEXT" } @@ -390,7 +389,6 @@ cluster 5 cluster 6 { flat = "$BGFLAT11" - music = "$MUSIC_READ_M" exittext = lookup, "C2TEXT" } @@ -400,7 +398,6 @@ cluster 6 cluster 7 { flat = 
"$BGFLAT20" - music = "$MUSIC_READ_M" exittext = lookup, "C3TEXT" } @@ -409,7 +406,6 @@ cluster 7 cluster 8 { flat = "$BGFLAT30" - music = "$MUSIC_READ_M" exittext = lookup, "C4TEXT" } @@ -418,7 +414,6 @@ cluster 8 cluster 9 { flat = "$BGFLAT15" - music = "$MUSIC_READ_M" entertext = lookup, "C5TEXT" } @@ -427,7 +422,6 @@ cluster 9 cluster 10 { flat = "$BGFLAT31" - music = "$MUSIC_READ_M" entertext = lookup, "C6TEXT" } @@ -537,7 +531,6 @@ map LEVEL09 lookup "NHUSTR_9" cluster 11 { flat = "SLIME16" - music = "$MUSIC_READ_M" exittext = lookup, "NERVETEXT" } diff --git a/wadsrc/static/mapinfo/heretic.txt b/wadsrc/static/mapinfo/heretic.txt index a1e2efb0c..5f822d870 100644 --- a/wadsrc/static/mapinfo/heretic.txt +++ b/wadsrc/static/mapinfo/heretic.txt @@ -805,35 +805,30 @@ map E6M3 "Untitled" cluster 1 { flat = "FLOOR25" - music = "MUS_CPTD" exittext = lookup, "HE1TEXT" } cluster 2 { flat = "FLATHUH1" - music = "MUS_CPTD" exittext = lookup, "HE2TEXT" } cluster 3 { flat = "FLTWAWA2" - music = "MUS_CPTD" exittext = lookup, "HE3TEXT" } cluster 4 { flat = "FLOOR28" - music = "MUS_CPTD" exittext = lookup, "HE4TEXT" } cluster 5 { flat = "FLOOR08" - music = "MUS_CPTD" exittext = lookup, "HE5TEXT" } diff --git a/wadsrc/static/mapinfo/hexen.txt b/wadsrc/static/mapinfo/hexen.txt index c2dbf959a..be9a6ef65 100644 --- a/wadsrc/static/mapinfo/hexen.txt +++ b/wadsrc/static/mapinfo/hexen.txt @@ -464,7 +464,6 @@ cluster 1 hub exittext = "clus1msg" exittextislump - music = "hub" pic = "interpic" } @@ -473,7 +472,6 @@ cluster 2 hub exittext = "clus2msg" exittextislump - music = "hub" pic = "interpic" } @@ -482,7 +480,6 @@ cluster 3 hub exittext = "clus3msg" exittextislump - music = "hub" pic = "interpic" } @@ -491,7 +488,6 @@ cluster 4 hub exittext = "clus4msg" exittextislump - music = "hub" pic = "interpic" } diff --git a/wadsrc/static/mapinfo/plutonia.txt b/wadsrc/static/mapinfo/plutonia.txt index 7126c5605..d4fefba86 100644 --- a/wadsrc/static/mapinfo/plutonia.txt +++ 
b/wadsrc/static/mapinfo/plutonia.txt @@ -374,7 +374,6 @@ map MAP32 lookup "PHUSTR_32" cluster 5 { flat = "$BGFLAT06" - music = "$MUSIC_READ_M" exittext = lookup, "P1TEXT" } @@ -383,7 +382,6 @@ cluster 5 cluster 6 { flat = "$BGFLAT11" - music = "$MUSIC_READ_M" exittext = lookup, "P2TEXT" } @@ -392,7 +390,6 @@ cluster 6 cluster 7 { flat = "$BGFLAT20" - music = "$MUSIC_READ_M" exittext = lookup, "P3TEXT" } @@ -401,7 +398,6 @@ cluster 7 cluster 8 { flat = "$BGFLAT30" - music = "$MUSIC_READ_M" exittext = lookup, "P4TEXT" } @@ -410,7 +406,6 @@ cluster 8 cluster 9 { flat = "$BGFLAT15" - music = "$MUSIC_READ_M" entertext = lookup, "P5TEXT" } @@ -419,7 +414,6 @@ cluster 9 cluster 10 { flat = "$BGFLAT31" - music = "$MUSIC_READ_M" entertext = lookup, "P6TEXT" } diff --git a/wadsrc/static/mapinfo/tnt.txt b/wadsrc/static/mapinfo/tnt.txt index 15f71389d..12768a5c3 100644 --- a/wadsrc/static/mapinfo/tnt.txt +++ b/wadsrc/static/mapinfo/tnt.txt @@ -374,7 +374,6 @@ map MAP32 lookup "THUSTR_32" cluster 5 { flat = "$BGFLAT06" - music = "$MUSIC_READ_M" exittext = lookup, "T1TEXT" } @@ -383,7 +382,6 @@ cluster 5 cluster 6 { flat = "$BGFLAT11" - music = "$MUSIC_READ_M" exittext = lookup, "T2TEXT" } @@ -392,7 +390,6 @@ cluster 6 cluster 7 { flat = "$BGFLAT20" - music = "$MUSIC_READ_M" exittext = lookup, "T3TEXT" } @@ -401,7 +398,6 @@ cluster 7 cluster 8 { flat = "$BGFLAT30" - music = "$MUSIC_READ_M" exittext = lookup, "T4TEXT" } @@ -410,7 +406,6 @@ cluster 8 cluster 9 { flat = "$BGFLAT15" - music = "$MUSIC_READ_M" entertext = lookup, "T5TEXT" } @@ -419,6 +414,5 @@ cluster 9 cluster 10 { flat = "$BGFLAT31" - music = "$MUSIC_READ_M" entertext = lookup, "T6TEXT" } From 28604bad62ab89b50d791f6bf2af424f4aee02d6 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 3 Dec 2016 14:06:51 +0100 Subject: [PATCH 08/23] - fixed: The polyobject init specials must be cleared after Polyobject initialization is done, because they can block usage of regular lines colinear with the polyobject --- 
src/po_man.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/po_man.cpp b/src/po_man.cpp index c779a8049..05d067e52 100644 --- a/src/po_man.cpp +++ b/src/po_man.cpp @@ -1773,7 +1773,14 @@ void PO_Init (void) } } } - + // clear all polyobj specials so that they do not obstruct using other lines. + for (int i = 0; i < numlines; i++) + { + if (lines[i].special == Polyobj_ExplicitLine || lines[i].special == Polyobj_StartLine) + { + lines[i].special = 0; + } + } } //========================================================================== From 5117b32431f946ea3ce08277dcf80c0a1fe2ad2c Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 3 Dec 2016 14:42:06 +0100 Subject: [PATCH 09/23] - fixed: The math for emulating the old slop overflow was not correct and made the affected sectors in void.wad display incorrectly. - set compat_polyobj for void.wad because its polyobjects glitch quite a bit with the normal setting. --- src/p_slopes.cpp | 4 ++-- wadsrc/static/compatibility.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/p_slopes.cpp b/src/p_slopes.cpp index 66b3d0747..7413922e2 100644 --- a/src/p_slopes.cpp +++ b/src/p_slopes.cpp @@ -178,8 +178,8 @@ void P_SetSlope (secplane_t *plane, bool setCeil, int xyangi, int zangi, const D if (ib_compatflags & BCOMPATF_SETSLOPEOVERFLOW) { // We have to consider an integer multiplication overflow here. 
- norm[0] = FixedToFloat(FloatToFixed(zang.Cos()) * FloatToFixed(xyang.Cos())); - norm[1] = FixedToFloat(FloatToFixed(zang.Cos()) * FloatToFixed(xyang.Sin())); + norm[0] = FixedToFloat(FloatToFixed(zang.Cos()) * FloatToFixed(xyang.Cos())) / 65536.; + norm[1] = FixedToFloat(FloatToFixed(zang.Cos()) * FloatToFixed(xyang.Sin())) / 65536.; } else { diff --git a/wadsrc/static/compatibility.txt b/wadsrc/static/compatibility.txt index d964cbf75..73a4f5af2 100644 --- a/wadsrc/static/compatibility.txt +++ b/wadsrc/static/compatibility.txt @@ -27,6 +27,7 @@ setwallyscale 717 front bot 1.090909 setslopeoverflow + polyobj } B2D8DA03489D1C67F60DC87FBC4EA338 // map01 - Massmouth 2 From 30cbce051e08a7ff6b8f7161cee44159e24ad90e Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 3 Dec 2016 15:44:46 +0100 Subject: [PATCH 10/23] - fixed: Custom automap colors were not invalidated on restart. - fixed: D_DoomMain has 3 calls to D_DoomLoop but only the main call of these was capable of a clean restart. --- src/am_map.cpp | 5 ++++ src/am_map.h | 1 + src/d_main.cpp | 69 +++++++++++++++++++++++++++----------------------- 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/src/am_map.cpp b/src/am_map.cpp index d74927b68..70153404a 100644 --- a/src/am_map.cpp +++ b/src/am_map.cpp @@ -473,6 +473,11 @@ static AMColorset AMMod; static AMColorset AMModOverlay; +void AM_ClearColorsets() +{ + AMModOverlay.defined = false; + AMMod.defined = false; +} //============================================================================= // // diff --git a/src/am_map.h b/src/am_map.h index dadacde55..fd9079c1e 100644 --- a/src/am_map.h +++ b/src/am_map.h @@ -27,6 +27,7 @@ class FSerializer; void AM_StaticInit(); +void AM_ClearColorsets(); // reset data for a restart. // Called by main loop. 
bool AM_Responder (event_t* ev, bool last); diff --git a/src/d_main.cpp b/src/d_main.cpp index dad2b86d9..1f9fcdbca 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -2592,44 +2592,48 @@ void D_DoomMain (void) G_DeferedPlayDemo (v); D_DoomLoop (); // never returns } - - v = Args->CheckValue ("-timedemo"); - if (v) + else { - G_TimeDemo (v); - D_DoomLoop (); // never returns - } - - if (gameaction != ga_loadgame && gameaction != ga_loadgamehidecon) - { - if (autostart || netgame) + v = Args->CheckValue("-timedemo"); + if (v) { - // Do not do any screenwipes when autostarting a game. - if (!Args->CheckParm("-warpwipe")) - { - NoWipe = TICRATE; - } - CheckWarpTransMap (startmap, true); - if (demorecording) - G_BeginRecording (startmap); - G_InitNew (startmap, false); - if (StoredWarp.IsNotEmpty()) - { - AddCommandString(StoredWarp.LockBuffer()); - StoredWarp = NULL; - } + G_TimeDemo(v); + D_DoomLoop(); // never returns } else { - D_StartTitle (); // start up intro loop + if (gameaction != ga_loadgame && gameaction != ga_loadgamehidecon) + { + if (autostart || netgame) + { + // Do not do any screenwipes when autostarting a game. + if (!Args->CheckParm("-warpwipe")) + { + NoWipe = TICRATE; + } + CheckWarpTransMap(startmap, true); + if (demorecording) + G_BeginRecording(startmap); + G_InitNew(startmap, false); + if (StoredWarp.IsNotEmpty()) + { + AddCommandString(StoredWarp.LockBuffer()); + StoredWarp = NULL; + } + } + else + { + D_StartTitle(); // start up intro loop + } + } + else if (demorecording) + { + G_BeginRecording(NULL); + } + + atterm(D_QuitNetGame); // killough } } - else if (demorecording) - { - G_BeginRecording (NULL); - } - - atterm (D_QuitNetGame); // killough } else { @@ -2643,7 +2647,7 @@ void D_DoomMain (void) } D_DoomLoop (); // this only returns if a 'restart' CCMD is given. 
- +maxberestart: // // Clean up after a restart // @@ -2654,6 +2658,7 @@ void D_DoomMain (void) M_ClearMenus(); // close menu if open F_EndFinale(); // If an intermission is active, end it now + AM_ClearColorsets(); // clean up game state ST_Clear(); From c99a051a2aaad2c8800fb12470857d476ae1bd9b Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Sun, 27 Nov 2016 10:42:03 +0100 Subject: [PATCH 11/23] - Added lambda feature to FString::(Strip|Replace)Chars. Use it in the other (Strip|Replace)Chars methods to show how it would work. --- src/zstring.cpp | 58 +++++++++++-------------------------------------- src/zstring.h | 36 +++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/zstring.cpp b/src/zstring.cpp index 1999f54fe..70ce9dc68 100644 --- a/src/zstring.cpp +++ b/src/zstring.cpp @@ -863,66 +863,34 @@ void FString::Insert (size_t index, const char *instr, size_t instrlen) void FString::ReplaceChars (char oldchar, char newchar) { - size_t i, j; + if (oldchar == '\0') + return; - LockBuffer(); - for (i = 0, j = Len(); i < j; ++i) - { - if (Chars[i] == oldchar) - { - Chars[i] = newchar; - } - } - UnlockBuffer(); + ReplaceChars([&oldchar](char c){ return c == oldchar; }, newchar); } void FString::ReplaceChars (const char *oldcharset, char newchar) { - size_t i, j; + if (oldcharset == NULL || oldcharset[0] == '\0') + return; - LockBuffer(); - for (i = 0, j = Len(); i < j; ++i) - { - if (strchr (oldcharset, Chars[i]) != NULL) - { - Chars[i] = newchar; - } - } - UnlockBuffer(); + ReplaceChars([&oldcharset](char c){ return strchr(oldcharset, c) != NULL; }, newchar); } void FString::StripChars (char killchar) { - size_t read, write, mylen; + if (killchar == '\0') + return; - LockBuffer(); - for (read = write = 0, mylen = Len(); read < mylen; ++read) - { - if (Chars[read] != killchar) - { - Chars[write++] = Chars[read]; - } - } - Chars[write] = '\0'; - ReallocBuffer (write); - UnlockBuffer(); + StripChars([&killchar](char c){ 
return c == killchar; }); } -void FString::StripChars (const char *killchars) +void FString::StripChars (const char *killcharset) { - size_t read, write, mylen; + if (killcharset == NULL || killcharset[0] == '\0') + return; - LockBuffer(); - for (read = write = 0, mylen = Len(); read < mylen; ++read) - { - if (strchr (killchars, Chars[read]) == NULL) - { - Chars[write++] = Chars[read]; - } - } - Chars[write] = '\0'; - ReallocBuffer (write); - UnlockBuffer(); + StripChars([&killcharset](char c){ return strchr(killcharset, c) != NULL; }); } void FString::MergeChars (char merger) diff --git a/src/zstring.h b/src/zstring.h index ba9208719..6138f38aa 100644 --- a/src/zstring.h +++ b/src/zstring.h @@ -236,11 +236,45 @@ public: void Insert (size_t index, const char *instr); void Insert (size_t index, const char *instr, size_t instrlen); + template<typename Func> + void ReplaceChars (Func IsOldChar, char newchar) + { + size_t i, j; + + LockBuffer(); + for (i = 0, j = Len(); i < j; ++i) + { + if (IsOldChar(Chars[i])) + { + Chars[i] = newchar; + } + } + UnlockBuffer(); + } + void ReplaceChars (char oldchar, char newchar); void ReplaceChars (const char *oldcharset, char newchar); + template<typename Func> + void StripChars (Func IsKillChar) + { + size_t read, write, mylen; + + LockBuffer(); + for (read = write = 0, mylen = Len(); read < mylen; ++read) + { + if (!IsKillChar(Chars[read])) + { + Chars[write++] = Chars[read]; + } + } + Chars[write] = '\0'; + ReallocBuffer (write); + UnlockBuffer(); + } + void StripChars (char killchar); - void StripChars (const char *killchars); + void StripChars (const char *killcharset); void MergeChars (char merger); void MergeChars (char merger, char newchar); From 0cff4439450ac48e6ea37e9c9b29e242819220b9 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Wed, 30 Nov 2016 20:59:09 +0100 Subject: [PATCH 12/23] - Fixed missing linebreak in 'currentpos' error message. 
--- src/c_cmds.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_cmds.cpp b/src/c_cmds.cpp index f9cea4005..56739502e 100644 --- a/src/c_cmds.cpp +++ b/src/c_cmds.cpp @@ -1144,7 +1144,7 @@ CCMD(currentpos) } else { - Printf("You are not in game!"); + Printf("You are not in game!\n"); } } From 0ed0c47a45909d18a8c230311db1f6511f641ecb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Dec 2016 13:57:54 +0100 Subject: [PATCH 13/23] Optimize capped sky rendering by writing 4 pixels at a time in 5 bands (solid, fade, texture, fade, solid) --- src/r_draw.cpp | 337 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 242 insertions(+), 95 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 5f592223b..0f3d04884 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2201,59 +2201,126 @@ void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - for (int col = 0; col < 4; col++) + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; + int textureheight0 = bufheight[0]; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; + int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 
3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) { - uint8_t *dest = dc_dest + col; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[col]; - int textureheight0 = bufheight[0]; + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - int32_t frac = vplce[col]; - int32_t fracstep = vince[col]; + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; 
index < count; index++) + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; + frac[col] += fracstep[col]; } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for 
(int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + output[col] = source0[col][sample_index]; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; } } @@ -2321,66 +2388,146 @@ void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - for (int col = 0; col < 4; col++) + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; + const uint8_t *source1[4] = { bufplce2[0], bufplce2[1], bufplce2[2], bufplce2[3] }; + int textureheight0 = bufheight[0]; + uint32_t maxtextureheight1 = bufheight[1] - 1; + const uint32_t *palette = (const 
uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; + int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) { - uint8_t *dest = dc_dest + col; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[col]; - const uint8_t *source1 = bufplce2[col]; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / 
fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - int32_t frac = vplce[col]; - int32_t fracstep = vince[col]; + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; if (fg == 0) { uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[sample_index2]; + fg = source1[col][sample_index2]; } + output[col] = fg; - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = 
RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; + frac[col] += fracstep[col]; } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - 
frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; } } From d0cf34890c580585ed7552d9d8c7b17b9630b7e8 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:01:48 +0100 Subject: [PATCH 14/23] - disabled R_DrawColumnHorizP_ASM because that function appears prone to crashing when rendering decals. --- src/r_draw.cpp | 2 +- src/r_draw.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0f3d04884..099904f25 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2556,7 +2556,7 @@ void R_InitColumnDrawers () { #ifdef X86_ASM R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; + R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; diff --git a/src/r_draw.h b/src/r_draw.h index fa84e5ae9..053149382 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -202,9 +202,10 @@ extern "C" void R_DrawFuzzColumnP_ASM (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); +void R_DrawColumnHorizP_C(void); + #else -void R_DrawColumnHorizP_C (void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); void R_DrawTranslatedColumnP_C (void); From 
93163d12f121704a0c70a915e5c56d500524fcad Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:31:08 +0100 Subject: [PATCH 15/23] - removed R_DrawColumnHorizP_ASM completely after discovering that the compiler generated code isn't really anything worse than the old assembly code. This looks like something that may have been relevant 10 years ago but today it looks like there's no need for hand optimization here anymore. And since it appears to be broken anyway, off this goes. --- src/asm_ia32/tmap.asm | 227 ------------------------------------------ 1 file changed, 227 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index fb372d488..d9e689ee1 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM: ret -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnHorizP_ASM -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnHorizP_ASM@0 -GLOBAL _R_DrawColumnHorizP_ASM -GLOBAL R_DrawColumnHorizP_ASM - - align 16 - -@R_DrawColumnHorizP_ASM@0: -_R_DrawColumnHorizP_ASM: -R_DrawColumnHorizP_ASM: - -; count = dc_yh - dc_yl; - - mov eax,[dc_yh] - mov ecx,[dc_yl] - sub eax,ecx - mov edx,[dc_x] - - jl near .leave ; count < 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - - inc eax ; make 0 count mean 0 pixels - and edx,3 - push eax - mov eax,[dc_temp] - mov esi,[dc_ctspan+edx*4] - add eax,edx - lea eax,[eax+ecx*4] ; eax = top of column in buffer - mov ebp,[dc_yh] - mov [esi],ecx - mov [esi+4],ebp - add esi,8 - mov edi,[dc_source] - mov [dc_ctspan+edx*4],esi - mov esi,[dc_iscale] - mov ecx,[dc_texturefrac] ; ecx = frac - mov dl,[edi] ; load cache - mov ebx,[esp] - and ebx,0xfffffff8 - jnz .mthan8 - -; Register usage in the following code is: -; -; eax: dest -; edi: source -; ecx: frac (16.16) -; esi: fracstep (16.16) -; ebx: add1 -; ebp: add2 -; dl: texel1 
-; dh: texel2 -;[esp] count - -; there are fewer than 8 pixels to draw - - mov ebx,[esp] -.lthan8 shr ebx,1 - jnc .even - -; do one pixel before loop (little opportunity for pairing) - - mov ebp,ecx ; copy frac to ebx - add ecx,esi ; increment frac - shr ebp,16 ; shift frac over to low end - add eax,4 - mov dl,[edi+ebp] - mov [eax-4],dl - -.even test ebx,ebx - jz near .done - -.loop2 mov [esp],ebx ; save counter - mov ebx,ecx ; copy frac for texel1 to ebx - shr ebx,16 ; shift frac for texel1 to low end - add ecx,esi ; increment frac - mov ebp,ecx ; copy frac for texel2 to ebp - shr ebp,16 ; shift frac for texel2 to low end - add ecx,esi ; increment frac - mov dl,[edi+ebx] ; read texel1 - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; read texel2 - mov [eax],dl ; write texel1 - mov [eax+4],dh ; write texel2 - add eax,8 ; increment dest - dec ebx ; decrement counter - jnz .loop2 ; loop until it hits 0 - - jmp .done - -; there are more than 8 pixels to draw. position eax as close to a 32 byte -; boundary as possible, then do whatever is left. 
- -.mthan8 test eax,4 - jz .try2 - - mov ebp,ecx ; frac: in ebp - add ecx,esi ; step - shr ebp,16 ; frac: shift - add eax,4 ; increment dest - mov ebx,[esp] ; fetch counter - mov dl,[edi+ebp] ; tex: read - dec ebx ; decrement counter - mov [eax-4],dl ; tex: write - mov [esp],ebx ; store counter - -.try2 test eax,8 - jz .try4 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex2: read - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - sub ebx,2 ; decrement counter - add eax,8 ; increment dest - mov [esp],ebx ; store counter - -.try4 test eax,16 - jz .try8 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - shr ebx,16 ; frac3: shift - mov dh,[edi+ebp] ; tex2: read - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex4: read - sub ebx,4 ; decrement counter - mov [esp],ebx ; store counter - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add eax,16 ; increment dest - -.try8 mov ebx,[esp] ; make counter count groups of 8 - sub esp,4 - shr ebx,3 - jmp .tail8 - - align 16 - -.loop8 mov [esp],ebx ; save counter - mov ebx,ecx ; frac1: in ebx - shr ebx,16 ; frac1: shift - add ecx,esi ; step - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - mov dh,[edi+ebp] ; tex2: read - shr ebx,16 ; frac3: shift - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp 
- shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,ecx ; frac5: in ebx - mov dh,[edi+ebp] ; tex4: read - shr ebx,16 ; frac5: shift - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add ecx,esi ; step - mov ebp,ecx ; frac6: in ebp - shr ebp,16 ; frac6: shift - mov dl,[edi+ebx] ; tex5: read - add ecx,esi ; step - mov ebx,ecx ; frac7: in ebx - mov [eax+16],dl ; tex5: write - shr ebx,16 ; frac7: shift - mov dh,[edi+ebp] ; tex6: read - add ecx,esi ; step - mov ebp,ecx ; frac8: in ebp - mov [eax+20],dh ; tex6: write - shr ebp,16 ; frac8: shift - add eax,32 ; increment dest pointer - mov dl,[edi+ebx] ; tex7: read - mov ebx,[esp] ; fetch counter - mov [eax-8],dl ; tex7: write - mov dh,[edi+ebp] ; tex8: read - add ecx,esi ; step - mov [eax-4],dh ; tex8: write - mov dl,[eax] ; load cache - dec ebx ; decrement counter -.tail8 jnz near .loop8 ; loop if more to do - - pop ebp - mov ebx,[esp] - and ebx,7 - jnz near .lthan8 - -.done pop eax - pop esi - pop edi - pop ebx - pop ebp -.leave ret - - ;*---------------------------------------------------------------------- ;* ;* rt_copy1col_asm From 8fd03bc4a314b080e563a80925c07d068cad876a Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 16:57:10 +0100 Subject: [PATCH 16/23] - fixed a few prototypes. 
--- src/r_draw.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 053149382..7d574c1c0 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -194,10 +194,8 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM extern "C" void R_DrawColumnP_Unrolled (void); -extern "C" void R_DrawColumnHorizP_ASM (void); extern "C" void R_DrawColumnP_ASM (void); extern "C" void R_DrawFuzzColumnP_ASM (void); - void R_DrawTranslatedColumnP_C (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); @@ -208,13 +206,14 @@ void R_DrawColumnHorizP_C(void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); -void R_DrawTranslatedColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); #endif +void R_DrawColumnHorizP_C(void); +void R_DrawTranslatedColumnP_C(void); void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); From 13efb349646e8de132b78e58d9b703c94758113b Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 18:05:34 +0100 Subject: [PATCH 17/23] - removed the asm version of R_DrawColumn because it doesn't provide any significant benefit. On modern systems it is actually slower than the C version, only on old ones it is marginally faster - but the overall execution time for this function is so low that even in the worst case scenario the minor loss of performance on older systems is still not relevant. 
--- src/asm_ia32/tmap.asm | 103 ------------------------------------------ src/r_draw.cpp | 25 +++------- src/r_draw.h | 17 +++---- 3 files changed, 13 insertions(+), 132 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index d9e689ee1..4ffcc370d 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -598,106 +598,6 @@ dmsdone add esp,8 -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnP -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnP_ASM@0 -GLOBAL _R_DrawColumnP_ASM -GLOBAL R_DrawColumnP_ASM - - align 16 - -R_DrawColumnP_ASM: -_R_DrawColumnP_ASM: -@R_DrawColumnP_ASM@0: - -; count = dc_yh - dc_yl; - - mov ecx,[dc_count] - test ecx,ecx - jle near rdcpret ; count <= 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - -; dest = ylookup[dc_yl] + dc_x + dc_destorg; - - mov edi,[dc_dest] - mov ebp,ecx - mov ebx,[dc_texturefrac] ; ebx = frac -rdcp1: sub edi,SPACEFILLER4 - mov ecx,ebx - shr ecx,16 - mov esi,[dc_source] - mov edx,[dc_iscale] - mov eax,[dc_colormap] - - cmp BYTE [CPU+66],byte 5 - jg rdcploop2 - - align 16 - -; The registers should now look like this: -; -; [31 .. 16][15 .. 8][7 .. 0] -; eax [colormap ] -; ebx [yi ][yf ] -; ecx [scratch ] -; edx [dyi ][dyf ] -; esi [source texture column ] -; edi [destination screen pointer ] -; ebp [counter ] -; - - -; Note the partial register stalls on anything better than a Pentium -; That's why there are two versions of this loop. 
- -rdcploop: - mov cl,[esi+ecx] ; Fetch texel - xor ch,ch - add ebx,edx ; increment frac -rdcp2: add edi,SPACEFILLER4 ; increment destination pointer - mov cl,[eax+ecx] ; colormap texel - mov [edi],cl ; Store texel - mov ecx,ebx - shr ecx,16 - dec ebp - jnz rdcploop ; loop - - pop esi - pop edi - pop ebx - pop ebp -rdcpret: - ret - - align 16 - -rdcploop2: - movzx ecx,byte [esi+ecx] ; Fetch texel - add ebx,edx ; increment frac - mov cl,[eax+ecx] ; colormap texel -rdcp3: add edi,SPACEFILLER4 ; increment destination pointer - mov [edi],cl ; Store texel - mov ecx,ebx - shr ecx,16 - dec ebp - jnz rdcploop2 ; loop - - pop esi - pop edi - pop ebx - pop ebp - ret - - - ;*---------------------------------------------------------------------- ;* ;* R_DrawFuzzColumnP @@ -1648,9 +1548,6 @@ ASM_PatchPitch: _ASM_PatchPitch: @ASM_PatchPitch@0: mov eax,[dc_pitch] - mov [rdcp1+2],eax - mov [rdcp2+2],eax - mov [rdcp3+2],eax mov [s4p+1],eax mov [a4p+1],eax mov [ac4p+1],eax diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 099904f25..7d8ebe036 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -69,16 +69,11 @@ int scaledviewwidth; // These get changed depending on the current // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); -void (*R_DrawColumn)(void); void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); void (*R_DrawSpan)(void); void (*R_DrawSpanMasked)(void); -void (*R_DrawSpanTranslucent)(void); -void (*R_DrawSpanMaskedTranslucent)(void); -void (*R_DrawSpanAddClamp)(void); -void (*R_DrawSpanMaskedAddClamp)(void); void (*rt_map4cols)(int,int,int); // @@ -171,7 +166,6 @@ void R_InitShadeMaps() /* */ /************************************/ -#ifndef X86_ASM // // A column is a vertical slice/span from a wall texture that, // given the DOOM style restrictions on the view orientation, @@ -179,7 +173,7 @@ void R_InitShadeMaps() // Thus a special case loop for very fast rendering can // be used. 
It has also been used with Wolfenstein 3D. // -void R_DrawColumnP_C (void) +void R_DrawColumn (void) { int count; BYTE* dest; @@ -222,7 +216,7 @@ void R_DrawColumnP_C (void) } while (--count); } } -#endif + // [RH] Just fills a column with a color void R_FillColumnP (void) @@ -1192,7 +1186,7 @@ void R_DrawSpanMaskedP_C (void) } #endif -void R_DrawSpanTranslucentP_C (void) +void R_DrawSpanTranslucent (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1252,7 +1246,7 @@ void R_DrawSpanTranslucentP_C (void) } } -void R_DrawSpanMaskedTranslucentP_C (void) +void R_DrawSpanMaskedTranslucent (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1326,7 +1320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void R_DrawSpanAddClampP_C (void) +void R_DrawSpanAddClamp (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1392,7 +1386,7 @@ void R_DrawSpanAddClampP_C (void) } } -void R_DrawSpanMaskedAddClampP_C (void) +void R_DrawSpanMaskedAddClamp (void) { dsfixed_t xfrac; dsfixed_t yfrac; @@ -2550,12 +2544,10 @@ const BYTE *R_GetColumn (FTexture *tex, int col) return tex->GetColumn (col, NULL); } - // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { #ifdef X86_ASM - R_DrawColumn = R_DrawColumnP_ASM; R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; @@ -2572,7 +2564,6 @@ void R_InitColumnDrawers () } #else R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawColumn = R_DrawColumnP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; @@ -2580,10 +2571,6 @@ void R_InitColumnDrawers () R_DrawSpanMasked = R_DrawSpanMaskedP_C; rt_map4cols = rt_map4cols_c; #endif - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; } // [RH] Choose column 
drawers in a single place diff --git a/src/r_draw.h b/src/r_draw.h index 7d574c1c0..c5e99e610 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -65,7 +65,6 @@ extern "C" unsigned int horizspans[4]; // The span blitting interface. // Hook in assembler or system specific BLT here. -extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); @@ -103,16 +102,16 @@ void R_SetSpanSource(const BYTE *pixels); extern void (*R_DrawSpanMasked)(void); // Span drawing for translucent textures. -extern void (*R_DrawSpanTranslucent)(void); +void R_DrawSpanTranslucent(void); // Span drawing for masked, translucent textures. -extern void (*R_DrawSpanMaskedTranslucent)(void); +void R_DrawSpanMaskedTranslucent(void); // Span drawing for translucent, additive textures. -extern void (*R_DrawSpanAddClamp)(void); +void R_DrawSpanAddClamp(void); // Span drawing for masked, translucent, additive textures. -extern void (*R_DrawSpanMaskedAddClamp)(void); +void R_DrawSpanMaskedAddClamp(void); // [RH] Span blit into an interleaved intermediate buffer extern void (*R_DrawColumnHoriz)(void); @@ -193,8 +192,6 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM -extern "C" void R_DrawColumnP_Unrolled (void); -extern "C" void R_DrawColumnP_ASM (void); extern "C" void R_DrawFuzzColumnP_ASM (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); @@ -204,7 +201,6 @@ void R_DrawColumnHorizP_C(void); #else -void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); @@ -212,10 +208,11 @@ void R_DrawSpanMaskedP_C (void); #endif +void R_DrawColumn(); void R_DrawColumnHorizP_C(void); void R_DrawTranslatedColumnP_C(void); -void R_DrawSpanTranslucentP_C (void); -void R_DrawSpanMaskedTranslucentP_C (void); +void R_DrawSpanTranslucent (void); +void R_DrawSpanMaskedTranslucent (void); void R_DrawTlatedLucentColumnP_C (void); #define 
R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C From c9caaf08c8f0f94c46326b85a0130d84c05dc930 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 18:38:38 +0100 Subject: [PATCH 18/23] - removed asm version of R_DrawFuzzColumn, because even on my 9 year old laptop this was significantly slower than the C version. --- src/asm_ia32/tmap.asm | 140 ------------------------------------------ src/r_draw.cpp | 8 +-- src/r_draw.h | 4 +- 3 files changed, 3 insertions(+), 149 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index 4ffcc370d..cbfadd5d1 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -597,146 +597,6 @@ dmsdone add esp,8 - -;*---------------------------------------------------------------------- -;* -;* R_DrawFuzzColumnP -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawFuzzColumnP_ASM@0 -GLOBAL _R_DrawFuzzColumnP_ASM -GLOBAL R_DrawFuzzColumnP_ASM - - align 16 - -R_DrawFuzzColumnP_ASM: -_R_DrawFuzzColumnP_ASM: -@R_DrawFuzzColumnP_ASM@0: - -; Adjust borders. Low... - mov eax,[dc_yl] - push ebx - push esi - push edi - push ebp - - cmp eax,0 - jg .ylok - - mov eax,1 - nop - -; ...and high. 
-.ylok mov edx,[fuzzviewheight] - mov esi,[dc_yh] - cmp esi,edx - jle .yhok - - mov esi,edx - nop - -.yhok mov edx,[dc_x] - sub esi,eax ; esi = count - js near .dfcdone ; Zero length (or less) - - mov edi,[ylookup+eax*4] - mov ebx,edx - add edi,[dc_destorg] - mov eax,[NormalLight] - mov ecx,[fuzzpos] - add edi,ebx - add eax,256*6 - inc esi - mov ebp,[dc_pitch] - mov edx,FUZZTABLE - test ecx,ecx - je .fuzz0 - -; -; esi = count -; edi = dest -; ecx = fuzzpos -; eax = colormap 6 -; - -; first loop: end with fuzzpos or count 0, whichever happens first - - sub edx,ecx ; edx = # of entries left in fuzzoffset - mov ebx,esi - cmp esi,edx - jle .enuf - mov esi,edx -.enuf sub ebx,esi - mov edx,[fuzzoffset+ecx*4] - push ebx - xor ebx,ebx - -.loop1 inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .loop1 - -; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those - - pop esi - cmp ecx,FUZZTABLE - jl .savefuzzpos - xor ecx,ecx - nop -.fuzz0 cmp esi,FUZZTABLE - jl .chunked - -.oloop lea edx,[esi-FUZZTABLE] - mov esi,FUZZTABLE - push edx - mov edx,[fuzzoffset+ecx*4] - -.iloop inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .iloop - - pop esi - xor ecx,ecx - cmp esi,FUZZTABLE - jge .oloop - -; third loop: Do whatever is left - -.chunked: - test esi,esi - jle .savefuzzpos - mov edx,[fuzzoffset+ecx*4] - nop - -.loop3 inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .loop3 - -.savefuzzpos: - mov [fuzzpos],ecx -.dfcdone: - pop ebp - pop edi - pop esi - pop ebx - ret - - ;*---------------------------------------------------------------------- ;* ;* rt_copy1col_asm diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7d8ebe036..a48fc7195 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -69,7 +69,6 @@ int scaledviewwidth; // These get changed 
depending on the current // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); -void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); void (*R_DrawSpan)(void); @@ -408,13 +407,12 @@ void R_InitFuzzTable (int fuzzoff) } } -#ifndef X86_ASM // // Creates a fuzzy image by copying pixels from adjacent ones above and below. // Used with an all black colormap, this could create the SHADOW effect, // i.e. spectres and invisible players. // -void R_DrawFuzzColumnP_C (void) +void R_DrawFuzzColumn (void) { int count; BYTE *dest; @@ -484,7 +482,6 @@ void R_DrawFuzzColumnP_C (void) fuzzpos = fuzz; } } -#endif // // R_DrawTranlucentColumn @@ -2544,12 +2541,12 @@ const BYTE *R_GetColumn (FTexture *tex, int col) return tex->GetColumn (col, NULL); } + // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { #ifdef X86_ASM R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_ASM; @@ -2564,7 +2561,6 @@ void R_InitColumnDrawers () } #else R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_C; diff --git a/src/r_draw.h b/src/r_draw.h index c5e99e610..8b1c8ffbc 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -83,7 +83,7 @@ extern void setupmvline (int); extern void setuptmvline (int); // The Spectre/Invisibility effect. 
-extern void (*R_DrawFuzzColumn)(void); +extern void R_DrawFuzzColumn(void); // [RH] Draw shaded column extern void (*R_DrawShadedColumn)(void); @@ -192,7 +192,6 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM -extern "C" void R_DrawFuzzColumnP_ASM (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); @@ -201,7 +200,6 @@ void R_DrawColumnHorizP_C(void); #else -void R_DrawFuzzColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); From f4454d2e00f3b078007e9a25b7ede8aebc7f394b Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 19:32:54 +0100 Subject: [PATCH 19/23] - minor, but very effective optimization for R_DrawSpanMasked: Do not store the texel value in a byte. Store it in a local int variable. This allows the compiler to read it with a zero extending instruction instead of using a byte reading instruction and then later having it to convert to an int anyway. This removes one instruction from the loop which results in a 10% performance increase on 32 bit. --- src/r_draw.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a48fc7195..fba01cbfe 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1037,7 +1037,7 @@ void R_SetupSpanBits(FTexture *tex) { ds_xbits--; } - if ((1 << ds_ybits) > tex->GetHeight()) + if ((1 << ds_ybits) > tex->GetHeight()) { ds_ybits--; } @@ -1048,7 +1048,7 @@ void R_SetupSpanBits(FTexture *tex) // // Draws the actual span. -#ifndef X86_ASM +//#ifndef X86_ASM void R_DrawSpanP_C (void) { dsfixed_t xfrac; @@ -1147,7 +1147,7 @@ void R_DrawSpanMaskedP_C (void) // 64x64 is the most common case by far, so special case it. 
do { - BYTE texdata; + int texdata; spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); texdata = source[spot]; @@ -1167,7 +1167,7 @@ void R_DrawSpanMaskedP_C (void) int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + int texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; @@ -1181,7 +1181,7 @@ void R_DrawSpanMaskedP_C (void) } while (--count); } } -#endif +//#endif void R_DrawSpanTranslucent (void) { From 86fcc3fd21ca4b66daa3f3e0d7287de6908e2ef1 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 23:53:36 +0100 Subject: [PATCH 20/23] - added a heavily optimized version of vlinec4 for x64. The original loaded everything from the global variables. While this is acceptable in 32 bit code because it has an immediate register load instruction, for 64 bit this does not exist. Accessing these variables from the stack or a register doubles the execution speed of this function and on a Core i7-3770 from 2012 is even faster than the assembly version. Right now the assembly version is still there, pending a benchmark run on an older 64 bit system. --- src/r_draw.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index fba01cbfe..0e217c2d7 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1673,6 +1673,7 @@ DWORD vlinec1 () return frac; } +#ifndef _M_X64 void vlinec4 () { BYTE *dest = dc_dest; @@ -1689,6 +1690,43 @@ void vlinec4 () dest += dc_pitch; } while (--count); } +#else +// Optimized version for 64 bit. In 64 bit mode, accessing global variables is very expensive so even though +// this exceeds the register count, loading all those values into a local variable is faster than not loading all of them. 
+void vlinec4() +{ + BYTE *dest = dc_dest; + int count = dc_count; + int bits = vlinebits; + DWORD place; + auto pal0 = palookupoffse[0]; + auto pal1 = palookupoffse[1]; + auto pal2 = palookupoffse[2]; + auto pal3 = palookupoffse[3]; + auto buf0 = bufplce[0]; + auto buf1 = bufplce[1]; + auto buf2 = bufplce[2]; + auto buf3 = bufplce[3]; + const auto vince0 = vince[0]; + const auto vince1 = vince[1]; + const auto vince2 = vince[2]; + const auto vince3 = vince[3]; + auto vplce0 = vplce[0]; + auto vplce1 = vplce[1]; + auto vplce2 = vplce[2]; + auto vplce3 = vplce[3]; + + do + { + dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; + dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; + dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; + dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; + dest += dc_pitch; + } while (--count); +} +#endif + #endif void setupmvline (int fracbits) From f6fb27b6835f586a381d86e6d212ba7626546a7d Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 00:46:58 +0100 Subject: [PATCH 21/23] - deleted rt_copy*col and rt_map*col assembly versions after running benchmarks that show inferior performance to the C++ versions on both older and newer CPUs. 
--- src/asm_ia32/tmap.asm | 421 ------------------------------------------ src/r_draw.cpp | 11 +- src/r_draw.h | 23 +-- src/r_drawt.cpp | 10 +- 4 files changed, 11 insertions(+), 454 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index cbfadd5d1..2096b9222 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -597,427 +597,6 @@ dmsdone add esp,8 -;*---------------------------------------------------------------------- -;* -;* rt_copy1col_asm -;* -;* ecx = hx -;* edx = sx -;* [esp+4] = yl -;* [esp+8] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_copy1col_asm@16 -GLOBAL _rt_copy1col_asm -GLOBAL rt_copy1col_asm - - align 16 - -rt_copy1col_asm: -_rt_copy1col_asm: - pop eax - mov edx,[esp+4*3] - mov ecx,[esp+4*2] - push edx - push ecx - mov ecx,[esp+4*2] - mov edx,[esp+4*3] - push eax - -@rt_copy1col_asm@16: - mov eax, [esp+4] - push ebx - mov ebx, [esp+12] - push esi - sub ebx, eax - push edi - js .done - - lea esi,[eax*4] - inc ebx ; ebx = count - mov eax,edx - add ecx,esi - mov edi,[ylookup+esi] - add ecx,[dc_temp] ; ecx = source - mov esi,[dc_pitch] ; esi = pitch - add eax,edi ; eax = dest - add eax,[dc_destorg] - - shr ebx,1 - jnc .even - - mov dl,[ecx] - add ecx,4 - mov [eax],dl - add eax,esi - -.even and ebx,ebx - jz .done - -.loop mov dl,[ecx] - mov dh,[ecx+4] - mov [eax],dl - mov [eax+esi],dh - add ecx,8 - lea eax,[eax+esi*2] - dec ebx - jnz .loop - -.done pop edi - pop esi - pop ebx - ret 8 - -;*---------------------------------------------------------------------- -;* -;* rt_copy4cols_asm -;* -;* ecx = sx -;* edx = yl -;* [esp+4] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_copy4cols_asm@12 -GLOBAL _rt_copy4cols_asm -GLOBAL rt_copy4cols_asm - - align 16 - -rt_copy4cols_asm: -_rt_copy4cols_asm: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_copy4cols_asm@12: - push ebx - mov 
ebx,[esp+8] - push esi - sub ebx,edx - push edi - js .done - - inc ebx ; ebx = count - mov eax,ecx - mov esi,[ylookup+edx*4] - mov ecx,[dc_temp] - add eax,esi ; eax = dest - add eax,[dc_destorg] - lea ecx,[ecx+edx*4] ; ecx = source - mov edx,[dc_pitch] ; edx = pitch - - shr ebx,1 - jnc .even - - mov esi,[ecx] - add ecx,4 - mov [eax],esi - add eax,edx - -.even and ebx,ebx - jz .done - -.loop mov esi,[ecx] - mov edi,[ecx+4] - mov [eax],esi - mov [eax+edx],edi - add ecx,8 - lea eax,[eax+edx*2] - dec ebx - jnz .loop - -.done pop edi - pop esi - pop ebx - ret 4 - -;*---------------------------------------------------------------------- -;* -;* rt_map1col_asm -;* -;* ecx = hx -;* edx = sx -;* [esp+4] = yl -;* [esp+8] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_map1col_asm@16 -GLOBAL _rt_map1col_asm -GLOBAL rt_map1col_asm - - align 16 - -rt_map1col_asm: -_rt_map1col_asm: - pop eax - mov edx,[esp+4*3] - mov ecx,[esp+4*2] - push edx - push ecx - mov ecx,[esp+4*2] - mov edx,[esp+4*3] - push eax - -@rt_map1col_asm@16: - mov eax,[esp+4] - push ebx - mov ebx,[esp+12] - push ebp - push esi - sub ebx, eax - push edi - js .done - - lea edi,[eax*4] - mov esi,[dc_colormap] ; esi = colormap - inc ebx ; ebx = count - mov eax,edx - lea ebp,[ecx+edi] ; ebp = source - add ebp,[dc_temp] - mov ecx,[ylookup+edi] - mov edi,[dc_pitch] ; edi = pitch - add eax,ecx ; eax = dest - xor ecx,ecx - xor edx,edx - add eax,[dc_destorg] - - shr ebx,1 - jnc .even - - mov dl,[ebp] - add ebp,4 - mov dl,[esi+edx] - mov [eax],dl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop mov dl,[ebp] - mov cl,[ebp+4] - add ebp,8 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+edi],cl - dec ebx - lea eax,[eax+edi*2] - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 8 - -;*---------------------------------------------------------------------- -;* -;* rt_map4cols_asm -;* -;* rt_map4cols_asm1 is for PPro and above -;* rt_map4cols_asm2 
is for Pentium and below -;* -;* ecx = sx -;* edx = yl -;* [esp+4] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_map4cols_asm1@12 -GLOBAL _rt_map4cols_asm1 -GLOBAL rt_map4cols_asm1 - - align 16 - -rt_map4cols_asm1: -_rt_map4cols_asm1: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_map4cols_asm1@12: - push ebx - mov ebx,[esp+8] - push ebp - push esi - sub ebx,edx - push edi - js near .done - - mov esi,[dc_colormap] ; esi = colormap - shl edx,2 - mov eax,ecx - inc ebx ; ebx = count - mov edi,[ylookup+edx] - mov ebp,[dc_temp] - add ebp,edx ; ebp = source - add eax,edi ; eax = dest - mov edi,[dc_pitch] ; edi = pitch - add eax,[dc_destorg] - xor ecx,ecx - xor edx,edx - - shr ebx,1 - jnc .even - - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,4 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop: - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,8 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-6] - mov cl,[ebp-5] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - mov dl,[ebp-4] - mov cl,[ebp-3] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+edi],dl - mov [eax+edi+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+edi+2],dl - mov [eax+edi+3],cl - lea eax,[eax+edi*2] - dec ebx - - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 4 - -GLOBAL @rt_map4cols_asm2@12 -GLOBAL _rt_map4cols_asm2 -GLOBAL rt_map4cols_asm2 - - align 16 - -rt_map4cols_asm2: -_rt_map4cols_asm2: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_map4cols_asm2@12: - push ebx - mov ebx,[esp+8] - push ebp - push esi - sub ebx,edx - push edi - js near .done - - mov esi,[dc_colormap] ; esi = 
colormap - shl edx,2 - mov eax,ecx - inc ebx ; ebx = count - mov edi,[ylookup+edx] - mov ebp,[dc_temp] - add ebp,edx ; ebp = source - add eax,edi ; eax = dest - mov edi,[dc_pitch] ; edi = pitch - add eax,[dc_destorg] - xor ecx,ecx - xor edx,edx - - shr ebx,1 - jnc .even - - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,4 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop: - mov dl,[ebp+3] - mov ch,[esi+edx] - mov dl,[ebp+2] - mov cl,[esi+edx] - shl ecx,16 - mov dl,[ebp+1] - mov ch,[esi+edx] - mov dl,[ebp] - mov cl,[esi+edx] - mov [eax],ecx - add eax,edi - - mov dl,[ebp+7] - mov ch,[esi+edx] - mov dl,[ebp+6] - mov cl,[esi+edx] - shl ecx,16 - mov dl,[ebp+5] - mov ch,[esi+edx] - mov dl,[ebp+4] - mov cl,[esi+edx] - mov [eax],ecx - add eax,edi - add ebp,8 - dec ebx - - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 4 - - align 16 GLOBAL rt_shaded4cols_asm GLOBAL _rt_shaded4cols_asm diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0e217c2d7..7e966c8ab 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -73,7 +73,6 @@ void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); void (*R_DrawSpan)(void); void (*R_DrawSpanMasked)(void); -void (*rt_map4cols)(int,int,int); // // R_DrawColumn @@ -2589,21 +2588,13 @@ void R_InitColumnDrawers () R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_ASM; R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; - if (CPU.Family <= 5) - { - rt_map4cols = rt_map4cols_asm2; - } - else - { - rt_map4cols = rt_map4cols_asm1; - } #else R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_C; R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols_c; + rt_map4cols = rt_map4cols; #endif } diff --git a/src/r_draw.h 
b/src/r_draw.h index 8b1c8ffbc..6713d4091 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -120,16 +120,19 @@ extern void (*R_DrawColumnHoriz)(void); void R_InitColumnDrawers (); // [RH] Moves data from the temporary buffer to the screen. + +void rt_copy1col(int hx, int sx, int yl, int yh); +void rt_copy4cols(int sx, int yl, int yh); +void rt_map4cols(int sx, int yl, int yh); + extern "C" { -void rt_copy1col_c (int hx, int sx, int yl, int yh); -void rt_copy4cols_c (int sx, int yl, int yh); void rt_shaded1col (int hx, int sx, int yl, int yh); void rt_shaded4cols_c (int sx, int yl, int yh); void rt_shaded4cols_asm (int sx, int yl, int yh); -void rt_map1col_c (int hx, int sx, int yl, int yh); +void rt_map1col (int hx, int sx, int yl, int yh); void rt_add1col (int hx, int sx, int yl, int yh); void rt_addclamp1col (int hx, int sx, int yl, int yh); void rt_subclamp1col (int hx, int sx, int yl, int yh); @@ -141,7 +144,6 @@ void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); -void rt_map4cols_c (int sx, int yl, int yh); void rt_add4cols_c (int sx, int yl, int yh); void rt_addclamp4cols_c (int sx, int yl, int yh); void rt_subclamp4cols (int sx, int yl, int yh); @@ -153,29 +155,16 @@ void rt_tlateaddclamp4cols (int sx, int yl, int yh); void rt_tlatesubclamp4cols (int sx, int yl, int yh); void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); -void rt_copy1col_asm (int hx, int sx, int yl, int yh); -void rt_map1col_asm (int hx, int sx, int yl, int yh); - -void rt_copy4cols_asm (int sx, int yl, int yh); -void rt_map4cols_asm1 (int sx, int yl, int yh); -void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); } -extern void (*rt_map4cols)(int sx, int yl, int yh); #ifdef X86_ASM -#define rt_copy1col rt_copy1col_asm -#define rt_copy4cols rt_copy4cols_asm -#define 
rt_map1col rt_map1col_asm #define rt_shaded4cols rt_shaded4cols_asm #define rt_add4cols rt_add4cols_asm #define rt_addclamp4cols rt_addclamp4cols_asm #else -#define rt_copy1col rt_copy1col_c -#define rt_copy4cols rt_copy4cols_c -#define rt_map1col rt_map1col_c #define rt_shaded4cols rt_shaded4cols_c #define rt_add4cols rt_add4cols_c #define rt_addclamp4cols rt_addclamp4cols_c diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index cb228cce0..a4f581d12 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -69,9 +69,8 @@ extern "C" void R_SetupAddCol(); extern "C" void R_SetupAddClampCol(); #endif -#ifndef X86_ASM // Copies one span at hx to the screen at sx. -void rt_copy1col_c (int hx, int sx, int yl, int yh) +void rt_copy1col (int hx, int sx, int yl, int yh) { BYTE *source; BYTE *dest; @@ -112,7 +111,7 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_c (int sx, int yl, int yh) +void rt_copy4cols (int sx, int yl, int yh) { int *source; int *dest; @@ -145,7 +144,7 @@ void rt_copy4cols_c (int sx, int yl, int yh) } // Maps one span at hx to the screen at sx. -void rt_map1col_c (int hx, int sx, int yl, int yh) +void rt_map1col (int hx, int sx, int yl, int yh) { BYTE *colormap; BYTE *source; @@ -180,7 +179,7 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) } // Maps all four spans to the screen starting at sx. -void rt_map4cols_c (int sx, int yl, int yh) +void rt_map4cols (int sx, int yl, int yh) { BYTE *colormap; BYTE *source; @@ -222,7 +221,6 @@ void rt_map4cols_c (int sx, int yl, int yh) dest += pitch*2; } while (--count); } -#endif void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { From c0a622eb544f0b99de1ff5d74e3e7abcaba7ccae Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 02:08:26 +0100 Subject: [PATCH 22/23] - removed pointless assignment. 
--- src/r_draw.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7e966c8ab..6f58ec2a3 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2594,7 +2594,6 @@ void R_InitColumnDrawers () R_DrawShadedColumn = R_DrawShadedColumnP_C; R_DrawSpan = R_DrawSpanP_C; R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols; #endif } From 7624973ef331409d635de8447779524cc79863e7 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 5 Dec 2016 11:50:41 +0100 Subject: [PATCH 23/23] - updated list of render styles in UDMF spec. --- specs/udmf_zdoom.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/specs/udmf_zdoom.txt b/specs/udmf_zdoom.txt index 9662af1bb..835f3780e 100644 --- a/specs/udmf_zdoom.txt +++ b/specs/udmf_zdoom.txt @@ -252,9 +252,10 @@ Note: All fields default to false unless mentioned otherwise. // negative values are used as their absolute. Default = 1. renderstyle = ; // Set per-actor render style, overriding the class default. Possible values can be "normal", - // "none", "add" or "additive", "subtract" or "subtractive", "stencil", "translucentstencil", - // "translucent", "fuzzy", "optfuzzy", "soultrans". Default is an empty string for no change. - fillcolor = ; // Fill color used by the "stencil" and "translucentstencil" rendestyles, as RRGGBB value, default = 0x000000. + // "none", "add" or "additive", "subtract" or "subtractive", "stencil", "translucentstencil", + // "addstencil", "shaded", "addshaded", "translucent", "fuzzy", "optfuzzy", "soultrans" and "shadow". + // Default is an empty string for no change. + fillcolor = ; // Fill color used by the "stencil", "addstencil" and "translucentstencil" rendestyles, as RRGGBB value, default = 0x000000. alpha = ; // Translucency of this actor (if applicable to renderstyle), default is 1.0. score = ; // Score value of this actor, overriding the class default if not null. Default = 0. pitch = ; // Pitch of thing in degrees. 
Default = 0 (horizontal).