From c99a051a2aaad2c8800fb12470857d476ae1bd9b Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Sun, 27 Nov 2016 10:42:03 +0100 Subject: [PATCH 1/6] - Added lambda feature to FString::(Strip|Replace)Chars. Use it in the other (Strip|Replace)Chars methods to show how it would work. --- src/zstring.cpp | 58 +++++++++++-------------------------------------- src/zstring.h | 36 +++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/zstring.cpp b/src/zstring.cpp index 1999f54fe..70ce9dc68 100644 --- a/src/zstring.cpp +++ b/src/zstring.cpp @@ -863,66 +863,34 @@ void FString::Insert (size_t index, const char *instr, size_t instrlen) void FString::ReplaceChars (char oldchar, char newchar) { - size_t i, j; + if (oldchar == '\0') + return; - LockBuffer(); - for (i = 0, j = Len(); i < j; ++i) - { - if (Chars[i] == oldchar) - { - Chars[i] = newchar; - } - } - UnlockBuffer(); + ReplaceChars([&oldchar](char c){ return c == oldchar; }, newchar); } void FString::ReplaceChars (const char *oldcharset, char newchar) { - size_t i, j; + if (oldcharset == NULL || oldcharset[0] == '\0') + return; - LockBuffer(); - for (i = 0, j = Len(); i < j; ++i) - { - if (strchr (oldcharset, Chars[i]) != NULL) - { - Chars[i] = newchar; - } - } - UnlockBuffer(); + ReplaceChars([&oldcharset](char c){ return strchr(oldcharset, c) != NULL; }, newchar); } void FString::StripChars (char killchar) { - size_t read, write, mylen; + if (killchar == '\0') + return; - LockBuffer(); - for (read = write = 0, mylen = Len(); read < mylen; ++read) - { - if (Chars[read] != killchar) - { - Chars[write++] = Chars[read]; - } - } - Chars[write] = '\0'; - ReallocBuffer (write); - UnlockBuffer(); + StripChars([&killchar](char c){ return c == killchar; }); } -void FString::StripChars (const char *killchars) +void FString::StripChars (const char *killcharset) { - size_t read, write, mylen; + if (killcharset == NULL || killcharset[0] == '\0') + return; - LockBuffer(); - for (read = write = 0, mylen = Len(); read < mylen; ++read) - { - if (strchr (killchars, Chars[read]) == NULL) - { - Chars[write++] = Chars[read]; - } - } - Chars[write] = '\0'; - ReallocBuffer (write); - UnlockBuffer(); + StripChars([&killcharset](char c){ return strchr(killcharset, c) != NULL; }); } void FString::MergeChars (char merger) diff --git a/src/zstring.h b/src/zstring.h index ba9208719..6138f38aa 100644 --- a/src/zstring.h +++ b/src/zstring.h @@ -236,11 +236,45 @@ public: void Insert (size_t index, const char *instr); void Insert (size_t index, const char *instr, size_t instrlen); + template + void ReplaceChars (Func IsOldChar, char newchar) + { + size_t i, j; + + LockBuffer(); + for (i = 0, j = Len(); i < j; ++i) + { + if (IsOldChar(Chars[i])) + { + Chars[i] = newchar; + } + } + UnlockBuffer(); + } + void ReplaceChars (char oldchar, char newchar); void ReplaceChars (const char *oldcharset, char newchar); + template + void StripChars (Func IsKillChar) + { + size_t read, write, mylen; + + LockBuffer(); + for (read = write = 0, mylen = Len(); read < mylen; ++read) + { + if (!IsKillChar(Chars[read])) + { + Chars[write++] = Chars[read]; + } + } + Chars[write] = '\0'; + ReallocBuffer (write); + UnlockBuffer(); + } + void StripChars (char killchar); - void StripChars (const char *killchars); + void StripChars (const char *killcharset); void MergeChars (char merger); void MergeChars (char merger, char newchar); From 0cff4439450ac48e6ea37e9c9b29e242819220b9 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Wed, 30 Nov 2016 20:59:09 +0100 Subject: [PATCH 2/6] - Fixed missing linebreak in 'currentpos' error message. --- src/c_cmds.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_cmds.cpp b/src/c_cmds.cpp index f9cea4005..56739502e 100644 --- a/src/c_cmds.cpp +++ b/src/c_cmds.cpp @@ -1144,7 +1144,7 @@ CCMD(currentpos) } else { - Printf("You are not in game!"); + Printf("You are not in game!\n"); } } From 0ed0c47a45909d18a8c230311db1f6511f641ecb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Dec 2016 13:57:54 +0100 Subject: [PATCH 3/6] Optimize capped sky rendering by writing 4 pixels at a time in 5 bands (solid, fade, texture, fade, solid) --- src/r_draw.cpp | 337 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 242 insertions(+), 95 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 5f592223b..0f3d04884 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2201,59 +2201,126 @@ void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - for (int col = 0; col < 4; col++) + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; + int textureheight0 = bufheight[0]; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; + int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) { - uint8_t *dest = dc_dest + col; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[col]; - int textureheight0 = bufheight[0]; + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - int32_t frac = vplce[col]; - int32_t fracstep = vince[col]; + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; + frac[col] += fracstep[col]; } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + output[col] = source0[col][sample_index]; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; } } @@ -2321,66 +2388,146 @@ void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - for (int col = 0; col < 4; col++) + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; + const uint8_t *source1[4] = { bufplce2[0], bufplce2[1], bufplce2[2], bufplce2[3] }; + int textureheight0 = bufheight[0]; + uint32_t maxtextureheight1 = bufheight[1] - 1; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; + int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) { - uint8_t *dest = dc_dest + col; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[col]; - const uint8_t *source1 = bufplce2[col]; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - int32_t frac = vplce[col]; - int32_t fracstep = vince[col]; + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; if (fg == 0) { uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[sample_index2]; + fg = source1[col][sample_index2]; } + output[col] = fg; - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; + frac[col] += fracstep[col]; } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; } } From d0cf34890c580585ed7552d9d8c7b17b9630b7e8 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:01:48 +0100 Subject: [PATCH 4/6] - disabled R_DrawColumnHorizP_ASM because that function appears prone to crashing when rendering decals. --- src/r_draw.cpp | 2 +- src/r_draw.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0f3d04884..099904f25 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2556,7 +2556,7 @@ void R_InitColumnDrawers () { #ifdef X86_ASM R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; + R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; R_DrawShadedColumn = R_DrawShadedColumnP_C; diff --git a/src/r_draw.h b/src/r_draw.h index fa84e5ae9..053149382 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -202,9 +202,10 @@ extern "C" void R_DrawFuzzColumnP_ASM (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); +void R_DrawColumnHorizP_C(void); + #else -void R_DrawColumnHorizP_C (void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); void R_DrawTranslatedColumnP_C (void); From 93163d12f121704a0c70a915e5c56d500524fcad Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:31:08 +0100 Subject: [PATCH 5/6] - removed R_DrawColumnHorizP_ASM completely after discovering that the compiler generated code isn't really anything worse than the old assembly code. This looks like something that may have been relevant 10 years ago but today it looks like there's no need for hand optimization here anymore. And since it appears to be broken anyway, off this goes. --- src/asm_ia32/tmap.asm | 227 ------------------------------------------ 1 file changed, 227 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index fb372d488..d9e689ee1 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM: ret -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnHorizP_ASM -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnHorizP_ASM@0 -GLOBAL _R_DrawColumnHorizP_ASM -GLOBAL R_DrawColumnHorizP_ASM - - align 16 - -@R_DrawColumnHorizP_ASM@0: -_R_DrawColumnHorizP_ASM: -R_DrawColumnHorizP_ASM: - -; count = dc_yh - dc_yl; - - mov eax,[dc_yh] - mov ecx,[dc_yl] - sub eax,ecx - mov edx,[dc_x] - - jl near .leave ; count < 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - - inc eax ; make 0 count mean 0 pixels - and edx,3 - push eax - mov eax,[dc_temp] - mov esi,[dc_ctspan+edx*4] - add eax,edx - lea eax,[eax+ecx*4] ; eax = top of column in buffer - mov ebp,[dc_yh] - mov [esi],ecx - mov [esi+4],ebp - add esi,8 - mov edi,[dc_source] - mov [dc_ctspan+edx*4],esi - mov esi,[dc_iscale] - mov ecx,[dc_texturefrac] ; ecx = frac - mov dl,[edi] ; load cache - mov ebx,[esp] - and ebx,0xfffffff8 - jnz .mthan8 - -; Register usage in the following code is: -; -; eax: dest -; edi: source -; ecx: frac (16.16) -; esi: fracstep (16.16) -; ebx: add1 -; ebp: add2 -; dl: texel1 -; dh: texel2 -;[esp] count - -; there are fewer than 8 pixels to draw - - mov ebx,[esp] -.lthan8 shr ebx,1 - jnc .even - -; do one pixel before loop (little opportunity for pairing) - - mov ebp,ecx ; copy frac to ebx - add ecx,esi ; increment frac - shr ebp,16 ; shift frac over to low end - add eax,4 - mov dl,[edi+ebp] - mov [eax-4],dl - -.even test ebx,ebx - jz near .done - -.loop2 mov [esp],ebx ; save counter - mov ebx,ecx ; copy frac for texel1 to ebx - shr ebx,16 ; shift frac for texel1 to low end - add ecx,esi ; increment frac - mov ebp,ecx ; copy frac for texel2 to ebp - shr ebp,16 ; shift frac for texel2 to low end - add ecx,esi ; increment frac - mov dl,[edi+ebx] ; read texel1 - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; read texel2 - mov [eax],dl ; write texel1 - mov [eax+4],dh ; write texel2 - add eax,8 ; increment dest - dec ebx ; decrement counter - jnz .loop2 ; loop until it hits 0 - - jmp .done - -; there are more than 8 pixels to draw. position eax as close to a 32 byte -; boundary as possible, then do whatever is left. - -.mthan8 test eax,4 - jz .try2 - - mov ebp,ecx ; frac: in ebp - add ecx,esi ; step - shr ebp,16 ; frac: shift - add eax,4 ; increment dest - mov ebx,[esp] ; fetch counter - mov dl,[edi+ebp] ; tex: read - dec ebx ; decrement counter - mov [eax-4],dl ; tex: write - mov [esp],ebx ; store counter - -.try2 test eax,8 - jz .try4 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex2: read - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - sub ebx,2 ; decrement counter - add eax,8 ; increment dest - mov [esp],ebx ; store counter - -.try4 test eax,16 - jz .try8 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - shr ebx,16 ; frac3: shift - mov dh,[edi+ebp] ; tex2: read - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex4: read - sub ebx,4 ; decrement counter - mov [esp],ebx ; store counter - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add eax,16 ; increment dest - -.try8 mov ebx,[esp] ; make counter count groups of 8 - sub esp,4 - shr ebx,3 - jmp .tail8 - - align 16 - -.loop8 mov [esp],ebx ; save counter - mov ebx,ecx ; frac1: in ebx - shr ebx,16 ; frac1: shift - add ecx,esi ; step - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - mov dh,[edi+ebp] ; tex2: read - shr ebx,16 ; frac3: shift - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,ecx ; frac5: in ebx - mov dh,[edi+ebp] ; tex4: read - shr ebx,16 ; frac5: shift - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add ecx,esi ; step - mov ebp,ecx ; frac6: in ebp - shr ebp,16 ; frac6: shift - mov dl,[edi+ebx] ; tex5: read - add ecx,esi ; step - mov ebx,ecx ; frac7: in ebx - mov [eax+16],dl ; tex5: write - shr ebx,16 ; frac7: shift - mov dh,[edi+ebp] ; tex6: read - add ecx,esi ; step - mov ebp,ecx ; frac8: in ebp - mov [eax+20],dh ; tex6: write - shr ebp,16 ; frac8: shift - add eax,32 ; increment dest pointer - mov dl,[edi+ebx] ; tex7: read - mov ebx,[esp] ; fetch counter - mov [eax-8],dl ; tex7: write - mov dh,[edi+ebp] ; tex8: read - add ecx,esi ; step - mov [eax-4],dh ; tex8: write - mov dl,[eax] ; load cache - dec ebx ; decrement counter -.tail8 jnz near .loop8 ; loop if more to do - - pop ebp - mov ebx,[esp] - and ebx,7 - jnz near .lthan8 - -.done pop eax - pop esi - pop edi - pop ebx - pop ebp -.leave ret - - ;*---------------------------------------------------------------------- ;* ;* rt_copy1col_asm From c59925085c5a9f59293abd96d2f51f962e2f3d8b Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 4 Dec 2016 10:05:01 -0500 Subject: [PATCH 6/6] - Removal of the ASM functions resulted in failed compile when NO_ASM is set. --- src/r_draw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 06227167b..290256d58 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -247,6 +247,8 @@ extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip); +void R_DrawColumnHorizP_C(void); + #ifdef X86_ASM extern "C" void R_DrawColumnP_Unrolled (void); @@ -258,8 +260,6 @@ extern "C" void R_DrawFuzzColumnP_ASM (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); -void R_DrawColumnHorizP_C(void); - #else void R_DrawColumnP_C (void);