From 77c4786b9d716ab018ec4b082490b6ed78f5cc36 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 23:05:20 +0200 Subject: [PATCH] Minor code cleanup --- src/f_wipe.cpp | 5 +- src/r_draw.cpp | 57 +++-------- src/r_draw.h | 19 +++- src/r_draw_rgba.h | 213 +++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 32 +++---- src/r_main.h | 223 ------------------------------------------- src/r_plane.cpp | 28 +++--- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 5 +- src/v_draw.cpp | 1 + 10 files changed, 277 insertions(+), 308 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 84b6036e4..aa9038eeb 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -78,7 +78,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,8 +271,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to; - BYTE *fromold, *fromnew; + BYTE *to, *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ecb4441f8..4dcdc3e6b 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -63,7 +63,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE* dc_destorg; +BYTE *dc_destorg; } int scaledviewwidth; @@ -276,7 +276,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. - *dest = colormap[source[frac >> FRACBITS]]; + *dest = colormap[source[frac>>FRACBITS]]; dest += pitch; frac += fracstep; @@ -321,13 +321,12 @@ void R_FillAddColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -348,13 +347,12 @@ void R_FillAddClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -381,13 +379,12 @@ void R_FillSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; + int pitch = dc_pitch; do { @@ -413,13 +410,12 @@ void R_FillRevSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -672,13 +668,14 @@ void R_DrawTranslatedColumnP_C (void) { *dest = colormap[translation[source[frac>>FRACBITS]]]; dest += pitch; + frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C() +void R_DrawTlatedAddColumnP_C (void) { int count; BYTE *dest; @@ -772,15 +769,15 @@ void R_DrawAddClampColumnP_C () frac = dc_texturefrac; { - const BYTE *source = dc_source; BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { - DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; DWORD b = a; a |= 0x01f07c1f; @@ -788,7 +785,7 @@ void R_DrawAddClampColumnP_C () a &= 0x3fffffff; b = b - (b >> 5); a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB32k.All[a & (a>>15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1190,9 +1187,6 @@ void R_DrawSpanP_C (void) } while (--count); } } -#endif - -#ifndef X86_ASM // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) @@ -1282,8 +1276,6 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; - uint32_t light = calc_light_multiplier(ds_light); - if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1334,8 +1326,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1426,7 +1416,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1436,7 +1425,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); @@ -1449,7 +1437,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1459,14 +1446,12 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); } } - void R_DrawSpanMaskedAddClampP_C (void) { dsfixed_t xfrac; @@ -1481,8 +1466,6 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1552,7 +1535,7 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan_C (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); } @@ -1759,7 +1742,7 @@ DWORD vlinec1 () do { - *dest = colormap[source[frac >> bits]]; + *dest = colormap[source[frac>>bits]]; frac += fracstep; dest += pitch; } while (--count); @@ -1830,9 +1813,7 @@ DWORD mvlinec1 () return frac; } -#endif -#if !defined(X86_ASM) void mvlinec4 () { BYTE *dest = dc_dest; @@ -1843,6 +1824,7 @@ void mvlinec4 () do { BYTE pix; + pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; @@ -1879,7 +1861,6 @@ static void R_DrawFogBoundaryLine (int y, int x) int x2 = spanend[y]; BYTE *colormap = dc_colormap; BYTE *dest = ylookup[y] + dc_destorg; - do { dest[x] = colormap[dest[x]]; @@ -1996,8 +1977,6 @@ fixed_t tmvline1_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; @@ -2024,12 +2003,6 @@ void tmvline4_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - do { for (int i = 0; i < 4; ++i) @@ -2062,8 +2035,6 @@ fixed_t tmvline1_addclamp_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; diff --git a/src/r_draw.h b/src/r_draw.h index cea05e469..a31183405 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -32,7 +32,20 @@ extern "C" int fuzzpos; extern "C" int fuzzviewheight; struct FColormap; -struct ShadeConstants; + +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; extern "C" int ylookup[MAXHEIGHT]; @@ -58,7 +71,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE* dc_dest, *dc_destorg; +extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -68,7 +81,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 9f07ff0bf..47ea75260 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -24,6 +24,7 @@ #define __R_DRAW_RGBA__ #include "r_draw.h" +#include "v_palette.h" #include #include #include @@ -273,4 +274,216 @@ public: void Execute(DrawerThread *thread) override; }; +///////////////////////////////////////////////////////////////////////////// +// Pixel shading macros and inline functions: + +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) inline +#else +#define FORCEINLINE inline +#endif +#endif + +// calculates the light constant passed to the shade_pal_index function +FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ + \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index c829c2dc4..837093044 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -340,13 +340,13 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -374,14 +374,13 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -434,6 +433,7 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. void rt_shaded1col_c (int hx, int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -445,14 +445,12 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -466,6 +464,7 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_c (int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -477,14 +476,12 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val; @@ -523,14 +520,13 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -639,13 +635,13 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -674,13 +670,13 @@ void rt_subclamp4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; diff --git a/src/r_main.h b/src/r_main.h index d71d44fe1..fa8fe0bb1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,229 +90,6 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) -struct ShadeConstants -{ - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; -}; - -// calculates the light constant passed to the shade_pal_index function -inline uint32_t calc_light_multiplier(dsfixed_t light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -// Give the compiler a strong hint we want these functions inlined: -#ifndef FORCEINLINE -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) inline -#else -#define FORCEINLINE inline -#endif -#endif - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = (fg >> 24) & 0xff; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculate constants for a simple shade -#define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; - -// Calculate constants for a simple shade with different light levels for each pixel -#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); - -// Simple shade 4 pixels -#define SSE_SHADE_SIMPLE(fg) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ - fg_hi = _mm_srli_epi16(fg_hi, 8); \ - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ - fg_lo = _mm_srli_epi16(fg_lo, 8); \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - -// Calculate constants for a complex shade -#define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Calculate constants for a complex shade with different light levels for each pixel -#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Complex shade 4 pixels -#define SSE_SHADE(fg, shade_constants) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - \ - __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ - intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ - \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ - \ - __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ - intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ - \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ - \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - extern bool r_swtruecolor; extern double GlobVis; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1a08d1793..807066f77 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,6 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_draw_rgba.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -506,7 +507,7 @@ void R_MapTiltedPlane_rgba (int y, int x1) void R_MapColoredPlane_C (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); + memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); } void R_MapColoredPlane_rgba(int y, int x1) @@ -1710,7 +1711,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1745,7 +1746,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. - ang = M_PI * 3 / 2 - ViewAngle.Radians(); + ang = M_PI*3/2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1756,25 +1757,25 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; - // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); - // VectorScale2 (m, 64.f/VectorLength(m)); +// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); +// VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI / 2 - n[0] = -xscale * cos(ang); + ang += M_PI/2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif - // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); - // VectorScale2 (n, 64.f/VectorLength(n)); +// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); +// VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1807,7 +1808,6 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a if (pl->height.fC() > 0) planelightfloat = -planelightfloat; - ds_light = 0; if (fixedlightlev >= 0) { R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index fbbd65b17..c4347236d 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -96,7 +96,7 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) else if (cache != 0) { if (r_swtruecolor) - tex->GetPixels(); + tex->GetPixelsBgra(); else tex->GetPixels (); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0858dce2f..836f58690 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2612,8 +2612,10 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { + DWORD *bg2rgb; int spacing; BYTE *dest; + DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2622,9 +2624,6 @@ void R_DrawParticle_C (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); - DWORD *bg2rgb; - DWORD fg; - // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 02ba591b6..6a8dad047 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -44,6 +44,7 @@ #include "r_utility.h" #ifndef NO_SWRENDER #include "r_draw.h" +#include "r_draw_rgba.h" #include "r_main.h" #include "r_things.h" #endif